[GitHub] incubator-hawq issue #904: HAWQ-1060. Refactor hawq register with better rea...
Github user zhangh43 commented on the issue: https://github.com/apache/incubator-hawq/pull/904

+1
[GitHub] incubator-hawq issue #904: HAWQ-1060. Refactor hawq register with better rea...
Github user ictmalili commented on the issue: https://github.com/apache/incubator-hawq/pull/904

LGTM. +1
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295805

--- Diff: tools/bin/hawqregister ---
@@ -126,182 +127,319 @@ def option_parser_yml(yml_file):
     return 'AO', files, sizes, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']

-def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_locations, bucket_number):
-    try:
-        query = "select count(*) from pg_class where relname = '%s';" % tablename.split('.')[-1].lower()
-        conn = dbconn.connect(dburl, False)
-        rows = dbconn.execSQL(conn, query)
-        conn.commit()
-        conn.close()
-        for row in rows:
-            if row[0] != 0:
-                return False
-    except DatabaseError, ex:
-        logger.error('Failed to execute query "%s"' % query)
-        sys.exit(1)
-
-    try:
+class GpRegisterAccessor(object):
+    def __init__(self, conn):
+        self.conn = conn
+        rows = self.exec_query("""
+            SELECT oid, datname, dat2tablespace,
+                   pg_encoding_to_char(encoding) encoding
+            FROM pg_database WHERE datname=current_database()""")
+        self.dbid = rows[0]['oid']
+        self.dbname = rows[0]['datname']
+        self.spcid = rows[0]['dat2tablespace']
+        self.dbencoding = rows[0]['encoding']
+        self.dbversion = self.exec_query('select version()')[0]['version']
+
+    def exec_query(self, sql):
+        '''execute query and return dict result'''
+        return self.conn.query(sql).dictresult()
+
+    def get_table_existed(self, tablename):
+        qry = """select count(*) from pg_class where relname = '%s';""" % tablename.split('.')[-1].lower()
+        return self.exec_query(qry)[0]['count'] == 1
+
+    def do_create_table(self, tablename, schema_info, fmt, distrbution_policy, file_locations, bucket_number):
+        if self.get_table_existed(tablename):
+            return False
         schema = ','.join([k['name'] + ' ' + k['type'] for k in schema_info])
         fmt = 'ROW' if fmt == 'AO' else fmt
         if fmt == 'ROW':
             query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, checksum=%s, bucketnum=%s) %s;'
-                % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], bucket_number, distrbution_policy))
+                     % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], bucket_number, distrbution_policy))
         else: # Parquet
             query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, pagesize=%s, rowgroupsize=%s, bucketnum=%s) %s;'
-                % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['PageSize'], file_locations['RowGroupSize'], bucket_number, distrbution_policy))
-        conn = dbconn.connect(dburl, False)
-        rows = dbconn.execSQL(conn, query)
-        conn.commit()
-        conn.close()
+                     % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['PageSize'], file_locations['RowGroupSize'], bucket_number, distrbution_policy))
+        self.conn.query(query)
         return True
-    except DatabaseError, ex:
-        print DatabaseError, ex
-        logger.error('Failed to execute query "%s"' % query)
-        sys.exit(1)

+    def check_hash_type(self, tablename):
+        qry = """select attrnums from gp_distribution_policy, pg_class where pg_class.relname = '%s' and pg_class.oid = gp_distribution_policy.localoid;""" % tablename
+        rows = self.exec_query(qry)
+        if len(rows) == 0:
+            logger.error('Table %s not found in table gp_distribution_policy.' % tablename)
+            sys.exit(1)
+        if rows[0]['attrnums']:
+            logger.error('Cannot register file(s) to a table which is hash distribuetd.')
+            sys.exit(1)

-def get_seg_name(dburl, tablename, database, fmt):
-    try:
-        relname = ''
+    # pg_paqseg_#
+    def get_seg_name(self, tablename, database, fmt):
         tablename = tablename.split('.')[-1]
         query = ("select pg_class2.relname from pg_class as pg_class1, pg_appendonly, pg_class as pg_class2 "
                  "where pg_class1.relname ='%s' and pg_class1.oid = pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") %
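As a reading aid for the refactor above, here is a minimal sketch of how GpRegisterAccessor might be driven end to end. It assumes a PyGreSQL-style connection whose query() results expose dictresult() (which is what exec_query() relies on); the pg.connect() arguments and the register_table() wrapper are illustrative names, not the tool's actual entry points.

    # Sketch only: pg.connect() usage and register_table() are assumptions for
    # illustration, not the exact code in tools/bin/hawqregister.
    import pg  # PyGreSQL; its query results provide dictresult()

    def register_table(dbname, tablename, schema_info, fmt,
                       distribution_policy, file_locations, bucket_number):
        conn = pg.connect(dbname=dbname)      # hypothetical connection setup
        accessor = GpRegisterAccessor(conn)
        # Create the target table only if it does not already exist.
        created = accessor.do_create_table(tablename, schema_info, fmt,
                                           distribution_policy, file_locations,
                                           bucket_number)
        # Refuse to register file(s) into a hash-distributed table (exits on violation).
        accessor.check_hash_type(tablename)
        # Resolve the append-only segment relation name (pg_paqseg_# for Parquet,
        # per the comment in the diff).
        seg_name = accessor.get_seg_name(tablename, dbname, fmt)
        return created, seg_name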
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295812 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295886 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79296000 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79296004 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295934 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295956 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #837: HAWQ-779 support pxf filter pushdown at th...
Github user jiadexin commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/837#discussion_r79295676

--- Diff: src/backend/optimizer/plan/createplan.c ---
@@ -1144,9 +1144,15 @@ static char** create_pxf_plan(char **segdb_file_map, RelOptInfo *rel, int total_
 	Relation relation = RelationIdGetRelation(planner_rt_fetch(scan_relid, ctx->root)->relid);

-	segdb_work_map = map_hddata_2gp_segments(uri_str,
+	if (pxf_enable_filter_pushdown){
--- End diff --

You are right. My C is not very strong, so...
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user xunzhang commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295679

--- Diff: tools/bin/hawqregister ---
+            logger.error('Table %s not found in table gp_distribution_policy.' % tablename)
--- End diff --

sure
[GitHub] incubator-hawq pull request #837: HAWQ-779 support pxf filter pushdown at th...
Github user jiadexin commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/837#discussion_r79295630

--- Diff: src/backend/optimizer/plan/createplan.c ---
+	if (pxf_enable_filter_pushdown){
--- End diff --

You are right. My C is not very strong, so...
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295485 --- Diff: tools/bin/hawqregister ---
[GitHub] incubator-hawq pull request #904: HAWQ-1060. Refactor hawq register with bet...
Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/904#discussion_r79295436

--- Diff: tools/bin/hawqregister ---
+            logger.error('Table %s not found in table gp_distribution_policy.' % tablename)
--- End diff --

Could we make this message more user-friendly? For example: "Table %s is not an append-only table. There is no record in gp_distribution_policy."
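If that suggestion were adopted, the branch might read roughly as follows; this is only a sketch of the reviewer's proposed wording layered onto the code from the diff above, not the change that was actually committed.

    # Sketch of the suggested wording; the final message in the PR may differ.
    def check_hash_type(self, tablename):
        qry = """select attrnums from gp_distribution_policy, pg_class
                 where pg_class.relname = '%s'
                   and pg_class.oid = gp_distribution_policy.localoid;""" % tablename
        rows = self.exec_query(qry)
        if len(rows) == 0:
            # Reviewer-suggested, friendlier message for the "no record" case.
            logger.error("Table %s is not an append-only table. There is no record in "
                         "gp_distribution_policy." % tablename)
            sys.exit(1)
        if rows[0]['attrnums']:
            logger.error('Cannot register file(s) to a table which is hash distributed.')
            sys.exit(1)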