Author: tille Date: 2012-02-26 13:05:23 +0000 (Sun, 26 Feb 2012) New Revision: 2163
Modified: udd/config-org.yaml udd/sql/setup.sql udd/sql/upgrade.sql udd/udd/ddtp_gatherer.py udd/udd/packages_gatherer.py Log: Finally also distribution needs to be gathered inside descriptions table because the injection of squeeze and squeeze-backports would just let the last one win otherwise Modified: udd/config-org.yaml =================================================================== --- udd/config-org.yaml 2012-02-24 14:48:55 UTC (rev 2162) +++ udd/config-org.yaml 2012-02-26 13:05:23 UTC (rev 2163) @@ -871,7 +871,8 @@ path: /org/udd.debian.org/mirrors/ddtp files: Translation-.*\.gz mirrorpath: /org/mirrors/ftp.debian.org/ftp/dists - table: descriptions + descriptions-table: descriptions + imports-table: description_imports schema: ddtp ftpnew: Modified: udd/sql/setup.sql =================================================================== --- udd/sql/setup.sql 2012-02-24 14:48:55 UTC (rev 2162) +++ udd/sql/setup.sql 2012-02-26 13:05:23 UTC (rev 2163) @@ -602,38 +602,41 @@ GRANT SELECT ON all_bugs TO PUBLIC; CREATE TABLE descriptions ( - package text not null, - release text not null, - component text not null, - language text not null, - description text not null, - long_description text not null, - description_md5 text not null, -- md5 sum of the original English description - PRIMARY KEY (package, release, component, language, description, description_md5) + package text not null, + distribution text not null, + release text not null, + component text not null, + language text not null, + description text not null, + long_description text not null, + description_md5 text not null, -- md5 sum of the original English description + PRIMARY KEY (package, distribution, release, component, language, description, description_md5) ); GRANT SELECT ON descriptions TO PUBLIC; CREATE TABLE ubuntu_descriptions ( - package text not null, - release text not null, - component text not null, - language text not null, - description text not null, - long_description text not null, - description_md5 text not null, -- md5 sum of the original English description - PRIMARY KEY (package, release, component, language, description, description_md5) + package text not null, + distribution text not null, + release text not null, + component text not null, + language text not null, + description text not null, + long_description text not null, + description_md5 text not null, -- md5 sum of the original English description + PRIMARY KEY (package, distribution, release, component, language, description, description_md5) ); GRANT SELECT ON ubuntu_descriptions TO PUBLIC; CREATE TABLE derivatives_descriptions ( - package text not null, - release text not null, - component text not null, - language text not null, - description text not null, - long_description text not null, - description_md5 text not null, -- md5 sum of the original English description - PRIMARY KEY (package, release, component, language, description, description_md5) + package text not null, + distribution text not null, + release text not null, + component text not null, + language text not null, + description text not null, + long_description text not null, + description_md5 text not null, -- md5 sum of the original English description + PRIMARY KEY (package, distribution, release, component, language, description, description_md5) ); GRANT SELECT ON derivatives_descriptions TO PUBLIC; @@ -641,13 +644,14 @@ -- was imported previousely and thus reducing workload on UDD host in -- preventing doing duplicate work CREATE TABLE description_imports ( - release text, - component text, - language text, - translationfile text, - translationfile_sha1 text, - import_date timestamp default now(), - PRIMARY KEY (release, component, language) + distribution text not null, + release text not null, + component text not null, + language text not null, + translationfile text not null, + translationfile_sha1 text not null, + import_date timestamp default now(), + PRIMARY KEY (distribution, release, component, language) ); -- active_dds view Modified: udd/sql/upgrade.sql =================================================================== --- udd/sql/upgrade.sql 2012-02-24 14:48:55 UTC (rev 2162) +++ udd/sql/upgrade.sql 2012-02-26 13:05:23 UTC (rev 2163) @@ -367,59 +367,70 @@ -- Finally assemble necessary changes for description-less packages file -- - ddtp table renamed to descriptions -- - add ubuntu_descriptions & derivative_descriptions --- - fix primary key constraint (It is necessary to add component because --- there is at least one case (clustalw) which has the same --- (package,release, language, description, description_md5) --- key because it was freed in squeeze-backports and thus --- component needs to be regarded -DROP TABLE ddtp; +-- - fix primary key constraint (It is necessary to add +-- - component because there is at least one case (clustalw) which +-- has the same +-- (package, release, language, description, description_md5) +-- key because it was freed in squeeze-backports and thus +-- component needs to be regarded +-- - distribution because the subsequent import of debian-squeeze +-- and debian-backports-squeeze deletes descriptions of the previous +-- one so the last imported distribution would win +DROP TABLE IF EXISTS ddtp; +DROP TABLE IF EXISTS descriptions; CREATE TABLE descriptions ( - package text not null, - release text not null, - component text not null, - language text not null, - description text not null, - long_description text not null, - description_md5 text not null, -- md5 sum of the original English description - PRIMARY KEY (package, release, component, language, description, description_md5) + package text not null, + distribution text not null, + release text not null, + component text not null, + language text not null, + description text not null, + long_description text not null, + description_md5 text not null, -- md5 sum of the original English description + PRIMARY KEY (package, distribution, release, component, language, description, description_md5) ); GRANT SELECT ON descriptions TO PUBLIC; -DROP TABLE ubuntu_descriptions; + +DROP TABLE IF EXISTS ubuntu_descriptions; CREATE TABLE ubuntu_descriptions ( - package text not null, - release text not null, - component text not null, - language text not null, - description text not null, - long_description text not null, - description_md5 text not null, -- md5 sum of the original English description - PRIMARY KEY (package, release, component, language, description, description_md5) + package text not null, + distribution text not null, + release text not null, + component text not null, + language text not null, + description text not null, + long_description text not null, + description_md5 text not null, -- md5 sum of the original English description + PRIMARY KEY (package, distribution, release, component, language, description, description_md5) ); GRANT SELECT ON ubuntu_descriptions TO PUBLIC; -DROP TABLE derivatives_descriptions; +DROP TABLE IF EXISTS derivatives_descriptions; CREATE TABLE derivatives_descriptions ( - package text not null, - release text not null, - component text not null, - language text not null, - description text not null, - long_description text not null, - description_md5 text not null, -- md5 sum of the original English description - PRIMARY KEY (package, release, component, language, description, description_md5) + package text not null, + distribution text not null, + release text not null, + component text not null, + language text not null, + description text not null, + long_description text not null, + description_md5 text not null, -- md5 sum of the original English description + PRIMARY KEY (package, distribution, release, component, language, description, description_md5) ); GRANT SELECT ON derivatives_descriptions TO PUBLIC; -- Add house keeping table to enable deciding whether some translation file -- was imported previousely and thus reducing workload on UDD host in -- preventing doing duplicate work +DROP TABLE IF EXISTS description_imports; CREATE TABLE description_imports ( - release text, - component text, - language text, - translationfile text, - translationfile_sha1 text, - import_date timestamp default now(), - PRIMARY KEY (release, component, language) + distribution text not null, + release text not null, + component text not null, + language text not null, + translationfile text not null, + translationfile_sha1 text not null, + import_date timestamp default now(), + PRIMARY KEY (distribution, release, component, language) ); Modified: udd/udd/ddtp_gatherer.py =================================================================== --- udd/udd/ddtp_gatherer.py 2012-02-24 14:48:55 UTC (rev 2162) +++ udd/udd/ddtp_gatherer.py 2012-02-26 13:05:23 UTC (rev 2163) @@ -56,8 +56,9 @@ def __init__(self, connection, config, source): gatherer.__init__(self, connection, config, source) - self.assert_my_config('path', 'files', 'mirrorpath', 'table') - my_config = self.my_config + # The ID for the distribution we want to include + self._distr = None + self.assert_my_config('path', 'files', 'mirrorpath', 'descriptions-table', 'imports-table') self.log = logging.getLogger(self.__class__.__name__) if debug==1: self.log.setLevel(logging.DEBUG) @@ -69,55 +70,53 @@ self.log.addHandler(handler) cur = self.cursor() - query = "PREPARE ddtp_delete (text, text) AS DELETE FROM %s WHERE release = $1 AND component = $2 AND language = $3" % my_config['table'] - cur.execute(query) - query = """PREPARE ddtp_insert AS INSERT INTO %s - (package, release, component, language, description, long_description, description_md5) - VALUES ($1, $2, $3, $4, $5, $6, $7)""" % (my_config['table']) - cur.execute(query) + pkg = None - query = """PREPARE ddtp_check_before_insert (text, text, text, text, text, text) AS - SELECT COUNT(*) FROM %s - WHERE package = $1 AND release = $2 AND component = $3 AND language = $4 AND - description = $5 AND description_md5 = $6""" % (my_config['table']) - cur.execute(query) + def run(self): + src_cfg = self.my_config + + table = src_cfg['descriptions-table'] + imports = src_cfg['imports-table'] + + # Set distribution ID + # FIXME: This simple and straightforeward setting does only work + # as long as backports and security does not contain i18n files + # The right way to go would be to read the config file again fo + # each release below + self._distr = 'debian' + cur = self.cursor() query = """PREPARE ddtp_check_previous_import (text, text, text) AS - SELECT translationfile_sha1, import_date FROM description_imports - WHERE release = $1 AND component = $2 AND language = $3""" + SELECT translationfile_sha1, import_date FROM %s + WHERE distribution = '%s' AND release = $1 AND component = $2 AND language = $3""" \ + % (imports, self._distr) cur.execute(query) - query = """PREPARE ddtp_update_current_import (text, text, text) AS - UPDATE description_imports SET import_date = now() WHERE release = $1 AND component = $2 AND language = $3""" + UPDATE %s SET import_date = now() WHERE distribution = '%s' AND release = $1 AND component = $2 AND language = $3""" \ + % (imports, self._distr) cur.execute(query) - query = """PREPARE ddtp_insert_current_import (text, text, text, text, text) AS - INSERT INTO description_imports (release, component, language, translationfile, translationfile_sha1) - VALUES ($1, $2, $3, $4, $5)""" + INSERT INTO %s (distribution, release, component, language, translationfile, translationfile_sha1) + VALUES ('%s', $1, $2, $3, $4, $5)""" \ + % (imports, self._distr) cur.execute(query) - pkg = None - - def run(self): - my_config = self.my_config - cur = self.cursor() - cur.execute('SELECT component FROM packages GROUP by component') rows = cur.fetchall() valid_components = [] for r in rows: valid_components.append(r[0]) - releases = listdir(my_config['path']) + releases = listdir(src_cfg['path']) for rel in releases: - cpath = my_config['path']+'/'+rel+'/' + cpath = src_cfg['path']+'/'+rel+'/' components = listdir(cpath) for comp in components: if comp not in valid_components: self.log.error("Invallid component '%s' file found in %s", comp, cpath) continue cfp = open(cpath+'/'+comp,'r') - trfilepath = my_config['mirrorpath']+'/'+rel+'/'+comp+'/i18n/' + trfilepath = src_cfg['mirrorpath']+'/'+rel+'/'+comp+'/i18n/' for line in cfp.readlines(): (sha1, size, file) = line.strip().split(' ') trfile = trfilepath + file @@ -149,6 +148,24 @@ if prev_import[0] == sha1: self.log.info("File %s was imported at %s and has not changed since then" % (trfile, prev_import[1])) continue + + # Once it is sure that the file needs to be imported prepare the relevant statements + query = """PREPARE ddtp_delete (text) AS DELETE FROM %s + WHERE distribution = '%s' AND release = '%s' AND component = '%s' AND language = $1""" \ + % ( table, self._distr, rel, comp ) + cur.execute(query) + query = """PREPARE ddtp_check_before_insert (text, text, text, text) AS + SELECT COUNT(*) FROM %s + WHERE package = $1 AND distribution = '%s' AND release = '%s' AND component = '%s' AND + language = $2 AND description = $3 AND description_md5 = $4""" \ + % (table, self._distr, rel, comp) + cur.execute(query) + query = """PREPARE ddtp_insert (text, text, text, text, text) AS INSERT INTO %s + (package, distribution, release, component, language, description, long_description, description_md5) + VALUES ($1, '%s', '%s', '%s', $2, $3, $4, $5)""" \ + % (table, self._distr, rel, comp) + cur.execute(query) + self.import_translations(trfile, rel, comp, lang) if has_previous_import == True: cur.execute("EXECUTE ddtp_update_current_import (%s, %s, %s)", (rel, comp, lang)) @@ -156,10 +173,12 @@ cur.execute("EXECUTE ddtp_insert_current_import (%s, %s, %s, %s, %s)", (rel, comp, lang, trfile, sha1)) # commit every successfully language to make sure we get any languages in and will not be blocked by a single failing import self.connection.commit() + cur.execute("DEALLOCATE ddtp_delete") + cur.execute("DEALLOCATE ddtp_check_before_insert") + cur.execute("DEALLOCATE ddtp_insert") cfp.close() - cur.execute("DEALLOCATE ddtp_insert") - cur.execute("ANALYZE %s" % my_config['table']) + cur.execute("ANALYZE %s" % table) def import_translations(self, trfile, rel, comp, lang): @@ -168,8 +187,7 @@ cur = self.cursor() # Delete only records where we actually have Translation files. This # prevents dump deletion of all data in case of broken downloads - cur.execute('EXECUTE ddtp_delete (%s, %s, %s)', (rel, comp, lang)) - self.log.debug('EXECUTE ddtp_delete (%s, %s, %s)', (rel, comp, lang)) + self.log.debug('EXECUTE ddtp_delete (%s)', (lang)) i18n_error_flag=0 descstring = 'Description-'+lang @@ -196,16 +214,16 @@ for line in lines[1:]: self.pkg.long_description += line + "\n" - paramtuple = (self.pkg.package, self.pkg.release, self.pkg.component, self.pkg.language, self.pkg.description, self.pkg.description_md5) - cur.execute('EXECUTE ddtp_check_before_insert (%s, %s, %s, %s, %s, %s)', paramtuple) + paramtuple = (self.pkg.package, self.pkg.language, self.pkg.description, self.pkg.description_md5) + cur.execute('EXECUTE ddtp_check_before_insert (%s, %s, %s, %s)', paramtuple) if cur.fetchone()[0] > 0: - self.log.error("Duplicated key for package %s in release %s component %s in language %s: %s / %s" % paramtuple) + self.log.error("Duplicated key for package %s in release %s component %s in language %s: %s / %s" % \ + (self.pkg.package, rel, comp, self.pkg.language, self.pkg.description, self.pkg.description_md5)) else: - query = 'EXECUTE ddtp_insert (%s, %s, %s, %s, %s, %s, %s)' + query = 'EXECUTE ddtp_insert (%s, %s, %s, %s, %s)' try: self.log.debug(query, tuple([quote(item) for item in paramtuple])) - cur.execute(query, (self.pkg.package, self.pkg.release, self.pkg.component, self.pkg.language, self.pkg.description, self.pkg.long_description, self.pkg.description_md5)) - # self.connection.commit() # commit every single insert as long as translation files are featuring duplicated keys + cur.execute(query, (self.pkg.package, self.pkg.language, self.pkg.description, self.pkg.long_description, self.pkg.description_md5)) except IntegrityError, err: self.log.exception("Duplicated key in language %s: (%s)", self.pkg.language, ", ".join([to_unicode(item) for item in (self.pkg.package, self.pkg.release, self.pkg.component, self.pkg.description, self.pkg.description_md5)])) Modified: udd/udd/packages_gatherer.py =================================================================== --- udd/udd/packages_gatherer.py 2012-02-24 14:48:55 UTC (rev 2162) +++ udd/udd/packages_gatherer.py 2012-02-26 13:05:23 UTC (rev 2163) @@ -195,8 +195,8 @@ # For releases that have long descriptions in Packages and not in Translation, # add the description to the configured descriptions table. if self.add_descriptions: - cur.execute("DELETE FROM %s WHERE release = '%s' AND component = '%s' AND language = '%s'" %\ - (src_cfg['descriptions-table'], src_cfg['release'], comp, 'en')) + cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s' AND language = '%s'" %\ + (src_cfg['descriptions-table'], self._distr, src_cfg['release'], comp, 'en')) for arch in src_cfg['archs']: path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz') try: @@ -216,8 +216,8 @@ if self.add_descriptions: cur.execute("""PREPARE description_insert AS INSERT INTO %s - (package, release, component, language, description, long_description, description_md5) - (SELECT $1 AS package, '%s' AS release, '%s' AS component, $2 AS language, + (package, distribution, release, component, language, description, long_description, description_md5) + (SELECT $1 AS package, '%s' AS distribution, '%s' AS release, '%s' AS component, $2 AS language, $3 AS description, $4 AS long_description, $5 AS description_md5 WHERE NOT EXISTS (SELECT 1 @@ -225,7 +225,7 @@ WHERE package=$1 AND release='%s' AND component='%s' AND language=$2 AND description=$3 AND long_description=$4 AND description_md5=$5)) - """ % (src_cfg['descriptions-table'], src_cfg['release'], comp, + """ % (src_cfg['descriptions-table'], self._distr, src_cfg['release'], comp, src_cfg['descriptions-table'], src_cfg['release'], comp)) # aux.print_debug("Reading file " + path) # Copy content from gzipped file to temporary file, so that apt_pkg is _______________________________________________ Collab-qa-commits mailing list Collab-qa-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/collab-qa-commits