Hi, can you review the patch above? I haven't replaced '#' with '\#' yet (see my previous mail). I now have commit permissions for collab-qa. If everything looks fine, I can commit these changes to the repo.
On Thu, Apr 23, 2015 at 11:17 AM, Akshita Jha <[email protected]> wrote: > --- > config-ullmann.yaml | 4 + > scripts/cron_ftpnew_blends.sh | 1 + > udd/bibref_gatherer.py | 112 +------------------- > udd/generate_bibtex.py | 230 > ++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 237 insertions(+), 110 deletions(-) > create mode 100644 udd/generate_bibtex.py > > diff --git a/config-ullmann.yaml b/config-ullmann.yaml > index 901550e..06c8a19 100644 > --- a/config-ullmann.yaml > +++ b/config-ullmann.yaml > @@ -45,6 +45,7 @@ general: > i18n-apps: module udd.i18n_apps_gatherer > hints: module udd.hints_gatherer > deferred: module udd.deferred_gatherer > + generate-bibtex: module udd.generate_bibtex > timestamp-dir: /srv/udd.debian.org/timestamps > lock-dir: /srv/udd.debian.org/locks > archs: > @@ -471,3 +472,6 @@ vcswatch: > > reproducible: > type: reproducible > + > +generate-bibtex: > + type: generate-bibtex > diff --git a/scripts/cron_ftpnew_blends.sh b/scripts/cron_ftpnew_blends.sh > index fc0d087..c38c076 100755 > --- a/scripts/cron_ftpnew_blends.sh > +++ b/scripts/cron_ftpnew_blends.sh > @@ -11,3 +11,4 @@ $UAR ftpnew > $UAR blends-prospective > # $UAR blends-metadata > $UAR blends-all > +$UAR generate-bibtex > diff --git a/udd/bibref_gatherer.py b/udd/bibref_gatherer.py > index 654d7e7..41f9618 100644 > --- a/udd/bibref_gatherer.py > +++ b/udd/bibref_gatherer.py > @@ -6,8 +6,7 @@ This script imports bibliographic references from > upstream-metadata.debian.net. 
> > from gatherer import gatherer > from sys import stderr, exit > -from os import listdir, unlink, rename, access, X_OK > -from os.path import isfile > +from os import listdir > from fnmatch import fnmatch > import yaml > from psycopg2 import IntegrityError, InternalError > @@ -23,43 +22,9 @@ debug=0 > def get_gatherer(connection, config, source): > return bibref_gatherer(connection, config, source) > > -def rm_f(file): > - try: > - unlink(file) > - except OSError: > - pass > - > -def cleanup_tex_logs(basetexfile): > - rm_f(basetexfile+'.aux') > - rm_f(basetexfile+'.bbl') > - rm_f(basetexfile+'.blg') > - rm_f(basetexfile+'.log') > - > # seek for authors separated by ',' rather than by ' and ' > seek_broken_authors_re = > re.compile('^[^\s^,]+\s+[^\s^,]+\s*,\s*[^\s^,]+\s+[^\s^,]') > > -def open_tex_process(texexe, basetexfile): > - if texexe == 'pdflatex': > - ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile], > shell=False, stdout=PIPE) > - elif texexe == 'bibtex': > - ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE) > - else: > - return(False, 'Wrong exe: '+texexe) > - errstring="" > - if ptex.wait(): > - if texexe == 'pdflatex': > - for logrow in ptex.communicate()[0].splitlines(): > - if logrow.startswith('!'): > - errstring += logrow > - return(False, errstring) > - else: > - for logrow in ptex.communicate()[0].splitlines(): > - if logrow.startswith('This is BibTeX'): > - continue > - errstring += logrow + '\n' > - return(True, errstring) > - return(True, errstring) > - > other_known_keys = ('Archive', > 'Bug-Database', > 'Cite-As', > @@ -297,10 +262,6 @@ class bibref_gatherer(gatherer): > handler.setFormatter(formatter) > self.log.addHandler(handler) > > - > - self.bibtexfile = 'debian.bib' > - self.bibtex_example_tex = 'debian.tex' > - > def run(self): > my_config = self.my_config > #start harassing the DB, preparing the final inserts and making place > @@ -364,76 +325,7 @@ class bibref_gatherer(gatherer): > # commit before 
check to make sure the table is not locked in case > LaTeX run will fail for whatever reason > self.connection.commit() > > - # if there is a working LaTeX installation try to build a BibTeX > database and test it by creating a debian.pdf file > - if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK) > and \ > - isfile('/usr/bin/bibtex') and access('/usr/bin/bibtex', X_OK) > and \ > - ( > isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or > \ > - > isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) : > - # create BibTeX file > - bf = open(self.bibtexfile, 'w') > - cur.execute("SELECT * FROM bibtex()") > - for row in cur.fetchall(): > - print >>bf, row[0] > - bf.close() > - > - # create LaTeX file to test BibTeX functionality > - bf = open(self.bibtex_example_tex, 'w') > - print >>bf, """\documentclass[10]{article} > -\usepackage[T1]{fontenc} > -\usepackage[utf8]{inputenc} > -\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry} > -\usepackage{longtable} > -\usepackage[super]{natbib} > -\setlongtables > -\\begin{document} > -\small > -\\begin{longtable}{llp{70mm}l} > -\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline""" > - > - cur.execute("SELECT * FROM bibtex_example_data() AS (package text, > source text, bibkey text, description text)") > - for row in cur.fetchall(): > - print >>bf, row[0], '&', row[1], '&', row[3] , '&', > row[2]+'\cite{'+row[2]+'} \\\\' > - > - print >>bf, """\end{longtable} > - > -% \\bibliographystyle{plain} > -% Try a bit harder by also including URL+DOI > -\\bibliographystyle{plainnat} > -\\bibliography{debian} > - > -\end{document} > -""" > - bf.close() > - > - # try to build debian.pdf file to test aboc LaTeX file > - basetexfile = self.bibtex_example_tex.replace('.tex','') > - cleanup_tex_logs(basetexfile) > - try: > - rename(basetexfile+'.pdf', basetexfile+'.pdf~') > - except OSError: > - pass > - > - (retcode,errstring) = 
open_tex_process('pdflatex', basetexfile) > - if not retcode: > - self.log.error("Problem in 1. PdfLaTeX run of %s.tex: `%s` --> > please inspect %s.log" % (basetexfile, errstring, basetexfile)) > - exit(1) > - (retcode,errstring) = open_tex_process('bibtex', basetexfile) > - if errstring != "": > - if not retcode: > - self.log.error("Problem in BibTeX run of %s.bib: `%s`" % > (basetexfile, errstring)) > - exit(1) > - self.log.error("Ignore the following problems in BibTeX run of > %s.bib: `%s`" % (basetexfile, errstring)) > - (retcode,errstring) = open_tex_process('pdflatex', basetexfile) > - if not retcode: > - self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` --> > please inspect %s.log" % (basetexfile, errstring, basetexfile)) > - exit(1) > - (retcode,errstring) = open_tex_process('pdflatex', basetexfile) > - if not retcode: > - self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` --> > please inspect %s.log" % (basetexfile, errstring, basetexfile)) > - exit(1) > - > - cleanup_tex_logs(basetexfile) > - > + > if __name__ == '__main__': > main() > > diff --git a/udd/generate_bibtex.py b/udd/generate_bibtex.py > new file mode 100644 > index 0000000..6ddc03a > --- /dev/null > +++ b/udd/generate_bibtex.py > @@ -0,0 +1,230 @@ > +from gatherer import gatherer > +from os import unlink, rename, access, X_OK > +from os.path import isfile > +from subprocess import Popen, PIPE > +import logging > +import logging.handlers > + > +debug = 0 > + > +def get_gatherer(connection, config, source): > + return generate_bibtex(connection, config, source) > + > +def rm_f(file): > + try: > + unlink(file) > + except OSError: > + pass > + > + > +def cleanup_tex_logs(basetexfile): > + rm_f(basetexfile+'.aux') > + rm_f(basetexfile+'.bbl') > + rm_f(basetexfile+'.blg') > + rm_f(basetexfile+'.log') > + > + > +def open_tex_process(texexe, basetexfile): > + if texexe == 'pdflatex': > + ptex = Popen(['pdflatex', '-interaction=batchmode', basetexfile], > shell=False, stdout=PIPE) > 
+ elif texexe == 'bibtex': > + ptex = Popen(['bibtex', basetexfile], shell=False, stdout=PIPE) > + else: > + return(False, 'Wrong exe: '+texexe) > + errstring="" > + if ptex.wait(): > + if texexe == 'pdflatex': > + for logrow in ptex.communicate()[0].splitlines(): > + if logrow.startswith('!'): > + errstring += logrow > + return(False, errstring) > + else: > + for logrow in ptex.communicate()[0].splitlines(): > + if logrow.startswith('This is BibTeX'): > + continue > + errstring += logrow + '\n' > + return(True, errstring) > + return(True, errstring) > + > + > +class generate_bibtex(gatherer): > + """ > + Generate a debian.bib and debian.tex files > + """ > + > + def __init__(self, connection, config, source): > + gatherer.__init__(self, connection, config, source) > + > + self.log = logging.getLogger(self.__class__.__name__) > + if debug==1: > + self.log.setLevel(logging.DEBUG) > + else: > + self.log.setLevel(logging.INFO) > + handler = > logging.handlers.RotatingFileHandler(filename=self.__class__.__name__+'.log',mode='w') > + formatter = logging.Formatter("%(asctime)s - %(levelname)s - > (%(lineno)d): %(message)s") > + handler.setFormatter(formatter) > + self.log.addHandler(handler) > + > + self.bibtexfile = 'debian.bib' > + self.bibtex_example_tex = 'debian.tex' > + self.all_ref = 0 # to include all references from bibref table set > it to 1 > + > + def run(self): > + cur = self.cursor() > + > + # if there is a working LaTeX installation try to build a BibTeX > database and test it by creating a debian.pdf file > + if isfile('/usr/bin/pdflatex') and access('/usr/bin/pdflatex', X_OK) > and \ > + isfile('/usr/bin/bibtex') and access('/usr/bin/bibtex', X_OK) > and \ > + ( > isfile('/usr/share/texlive/texmf-dist/fonts/source/jknappen/ec/ecrm.mf') or > \ > + > isfile('/usr/share/texmf-texlive/fonts/source/jknappen/ec/ecrm.mf') ) : > + > + # create BibTeX file > + bf = open(self.bibtexfile, 'w') > + > + if self.all_ref == 1: > + query = "SELECT * FROM bibtex()" > + 
else: > + query = """ SELECT DISTINCT > + CASE WHEN bibjournal.value IS NULL AND > bibin.value IS NOT NULL AND bibpublisher.value IS NOT NULL THEN '@Book{' || > bibkey.value > + ELSE CASE WHEN bibauthor.value IS NULL OR > bibjournal.value IS NULL THEN '@Misc{'|| bibkey.value || > + CASE WHEN bibauthor.value IS NULL THEN > E',\n Key = "' || bibkey.value || '"' ELSE '' END -- without author we > need a sorting key > + ELSE '@Article{' || bibkey.value END END || > + CASE WHEN bibauthor.value IS NOT NULL THEN > E',\n Author = {' || bibauthor.value || '}' ELSE '' END || > + CASE WHEN bibtitle.value IS NOT NULL THEN > E',\n Title = "{' || > + replace(replace(replace(bibtitle.value, > + '_', E'\\_'), -- > + '%', E'\\%'), -- > + E'\xe2\x80\x89', E'\\,') -- TeX > syntax for '_' and UTF-8 "thin space" > + -- see > http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=128&utf8=string-literal > + || '}"' > + ELSE '' END || > + CASE WHEN bibbooktitle.value IS NOT NULL THEN > E',\n Booktitle = "{' || bibbooktitle.value || '}"' ELSE '' END || > + CASE WHEN bibyear.value IS NOT NULL THEN > E',\n Year = {' || bibyear.value || '}' ELSE '' END || > + CASE WHEN bibmonth.value IS NOT NULL THEN > E',\n Month = {' || bibmonth.value || '}' ELSE '' END || > + CASE WHEN bibjournal.value IS NOT NULL THEN > E',\n Journal = {' || replace(bibjournal.value, '&', E'\\&') || '}' ELSE > '' END || > + CASE WHEN bibaddress.value IS NOT NULL THEN > E',\n Address = {' || bibaddress.value || '}' ELSE '' END || > + CASE WHEN bibpublisher.value IS NOT NULL THEN > E',\n Publisher = {' || bibpublisher.value || '}' ELSE '' END || > + CASE WHEN bibvolume.value IS NOT NULL THEN > E',\n Volume = {' || bibvolume.value || '}' ELSE '' END || > + CASE WHEN bibnumber.value IS NOT NULL THEN > E',\n Number = {' || bibnumber.value || '}' ELSE '' END || > + CASE WHEN bibpages.value IS NOT NULL THEN > E',\n Pages = {' || regexp_replace(bibpages.value, E'(\\d)-([\\d])', > E'\\1--\\2') || '}' ELSE '' END || 
> + CASE WHEN biburl.value IS NOT NULL THEN > E',\n URL = {' || > + replace(replace(replace(replace(biburl.value, > + '_', E'\\_'), -- > + '%', E'\\%'), -- > + '&', E'\\&'), -- > + '~', E'\\~{}') -- > + || '}' > + ELSE '' END || > + CASE WHEN bibdoi.value IS NOT NULL THEN > E',\n DOI = {' || > + replace(replace(bibdoi.value, > + '_', E'\\_'), -- > + '&', E'\\&') -- > + || '}' > + ELSE '' END || > + CASE WHEN bibpmid.value IS NOT NULL THEN > E',\n PMID = {' || bibpmid.value || '}' ELSE '' END || > + CASE WHEN bibeprint.value IS NOT NULL THEN > E',\n EPrint = {' || > + replace(replace(replace(replace(bibeprint.value, > + '_', E'\\_'), -- > + '%', E'\\%'), -- > + '&', E'\\&'), -- > + '~', E'\\~{}') -- > + || '}' > + ELSE '' END || > + CASE WHEN bibin.value IS NOT NULL THEN > E',\n In = {' || bibin.value || '}' ELSE '' END || > + CASE WHEN bibissn.value IS NOT NULL THEN > E',\n ISSN = {' || bibissn.value || '}' ELSE '' END || > + E',\n}\n' > + AS bibentry > + -- p.source AS source, > + -- p.rank AS rank, > + FROM (SELECT DISTINCT source, package, rank FROM bibref) p > + INNER JOIN sources s ON s.source = p.source > + LEFT OUTER JOIN bibref bibkey ON p.source = > bibkey.source AND bibkey.rank = p.rank AND bibkey.package = > p.package AND bibkey.key = 'bibtex' > + LEFT OUTER JOIN bibref bibyear ON p.source = > bibyear.source AND bibyear.rank = p.rank AND bibyear.package = > p.package AND bibyear.key = 'year' > + LEFT OUTER JOIN bibref bibmonth ON p.source = > bibmonth.source AND bibmonth.rank = p.rank AND bibmonth.package = > p.package AND bibmonth.key = 'month' > + LEFT OUTER JOIN bibref bibtitle ON p.source = > bibtitle.source AND bibtitle.rank = p.rank AND bibtitle.package = > p.package AND bibtitle.key = 'title' > + LEFT OUTER JOIN bibref bibbooktitle ON p.source = > bibbooktitle.source AND bibbooktitle.rank = p.rank AND bibbooktitle.package > = p.package AND bibbooktitle.key = 'booktitle' > + LEFT OUTER JOIN bibref bibauthor ON p.source = > bibauthor.source AND 
bibauthor.rank = p.rank AND bibauthor.package = > p.package AND bibauthor.key = 'author' > + LEFT OUTER JOIN bibref bibjournal ON p.source = > bibjournal.source AND bibjournal.rank = p.rank AND bibjournal.package = > p.package AND bibjournal.key = 'journal' > + LEFT OUTER JOIN bibref bibaddress ON p.source = > bibaddress.source AND bibaddress.rank = p.rank AND bibaddress.package = > p.package AND bibaddress.key = 'address' > + LEFT OUTER JOIN bibref bibpublisher ON p.source = > bibpublisher.source AND bibpublisher.rank = p.rank AND bibpublisher.package > = p.package AND bibpublisher.key = 'publisher' > + LEFT OUTER JOIN bibref bibvolume ON p.source = > bibvolume.source AND bibvolume.rank = p.rank AND bibvolume.package = > p.package AND bibvolume.key = 'volume' > + LEFT OUTER JOIN bibref bibdoi ON p.source = > bibdoi.source AND bibdoi.rank = p.rank AND bibdoi.package = > p.package AND bibdoi.key = 'doi' > + LEFT OUTER JOIN bibref bibpmid ON p.source = > bibpmid.source AND bibpmid.rank = p.rank AND bibpmid.package = > p.package AND bibpmid.key = 'pmid'LEFT OUTER JOIN bibref biburl ON > p.source = biburl.source AND biburl.rank = p.rank AND > biburl.package = p.package AND biburl.key = 'url' > + LEFT OUTER JOIN bibref bibnumber ON p.source = > bibnumber.source AND bibnumber.rank = p.rank AND bibnumber.package = > p.package AND bibnumber.key = 'number' > + LEFT OUTER JOIN bibref bibpages ON p.source = > bibpages.source AND bibpages.rank = p.rank AND bibpages.package = > p.package AND bibpages.key = 'pages' > + LEFT OUTER JOIN bibref bibeprint ON p.source = > bibeprint.source AND bibeprint.rank = p.rank AND bibeprint.package = > p.package AND bibeprint.key = 'eprint' > + LEFT OUTER JOIN bibref bibin ON p.source = > bibin.source AND bibin.rank = p.rank AND bibin.package = > p.package AND bibin.key = 'in' > + LEFT OUTER JOIN bibref bibissn ON p.source = > bibissn.source AND bibissn.rank = p.rank AND bibissn.package = > p.package AND bibissn.key = 'issn' > + ORDER BY 
bibentry -- p.source > + ;""" > + > + cur.execute(query) > + for row in cur.fetchall(): > + print >>bf, row[0] > + > + bf.close() > + > + # create LaTeX file to test BibTeX functionality > + bf = open(self.bibtex_example_tex, 'w') > + print >>bf, """\documentclass[10]{article} > +\usepackage[T1]{fontenc} > +\usepackage[utf8]{inputenc} > +\usepackage[left=2mm,top=2mm,right=2mm,bottom=2mm,nohead,nofoot]{geometry} > +\usepackage{longtable} > +\usepackage[super]{natbib} > +\setlongtables > +\\begin{document} > +\small > +\\begin{longtable}{llp{70mm}l} > +\\bf package & \\bf source & \\bf description & BibTeX key \\\\ \hline""" > + > + cur.execute("SELECT * FROM bibtex_example_data() AS (package text, > source text, bibkey text, description text)") > + for row in cur.fetchall(): > + print >>bf, row[0], '&', row[1], '&', row[3] , '&', > row[2]+'\cite{'+row[2]+'} \\\\' > + > + print >>bf, """\end{longtable} > + > +% \\bibliographystyle{plain} > +% Try a bit harder by also including URL+DOI > +\\bibliographystyle{plainnat} > +\\bibliography{debian} > + > +\end{document} > +""" > + bf.close() > + > + # try to build debian.pdf file to test aboc LaTeX file > + basetexfile = self.bibtex_example_tex.replace('.tex','') > + cleanup_tex_logs(basetexfile) > + try: > + rename(basetexfile+'.pdf', basetexfile+'.pdf~') > + except OSError: > + pass > + > + (retcode,errstring) = open_tex_process('pdflatex', basetexfile) > + if not retcode: > + self.log.error("Problem in 1. 
PdfLaTeX run of %s.tex: `%s` --> > please inspect %s.log" % (basetexfile, errstring, basetexfile)) > + exit(1) > + > + (retcode,errstring) = open_tex_process('bibtex', basetexfile) > + if errstring != "": > + if not retcode: > + self.log.error("Problem in BibTeX run of %s.bib: `%s`" % > (basetexfile, errstring)) > + exit(1) > + self.log.error("Ignore the following problems in BibTeX run of > %s.bib: `%s`" % (basetexfile, errstring)) > + > + (retcode,errstring) = open_tex_process('pdflatex', basetexfile) > + if not retcode: > + self.log.error("Problem in 2. PdfLaTeX run of %s.tex: `%s` --> > please inspect %s.log" % (basetexfile, errstring, basetexfile)) > + exit(1) > + > + (retcode,errstring) = open_tex_process('pdflatex', basetexfile) > + if not retcode: > + self.log.error("Problem in 3. PdfLaTeX run of %s.tex: `%s` --> > please inspect %s.log" % (basetexfile, errstring, basetexfile)) > + exit(1) > + > + cleanup_tex_logs(basetexfile) > + > +if __name__ == '__main__': > + main() > + > -- > 1.9.1 > > -- Akshita Jha
