Author: tille
Date: 2012-04-20 22:08:11 +0000 (Fri, 20 Apr 2012)
New Revision: 2232
Modified:
   udd/scripts/fetch_bibref.sh
   udd/udd/bibref_gatherer.py
Log:
Do not try to parse HTML/XML files and rather remove these from the archive to parse

Modified: udd/scripts/fetch_bibref.sh
===================================================================
--- udd/scripts/fetch_bibref.sh 2012-04-20 21:46:21 UTC (rev 2231)
+++ udd/scripts/fetch_bibref.sh 2012-04-20 22:08:11 UTC (rev 2232)
@@ -5,10 +5,17 @@
 TARGETDIR=/org/udd.debian.org/mirrors/bibref
 FETCHURL=http://blends.debian.net/packages-metadata/packages-metadata.tar.bz2
 ARCHIVE=`basename $FETCHURL`
-#set -x
+CURDIR=`pwd`
+
 rm -rf $TARGETDIR
 mkdir -p $TARGETDIR
 wget -q ${FETCHURL} -O ${TARGETDIR}/${ARCHIVE}
 cd $TARGETDIR
 tar -xjf ${ARCHIVE}
+# There is no point in keeping non-yaml files which later just cause errors
+rm -f $CURDIR/bibref_gatherer_fetch.log
+for nonyamlfile in `find . -name "*.upstream" -exec file \{\} \; | grep -e HTML -e XML | sed 's/:.*$//'` ; do
+    file $nonyamlfile >> $CURDIR/bibref_gatherer_fetch.log
+    rm -f $nonyamlfile
+done
 rm -rf ${ARCHIVE}

Modified: udd/udd/bibref_gatherer.py
===================================================================
--- udd/udd/bibref_gatherer.py 2012-04-20 21:46:21 UTC (rev 2231)
+++ udd/udd/bibref_gatherer.py 2012-04-20 22:08:11 UTC (rev 2232)
@@ -133,10 +133,10 @@
 try:
     fields = yaml.load(uf.read())
 except yaml.scanner.ScannerError, err:
-    self.log.error("Syntax error in file %s: %s" % (ufile, str(err)))
+    self.log.error("Scanner error in file %s: %s" % (ufile, str(err)))
     continue
 except yaml.parser.ParserError, err:
-    self.log.error("Syntax error in file %s: %s" % (ufile, str(err)))
+    self.log.error("Parser error in file %s: %s" % (ufile, str(err)))
     continue
 except yaml.reader.ReaderError, err:
     self.log.error("Encoding problem in file %s: %s" % (ufile, str(err)))

_______________________________________________
Collab-qa-commits mailing list
Collab-qa-commits@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/collab-qa-commits