Personally, I'd prefer to see this as a few separate patches, especially one that separates out the PEP8 stuff from the other problems.
On Sat, Mar 31, 2018 at 11:44 AM, Matěj Cepl <mc...@cepl.eu> wrote: > That means: > * Make it PEP8 compliant > * That unfortunately includes '\t' -> ' ' change (PEP8 doesn’t > like TABs), which makes for the massive diff. > * Don’t use deprecated API calls. > Use Sword.VersificationMgr.getSystemVersificationMgr() instead > of Sword.VerseMgr.getSystemVerseMgr(). > Use popError() instead of Error(). > * Switch to logging to make it a little bit more civilized. > * Don’t use RE when you don’t need it. > * Shorten the labels so they are not overflowing the screen. > * Don’t fool with PyQuery and use standard XML libraries. > * Use argparser. > --- > versification/av11n.py | 213 ++++++++++++++++++++++++++++-- > ------------------- > 1 file changed, 124 insertions(+), 89 deletions(-) > > diff --git a/versification/av11n.py b/versification/av11n.py > index 136a382..5386b8e 100755 > --- a/versification/av11n.py > +++ b/versification/av11n.py > @@ -1,4 +1,5 @@ > #!/usr/bin/env python > +# coding: utf-8 > # > # This does a very roughshod attempt to compare the osisIDs found in an > # XML file with each of the versifications that SWORD knows about to help > @@ -7,106 +8,140 @@ > # in the proper order, although within each testament, it requires nothing > # special as for ordering. > # > -# Invoke simply by calling the program and the file name. If you want > -# more output, change the following line to be True instead of False > -verbose = False > -debug = True > -import sys > +# Invoke simply by calling the program and the file name. > +from __future__ import print_function > +import argparse > +import io > +import logging > import re > -verseid = re.compile('^.+\..+\..+$') > +import sys > + > +# in normal state level should be debug.WARNING, debug.INFO and > debug.DEBUG > +# give additional information. 
> +logging.basicConfig(format='%(levelname)s:%(message)s', > + level=logging.INFO) > +log = logging.getLogger('versification') > + > +try: > + import lxml.etree as ET > +except ImportError: > + import xml.etree.ElementTree as ET > + > +OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace' > +VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$') > > # Inform the user that we need the SWORD extension > try: > - import Sword > -except: > - print "You do not have the SWORD library installed. Please install > it." > - sys.exit(1) > + import Sword > +except ImportError: > + log.exception( > + "You do not have the SWORD library installed. Please install it.") > + sys.exit(1) > > -# Inform the user that we need pyquery, as it makes parsing XML files > that much easier > -try: > - from pyquery import PyQuery as pq > -except: > - print "You do not appear to have PyQuery installed. Please install > it." > - sys.exit(2) > +arg_parser = argparse.ArgumentParser( > + description='Compare OSIS file with available v11ns.') > > -# Without the name of a file, we cannot proceed any further > -if len(sys.argv) < 2 or sys.argv[1] == '--help': > - print "Usage: %s <OSISfile>" % (sys.argv[0],) > +arg_parser.add_argument('--verbose', '-v', action='count') > +arg_parser.add_argument('filename', nargs=1) > + > + > +args = arg_parser.parse_args() > + > +if args.verbose: > + log.setLevel = logging.DEBUG > + > +log.debug('args = %s', args) > > # Open the file > -if debug: > - print 'Opening %s' % (sys.argv[1],) > -d = pq(filename=sys.argv[1]) > +log.debug('Opening %s', args.filename[0]) > + > +tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot() > # Get the list of versifications > -if debug: > - print 'Fetching a list of versifications' > -vmgr = Sword.VerseMgr.getSystemVerseMgr() > +log.debug('Fetching a list of v11ns') > +vmgr = Sword.VersificationMgr.getSystemVersificationMgr() > av11ns = vmgr.getVersificationSystems() > +log.debug('av11ns = %s', av11ns) > > # Get the 
list of all osisIDs > -if debug: > - print 'Fetching a list of OSIS IDs' > -ids = d("*[osisID]") > +log.debug('Fetching a list of OSIS IDs') > +ids = set() > +for item in tree.iter('{%s}verse' % OSIS_NS): > + if 'osisID' in item.attrib: > + ids.add(item.attrib['osisID'].split('!')[0]) > +log.debug('ids = len(%d)', len(ids)) > + > # Iterate each versification scheme > for v11n in av11ns: > - print 'Checking %s' % (v11n.c_str(),) > - # Construct a list of the IDs in this versification > - key = Sword.VerseKey() > - key.setVersificationSystem(v11n.c_str()) > - otkeyList = [] # Anything left in this afterwards is missing from > the OSIS ot > - ntkeyList = [] # Anything left in this afterwards is missing from > the OSIS nt > - otextraKeys = [] # Anything that gets placed in here is extraneous > OT material (we think) > - ntextraKeys = [] # Anything that gets placed in here is extraneous > NT material (we think) > - > - inNT = False > - while key.Error() == '\x00': > - skey = key.getOSISRef() > - if not inNT and re.match('^Matt', skey): # Assume we enter > the NT when we hit Matthew > - inNT = True > - if inNT: > - ntkeyList.append(skey) > - else: > - otkeyList.append(skey) > - key.increment() > - ntkeyList = set(ntkeyList) # The 'in' operator only works on a set > - otkeyList = set(otkeyList) > - > - inNT = False > - # Now iterate the ones we have in this file > - for e in ids: > - osisid = e.attrib.get('osisID') > - #print 'Checking key %s' % (osisid,) > - if osisid in otkeyList: > - otkeyList.remove(osisid) > - elif osisid in ntkeyList: > - ntkeyList.remove(osisid) > - inNT = True > - elif verseid.match(osisid) and inNT: > - ntextraKeys.append(osisid) > - elif verseid.match(osisid) and not inNT: > - otextraKeys.append(osisid) > - # Ignore it if not verseid.match() > - > - # Now let's see what is left over > - keyList = list(otkeyList.union(ntkeyList)) # Sets in Python > cannot be ordered > - keyList.sort() > - if len(keyList) > 0: > - if verbose: > - print '\tThe 
following IDs do not appear in your > file:' > - for k in keyList: > - print k > - else: > - print '\tThere are %d OT IDs and %d NT IDs in the > versification which are not in your file.' % (len(otkeyList), > len(ntkeyList)) > - else: > - print '\tYour file has all the references in this > versification' > - > - # Now let's see if you had extra > - if len(otextraKeys + ntextraKeys) > 0: > - if verbose: > - print '\tThe following IDs do not appear in the > versification:' > - for k in ntextraKeys + otextraKeys: > - print k > - else: > - print '\tThere are %d OT IDs and %d NT IDs in your > file which do not appear in the versification.' % (len(otextraKeys), > len(ntextraKeys)) > - else: > - print '\tYour file has no extra references' > + v11n_name = v11n.c_str() > + print('\nChecking %s:\n%s' % > + (v11n_name, (len(v11n_name) + 10) * '-')) > + # Construct a list of the IDs in this versification > + key = Sword.VerseKey() > + key.setVersificationSystem(v11n.c_str()) > + # Anything left in this afterwards is missing from the OSIS ot > + otkeyList = [] > + # Anything left in this afterwards is missing from the OSIS nt > + ntkeyList = [] > + # Anything that gets placed in here is extraneous OT material (we > think) > + otextraKeys = [] > + # Anything that gets placed in here is extraneous NT material (we > think) > + ntextraKeys = [] > + > + inNT = False > + while key.popError() == '\x00': > + skey = key.getOSISRef() > + # Assume we enter the NT when we hit Matthew > + if not inNT and skey.startswith('Matt'): > + inNT = True > + if inNT: > + ntkeyList.append(skey) > + else: > + otkeyList.append(skey) > + key.increment() > + ntkeyList = set(ntkeyList) # The 'in' operator only works on a set > + otkeyList = set(otkeyList) > + > + inNT = False > + # Now iterate the ones we have in this file > + for osisid in ids: > +# log.debug('Checking key %s', osisid) > + if osisid in otkeyList: > + otkeyList.remove(osisid) > + elif osisid in ntkeyList: > + ntkeyList.remove(osisid) > + 
inNT = True > + else: > + verse_match = VERSEID_RE.match(osisid) > + if verse_match and inNT: > + ntextraKeys.append(verse_match.group(1)) > + elif verse_match and not inNT: > + otextraKeys.append(verse_match.group(1)) > + # Ignore it if not VERSEID_RE.match() > + > + # Now let's see what is left over > + # Sets in Python cannot be ordered > + keyList = list(otkeyList.union(ntkeyList)) > + keyList.sort() > + if len(keyList) > 0: > + if len(keyList) < 100: > + log.info('\tThe following IDs don’t appear in your > file:\n%s', > + str(", ".join(keyList))) > + print(('\tThere are %d OT IDs and %d NT IDs ' + > + 'in v11n which aren’t in your file.') \ > + % (len(otkeyList), len(ntkeyList))) > + else: > + print('\tYour file has all the references in this v11n') > + > + # Now let's see if you had extra > + if len(otextraKeys + ntextraKeys) > 0: > + # It doesn't make sense to print out lists longer than 100 > + # they cannot be read anyway > + if len(keyList) < 100: > + log.info( > + '\tThe following IDs don’t appear in v11n:\n%s', > + str(", ".join(keyList))) > + print('\tThere are %d OT IDs and %d NT IDs ' + > + 'in your file which don’t appear in v11n.') \ > + % (len(otextraKeys), len(ntextraKeys)) > + else: > + print('\tYour file has no extra references') > -- > 2.16.2 > > > _______________________________________________ > sword-devel mailing list: sword-devel@crosswire.org > http://www.crosswire.org/mailman/listinfo/sword-devel > Instructions to unsubscribe/change your settings at above page
_______________________________________________ sword-devel mailing list: sword-devel@crosswire.org http://www.crosswire.org/mailman/listinfo/sword-devel Instructions to unsubscribe/change your settings at above page