That means: * Make it PEP8 compliant. * Unfortunately, that includes replacing tabs ('\t') with spaces (PEP8 doesn’t like TABs), which makes for a massive diff. * Don’t use deprecated API calls: use Sword.VersificationMgr.getSystemVersificationMgr() instead of Sword.VerseMgr.getSystemVerseMgr(), and use popError() instead of Error(). * Switch to the logging module to make the output a little bit more civilized. * Don’t use regular expressions where they aren’t needed. * Shorten the labels so they don’t overflow the screen. * Don’t fool with PyQuery; use standard XML libraries instead. * Use argparse for command-line parsing. --- versification/av11n.py | 213 ++++++++++++++++++++++++++++--------------------- 1 file changed, 124 insertions(+), 89 deletions(-)
diff --git a/versification/av11n.py b/versification/av11n.py index 136a382..5386b8e 100755 --- a/versification/av11n.py +++ b/versification/av11n.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 # # This does a very roughshod attempt to compare the osisIDs found in an # XML file with each of the versifications that SWORD knows about to help @@ -7,106 +8,140 @@ # in the proper order, although within each testament, it requires nothing # special as for ordering. # -# Invoke simply by calling the program and the file name. If you want -# more output, change the following line to be True instead of False -verbose = False -debug = True -import sys +# Invoke simply by calling the program and the file name. +from __future__ import print_function +import argparse +import io +import logging import re -verseid = re.compile('^.+\..+\..+$') +import sys + +# in normal state level should be debug.WARNING, debug.INFO and debug.DEBUG +# give additional information. +logging.basicConfig(format='%(levelname)s:%(message)s', + level=logging.INFO) +log = logging.getLogger('versification') + +try: + import lxml.etree as ET +except ImportError: + import xml.etree.ElementTree as ET + +OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace' +VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$') # Inform the user that we need the SWORD extension try: - import Sword -except: - print "You do not have the SWORD library installed. Please install it." - sys.exit(1) + import Sword +except ImportError: + log.exception( + "You do not have the SWORD library installed. Please install it.") + sys.exit(1) -# Inform the user that we need pyquery, as it makes parsing XML files that much easier -try: - from pyquery import PyQuery as pq -except: - print "You do not appear to have PyQuery installed. Please install it." 
- sys.exit(2) +arg_parser = argparse.ArgumentParser( + description='Compare OSIS file with available v11ns.') -# Without the name of a file, we cannot proceed any further -if len(sys.argv) < 2 or sys.argv[1] == '--help': - print "Usage: %s <OSISfile>" % (sys.argv[0],) +arg_parser.add_argument('--verbose', '-v', action='count') +arg_parser.add_argument('filename', nargs=1) + + +args = arg_parser.parse_args() + +if args.verbose: + log.setLevel = logging.DEBUG + +log.debug('args = %s', args) # Open the file -if debug: - print 'Opening %s' % (sys.argv[1],) -d = pq(filename=sys.argv[1]) +log.debug('Opening %s', args.filename[0]) + +tree = ET.parse(io.open(args.filename[0], encoding='utf8')).getroot() # Get the list of versifications -if debug: - print 'Fetching a list of versifications' -vmgr = Sword.VerseMgr.getSystemVerseMgr() +log.debug('Fetching a list of v11ns') +vmgr = Sword.VersificationMgr.getSystemVersificationMgr() av11ns = vmgr.getVersificationSystems() +log.debug('av11ns = %s', av11ns) # Get the list of all osisIDs -if debug: - print 'Fetching a list of OSIS IDs' -ids = d("*[osisID]") +log.debug('Fetching a list of OSIS IDs') +ids = set() +for item in tree.iter('{%s}verse' % OSIS_NS): + if 'osisID' in item.attrib: + ids.add(item.attrib['osisID'].split('!')[0]) +log.debug('ids = len(%d)', len(ids)) + # Iterate each versification scheme for v11n in av11ns: - print 'Checking %s' % (v11n.c_str(),) - # Construct a list of the IDs in this versification - key = Sword.VerseKey() - key.setVersificationSystem(v11n.c_str()) - otkeyList = [] # Anything left in this afterwards is missing from the OSIS ot - ntkeyList = [] # Anything left in this afterwards is missing from the OSIS nt - otextraKeys = [] # Anything that gets placed in here is extraneous OT material (we think) - ntextraKeys = [] # Anything that gets placed in here is extraneous NT material (we think) - - inNT = False - while key.Error() == '\x00': - skey = key.getOSISRef() - if not inNT and re.match('^Matt', 
skey): # Assume we enter the NT when we hit Matthew - inNT = True - if inNT: - ntkeyList.append(skey) - else: - otkeyList.append(skey) - key.increment() - ntkeyList = set(ntkeyList) # The 'in' operator only works on a set - otkeyList = set(otkeyList) - - inNT = False - # Now iterate the ones we have in this file - for e in ids: - osisid = e.attrib.get('osisID') - #print 'Checking key %s' % (osisid,) - if osisid in otkeyList: - otkeyList.remove(osisid) - elif osisid in ntkeyList: - ntkeyList.remove(osisid) - inNT = True - elif verseid.match(osisid) and inNT: - ntextraKeys.append(osisid) - elif verseid.match(osisid) and not inNT: - otextraKeys.append(osisid) - # Ignore it if not verseid.match() - - # Now let's see what is left over - keyList = list(otkeyList.union(ntkeyList)) # Sets in Python cannot be ordered - keyList.sort() - if len(keyList) > 0: - if verbose: - print '\tThe following IDs do not appear in your file:' - for k in keyList: - print k - else: - print '\tThere are %d OT IDs and %d NT IDs in the versification which are not in your file.' % (len(otkeyList), len(ntkeyList)) - else: - print '\tYour file has all the references in this versification' - - # Now let's see if you had extra - if len(otextraKeys + ntextraKeys) > 0: - if verbose: - print '\tThe following IDs do not appear in the versification:' - for k in ntextraKeys + otextraKeys: - print k - else: - print '\tThere are %d OT IDs and %d NT IDs in your file which do not appear in the versification.' 
% (len(otextraKeys), len(ntextraKeys)) - else: - print '\tYour file has no extra references' + v11n_name = v11n.c_str() + print('\nChecking %s:\n%s' % + (v11n_name, (len(v11n_name) + 10) * '-')) + # Construct a list of the IDs in this versification + key = Sword.VerseKey() + key.setVersificationSystem(v11n.c_str()) + # Anything left in this afterwards is missing from the OSIS ot + otkeyList = [] + # Anything left in this afterwards is missing from the OSIS nt + ntkeyList = [] + # Anything that gets placed in here is extraneous OT material (we think) + otextraKeys = [] + # Anything that gets placed in here is extraneous NT material (we think) + ntextraKeys = [] + + inNT = False + while key.popError() == '\x00': + skey = key.getOSISRef() + # Assume we enter the NT when we hit Matthew + if not inNT and skey.startswith('Matt'): + inNT = True + if inNT: + ntkeyList.append(skey) + else: + otkeyList.append(skey) + key.increment() + ntkeyList = set(ntkeyList) # The 'in' operator only works on a set + otkeyList = set(otkeyList) + + inNT = False + # Now iterate the ones we have in this file + for osisid in ids: +# log.debug('Checking key %s', osisid) + if osisid in otkeyList: + otkeyList.remove(osisid) + elif osisid in ntkeyList: + ntkeyList.remove(osisid) + inNT = True + else: + verse_match = VERSEID_RE.match(osisid) + if verse_match and inNT: + ntextraKeys.append(verse_match.group(1)) + elif verse_match and not inNT: + otextraKeys.append(verse_match.group(1)) + # Ignore it if not VERSEID_RE.match() + + # Now let's see what is left over + # Sets in Python cannot be ordered + keyList = list(otkeyList.union(ntkeyList)) + keyList.sort() + if len(keyList) > 0: + if len(keyList) < 100: + log.info('\tThe following IDs don’t appear in your file:\n%s', + str(", ".join(keyList))) + print(('\tThere are %d OT IDs and %d NT IDs ' + + 'in v11n which aren’t in your file.') \ + % (len(otkeyList), len(ntkeyList))) + else: + print('\tYour file has all the references in this v11n') + + # Now 
let's see if you had extra + if len(otextraKeys + ntextraKeys) > 0: + # It doesn't make sense to print out lists longer than 100 + # they cannot be read anyway + if len(keyList) < 100: + log.info( + '\tThe following IDs don’t appear in v11n:\n%s', + str(", ".join(keyList))) + print('\tThere are %d OT IDs and %d NT IDs ' + + 'in your file which don’t appear in v11n.') \ + % (len(otextraKeys), len(ntextraKeys)) + else: + print('\tYour file has no extra references') -- 2.16.2 _______________________________________________ sword-devel mailing list: sword-devel@crosswire.org http://www.crosswire.org/mailman/listinfo/sword-devel Instructions to unsubscribe/change your settings at above page