GRDDL.py patch

Chimezie Ogbuji Fri, 09 Feb 2007 19:42:41 -0800

moved DEBUG from global variable to one which is passed through the program (to better facilitate integration with other apps)


Chimezie Ogbuji
Lead Systems Analyst
Thoracic and Cardiovascular Surgery
Cleveland Clinic Foundation
9500 Euclid Avenue/ W26
Cleveland, Ohio 44195
Office: (216)444-8593
[EMAIL PROTECTED]

# HG changeset patch
# User Chimezie Ogbuji http://metacognition.info
# Date 1171078434 18000
# Node ID 8a5bc100babfd83a87b331030ea9bd6bde6f1661
# Parent  6d7d38577f3e8b17b29e73798360ac712ac1b78c
moved DEBUG from global variable to one which is passed through the program (to 
better facilitate integration with other apps)


diff -r 6d7d38577f3e -r 8a5bc100babf GRDDL.py
--- a/GRDDL.py  Fri Feb 09 20:50:26 2007 -0500
+++ b/GRDDL.py  Fri Feb 09 22:33:54 2007 -0500
@@ -85,12 +85,13 @@ class Glean(object):
     """
     Handles all the GRDDL XML parsing and XSLT transformation from URLs
     """
-    def __init__(self, url, graph, preParsedDOM=None, useXInclude=True):
+    def __init__(self, url, graph, preParsedDOM=None, useXInclude=True,DEBUG = False):
         self.graph = graph
         self.url = url
         self.dom = preParsedDOM
         self.appliedTransforms = []
         self.useXInclude = useXInclude
+        self.DEBUG = DEBUG
 
     def load(self, webget):
         """
@@ -118,7 +119,7 @@ class Glean(object):
                     self.headers['content-type']) is None \
                     and CHECK_XML_MIMETYPE:
             #What does the spec mandate about this scenario?
-            if DEBUG:
+            if self.DEBUG:
                 print >>sys.stderr, "Ignoring non-xml information resource", url
                 print >>sys.stderr, self.headers['content-type'].split(';')[0]
             self.dom = None
@@ -127,7 +128,7 @@ class Glean(object):
             self.dom = XMLParser.parseString(content, self.url,
                                              processIncludes=self.useXInclude)
         except Exception, e: #@@ narrow exception
-            if DEBUG:
+            if self.DEBUG:
                 print >>sys.stderr, "Unable to parse ", self.url, repr(e)
             #Unable to glean.  Fail gracefully..
             self.dom = None
@@ -139,7 +140,7 @@ class Glean(object):
         sure to avoid transformation already applied
         """                
         for xformURL in transformURLs.split():
-            if DEBUG:
+            if self.DEBUG:
                 print >>sys.stderr, "applying transformation %s" % (xformURL)
             if xformURL not in self.appliedTransforms:
                 self.appliedTransforms.append(xformURL)
@@ -172,21 +173,21 @@ class Glean(object):
             currLen = len(self.graph)
             if method == 'xml':
                 self.graph.parse(StringIO(result), publicID=self.url)
-                if DEBUG:
+                if self.DEBUG:
                     print >>sys.stderr, "Parsed %s triples as RDF/XML" % (
                         max(0,len(self.graph) - currLen))
             elif method == 'text':
                 #Attempt a Notation 3 parse (covers NTriples, and Turtle)
                 self.graph.parse(StringIO(result), format='n3',
                                  publicID=self.url)
-                if DEBUG:
+                if self.DEBUG:
                     print >>sys.stderr, "Parsed %s triples as Notation 3" % (
                         max(0,len(self.graph) - currLen))
             else:
                 #HTML result - recursive GRDDL mechanism?
                 raise Exception("unsupported output type")
 
-def GRDDLAgent(url, graph, webget, useXInclude=True):
+def GRDDLAgent(url, graph, webget, useXInclude=True, DEBUG = False):
     """
     The main entry point for the GRDDL agent Takes a url and a graph
     to store the GRDDL result and a webget function and attempts to
@@ -200,7 +201,7 @@ def GRDDLAgent(url, graph, webget, useXI
                         ValidXHTMLGlean]:
         #Don't reparse the GRDDL source
         if not parsedSource:
-            gleaned = gleanMethod(url, graph, useXInclude=useXInclude)
+            gleaned = gleanMethod(url, graph, useXInclude=useXInclude, DEBUG = DEBUG)
             gleaned.load(webget)
             parsedSource = gleaned.dom                
         else:
@@ -278,7 +279,7 @@ class XMLNSGlean(Glean):
             try:
                 nsresult = Graph()
                 GRDDLAgent(self.nsURI, nsresult, webget)
-                if DEBUG:
+                if self.DEBUG:
                     print >>sys.stderr, "ns doc graph size", len(nsresult)
             except IOError:
                 pass # don't bother if we can't get a namespace document
@@ -292,7 +293,7 @@ class XMLNSGlean(Glean):
                     todoXForms = Set()
                     pat = (self.nsURI, GRDDL_VOCAB.namespaceTransformation, None)
                     for s, p, xform in nsresult.triples(pat):
-                        if DEBUG:
+                        if self.DEBUG:
                             print >>sys.stderr, "found txform in NS doc:", xform
                         if xform not in processedNSXForms:
                             todoXForms.add(xform)
@@ -363,7 +364,7 @@ class XHTMLProfileGlean(Glean):
                 for profile in self.profiles:
                     if profile == GRDDL_PROFILE:
                         continue
-                    if DEBUG:
+                    if self.DEBUG:
                         print >>sys.stderr, "processing profile url: ", profile
                     #glean GRDDL result from the profile document
                     prresult = Graph()
@@ -395,18 +396,18 @@ class WebMemo(object):
     Make sure we don't abuse any web sites;
     see http://www.w3.org/Help/abuse-info/re-reqs.html
     """
-    def __init__(self, zone = None):
+    def __init__(self, zone = None, DEBUG = False):
         """:param zone: URIs that don't start with this string
                         are prohibited by policy
         """
         self._memo = {}
         self._zone = zone
+        self.DEBUG = DEBUG
 
     def __call__(self, addr, types = None):
         """raises IOError iff addr outside zone.
         """
         import urllib2
-        global DEBUG
 
         if self._zone and not addr.startswith(self._zone):
             raise IOError, "%s outside policy zone %s" % (addr, self._zone)
@@ -419,7 +420,7 @@ class WebMemo(object):
             req = urllib2.Request(addr)
             if types:
                 req.add_header('Accept', ','.join(types))
-            if DEBUG:
+            if self.DEBUG:
                 import sys
                 print >>sys.stderr, "@@fetching: ", addr, "with types", types
             u = urllib2.urlopen(req)
@@ -443,7 +444,6 @@ Options:
 """
     
 def main(argv):
-    global DEBUG
     import os
     
     if argv is None: argv = sys.argv
@@ -503,7 +503,7 @@ def main(argv):
     addr = Absolutize(argv[-1], "file://%s/" % os.getcwd())
 
     try:
-        GRDDLAgent(addr, graph, WebMemo(zone))
+        GRDDLAgent(addr, graph, WebMemo(zone,DEBUG))
     except IOError, e:
         print >>sys.stderr, str(e)
         return 2
@@ -513,7 +513,7 @@ def main(argv):
     if noXIfilename is not None:
         graph = Graph()
         try:
-            GRDDLAgent(addr, graph, WebMemo(zone), False)
+            GRDDLAgent(addr, graph, WebMemo(zone), False,DEBUG = DEBUG)
         except IOError, e:
             print >>sys.stderr, str(e)
             return 2

GRDDL.py patch

Reply via email to