Moved DEBUG from a global variable to a parameter that is passed through the
program (to better facilitate integration with other applications).
Chimezie Ogbuji
Lead Systems Analyst
Thoracic and Cardiovascular Surgery
Cleveland Clinic Foundation
9500 Euclid Avenue/ W26
Cleveland, Ohio 44195
Office: (216)444-8593
[EMAIL PROTECTED]
# HG changeset patch
# User Chimezie Ogbuji http://metacognition.info
# Date 1171078434 18000
# Node ID 8a5bc100babfd83a87b331030ea9bd6bde6f1661
# Parent 6d7d38577f3e8b17b29e73798360ac712ac1b78c
moved DEBUG from global variable to one which is passed through the program (to
better facilitate integration with other apps)
diff -r 6d7d38577f3e -r 8a5bc100babf GRDDL.py
--- a/GRDDL.py Fri Feb 09 20:50:26 2007 -0500
+++ b/GRDDL.py Fri Feb 09 22:33:54 2007 -0500
@@ -85,12 +85,13 @@ class Glean(object):
"""
Handles all the GRDDL XML parsing and XSLT transformation from URLs
"""
- def __init__(self, url, graph, preParsedDOM=None, useXInclude=True):
+ def __init__(self, url, graph, preParsedDOM=None, useXInclude=True,DEBUG =
False):
self.graph = graph
self.url = url
self.dom = preParsedDOM
self.appliedTransforms = []
self.useXInclude = useXInclude
+ self.DEBUG = DEBUG
def load(self, webget):
"""
@@ -118,7 +119,7 @@ class Glean(object):
self.headers['content-type']) is None \
and CHECK_XML_MIMETYPE:
#What does the spec mandate about this scenario?
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "Ignoring non-xml information resource",
url
print >>sys.stderr, self.headers['content-type'].split(';')[0]
self.dom = None
@@ -127,7 +128,7 @@ class Glean(object):
self.dom = XMLParser.parseString(content, self.url,
processIncludes=self.useXInclude)
except Exception, e: #@@ narrow exception
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "Unable to parse ", self.url, repr(e)
#Unable to glean. Fail gracefully..
self.dom = None
@@ -139,7 +140,7 @@ class Glean(object):
sure to avoid transformation already applied
"""
for xformURL in transformURLs.split():
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "applying transformation %s" % (xformURL)
if xformURL not in self.appliedTransforms:
self.appliedTransforms.append(xformURL)
@@ -172,21 +173,21 @@ class Glean(object):
currLen = len(self.graph)
if method == 'xml':
self.graph.parse(StringIO(result), publicID=self.url)
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "Parsed %s triples as RDF/XML" % (
max(0,len(self.graph) - currLen))
elif method == 'text':
#Attempt a Notation 3 parse (covers NTriples, and Turtle)
self.graph.parse(StringIO(result), format='n3',
publicID=self.url)
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "Parsed %s triples as Notation 3" % (
max(0,len(self.graph) - currLen))
else:
#HTML result - recursive GRDDL mechanism?
raise Exception("unsupported output type")
-def GRDDLAgent(url, graph, webget, useXInclude=True):
+def GRDDLAgent(url, graph, webget, useXInclude=True, DEBUG = False):
"""
The main entry point for the GRDDL agent Takes a url and a graph
to store the GRDDL result and a webget function and attempts to
@@ -200,7 +201,7 @@ def GRDDLAgent(url, graph, webget, useXI
ValidXHTMLGlean]:
#Don't reparse the GRDDL source
if not parsedSource:
- gleaned = gleanMethod(url, graph, useXInclude=useXInclude)
+ gleaned = gleanMethod(url, graph, useXInclude=useXInclude, DEBUG =
DEBUG)
gleaned.load(webget)
parsedSource = gleaned.dom
else:
@@ -278,7 +279,7 @@ class XMLNSGlean(Glean):
try:
nsresult = Graph()
GRDDLAgent(self.nsURI, nsresult, webget)
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "ns doc graph size", len(nsresult)
except IOError:
pass # don't bother if we can't get a namespace document
@@ -292,7 +293,7 @@ class XMLNSGlean(Glean):
todoXForms = Set()
pat = (self.nsURI, GRDDL_VOCAB.namespaceTransformation,
None)
for s, p, xform in nsresult.triples(pat):
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "found txform in NS doc:",
xform
if xform not in processedNSXForms:
todoXForms.add(xform)
@@ -363,7 +364,7 @@ class XHTMLProfileGlean(Glean):
for profile in self.profiles:
if profile == GRDDL_PROFILE:
continue
- if DEBUG:
+ if self.DEBUG:
print >>sys.stderr, "processing profile url: ", profile
#glean GRDDL result from the profile document
prresult = Graph()
@@ -395,18 +396,18 @@ class WebMemo(object):
Make sure we don't abuse any web sites;
see http://www.w3.org/Help/abuse-info/re-reqs.html
"""
- def __init__(self, zone = None):
+ def __init__(self, zone = None, DEBUG = False):
""":param zone: URIs that don't start with this string
are prohibited by policy
"""
self._memo = {}
self._zone = zone
+ self.DEBUG = DEBUG
def __call__(self, addr, types = None):
"""raises IOError iff addr outside zone.
"""
import urllib2
- global DEBUG
if self._zone and not addr.startswith(self._zone):
raise IOError, "%s outside policy zone %s" % (addr, self._zone)
@@ -419,7 +420,7 @@ class WebMemo(object):
req = urllib2.Request(addr)
if types:
req.add_header('Accept', ','.join(types))
- if DEBUG:
+ if self.DEBUG:
import sys
print >>sys.stderr, "@@fetching: ", addr, "with types", types
u = urllib2.urlopen(req)
@@ -443,7 +444,6 @@ Options:
"""
def main(argv):
- global DEBUG
import os
if argv is None: argv = sys.argv
@@ -503,7 +503,7 @@ def main(argv):
addr = Absolutize(argv[-1], "file://%s/" % os.getcwd())
try:
- GRDDLAgent(addr, graph, WebMemo(zone))
+ GRDDLAgent(addr, graph, WebMemo(zone,DEBUG))
except IOError, e:
print >>sys.stderr, str(e)
return 2
@@ -513,7 +513,7 @@ def main(argv):
if noXIfilename is not None:
graph = Graph()
try:
- GRDDLAgent(addr, graph, WebMemo(zone), False)
+ GRDDLAgent(addr, graph, WebMemo(zone), False,DEBUG = DEBUG)
except IOError, e:
print >>sys.stderr, str(e)
return 2