Author: sebb
Date: Mon Nov 16 16:26:25 2015
New Revision: 1714621
URL: http://svn.apache.org/viewvc?rev=1714621&view=rev
Log:
Add utility method for checking HTTP code
Modified:
comdev/projects.apache.org/scripts/cronjobs/urlutils.py
Modified: comdev/projects.apache.org/scripts/cronjobs/urlutils.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/urlutils.py?rev=1714621&r1=1714620&r2=1714621&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/urlutils.py (original)
+++ comdev/projects.apache.org/scripts/cronjobs/urlutils.py Mon Nov 16 16:26:25 2015
@@ -24,6 +24,10 @@ import calendar
# time format used in Last-Modified/If-Modified-Since HTTP headers
_HTTP_TIME_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
+# Allow callers to check HTTP code from Python2 and 3
+def isHTTPNotFound(e):
+ return type(e) == HTTPError and e.code == 404
+
def touchFile(f, t):
if _PY3:
os.utime(f, times=(t, t))
@@ -76,6 +80,13 @@ def getIfNewer(url, sinceTime, encoding=
raise
return lastMod, response
+def findRelPath(relpath):
+ for d in ['./','../','../../']: # we may located at same level or 1 or 2 below
+ dir = join(d,relpath)
+ if os.path.isdir(dir):
+ return dir
+ raise OSError("Cannot find path " + path)
+
class UrlCache(object):
"""
Creates a cache for URLs
@@ -98,15 +109,11 @@ class UrlCache(object):
__CACHE = 'data/cache'
self.__interval = interval
self.__cachedir = None
- if cachedir:
+ if cachedir: # assumed to be correct
self.__cachedir = cachedir
else:
self.__cachedir = __CACHE # will be overwritten if actually found
- for d in ['./','../','../../']: # we may located at same level or 1 or 2 below
- dir = d + __CACHE
- if os.path.isdir(dir):
- self.__cachedir = dir
- break
+ self.__cachedir = findRelPath(__CACHE)
if os.path.isdir(self.__cachedir):
print("Cachedir: %s" % self.__cachedir)
@@ -116,6 +123,18 @@ class UrlCache(object):
def __getname(self, name):
return join(self.__cachedir, name)
+ def _deleteCacheFile(self, name):# intended mainly for debug use
+ path = self.__getname(name)
+ try:
+ os.remove(path)
+ except FileNotFoundError:
+ pass
+ dotpath = self.__getname('.'+name)
+ try:
+ os.remove(dotpath)
+ except FileNotFoundError:
+ pass
+
def get(self, url, name, encoding=None, errors=None):
"""
Check if the filename exists in the cache.
@@ -197,9 +216,17 @@ class UrlCache(object):
return open(target, 'rb')
if __name__ == '__main__':
- fc = UrlCache(interval=-1)
- icla_info = fc.get("https://whimsy.apache.org/public/icla-info.json","icla-info.json", encoding='utf-8')
+ try:
+ fc = UrlCache(cachedir='x')
+ raise Error("Expected OSError")
+ except OSError as e:
+ print('Expected: ', e)
+ fc = UrlCache(interval=0)
+ name = "_wao.html"
+ icla_info = fc.get("http://www.apache.org/", name, encoding='utf-8')
print(icla_info.readline().rstrip())
print(icla_info.readline().rstrip())
print(icla_info.readline().rstrip())
print(icla_info.readline().rstrip())
+ fc._deleteCacheFile(name)
+ fc._deleteCacheFile(name)
\ No newline at end of file