Revision: 7493
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=7493&view=rev
Author:   jouni
Date:     2009-08-15 20:00:09 +0000 (Sat, 15 Aug 2009)

Log Message:
-----------
get_sample_data improvements: remove old files from subdirectories and
not only the top-level directory; try to handle the disconnected use case;
use the perhaps more stable svnroot URL instead of the viewvc one

Modified Paths:
--------------
    trunk/matplotlib/lib/matplotlib/cbook.py

Modified: trunk/matplotlib/lib/matplotlib/cbook.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/cbook.py    2009-08-15 18:37:25 UTC (rev 7492)
+++ trunk/matplotlib/lib/matplotlib/cbook.py    2009-08-15 20:00:09 UTC (rev 7493)
@@ -355,7 +355,7 @@
 class ViewVCCachedServer(urllib2.BaseHandler):
     """
     Urllib2 handler that takes care of caching files.
-    The file cache.pck holds the directory of files to be cached.
+    The file cache.pck holds the directory of files that have been cached.
     """
     def __init__(self, cache_dir, baseurl):
         self.cache_dir = cache_dir
@@ -386,9 +386,14 @@
         cache = cPickle.load(f)
         f.close()
 
+        # Earlier versions did not have the full paths in cache.pck
+        for url, (fn, x, y) in cache.items():
+            if not os.path.isabs(fn):
+                cache[url] = (self.in_cache_dir(fn), x, y)
+        
         # If any files are deleted, drop them from the cache
         for url, (fn, _, _) in cache.items():
-            if not os.path.exists(self.in_cache_dir(fn)):
+            if not os.path.exists(fn):
                 del cache[url]
 
         self.cache = cache
@@ -398,15 +403,21 @@
         Remove files from the cache directory that are not listed in
         cache.pck.
         """
-        listed = set([fn for (_, (fn, _, _)) in self.cache.items()])
-        for path in os.listdir(self.cache_dir):
-            if path not in listed and path != 'cache.pck':
-                thisfile = os.path.join(self.cache_dir, path)
-                if not os.path.isdir(thisfile):
-                    matplotlib.verbose.report('ViewVCCachedServer:remove_stale_files: removing %s'%thisfile,
-                                              level='debug')
-                    os.remove(thisfile)
+        # TODO: remove empty subdirectories
+        listed = set(fn for (_, (fn, _, _)) in self.cache.items())
+        existing = reduce(set.union,
+                          (set(os.path.join(dirpath, fn) for fn in filenames)
+                          for (dirpath, _, filenames) in os.walk(self.cache_dir)))
+        matplotlib.verbose.report(
+            'ViewVCCachedServer: files listed in cache.pck: %s' % listed, 'debug')
+        matplotlib.verbose.report(
+            'ViewVCCachedServer: files in cache directory: %s' % existing, 'debug')
 
+        for path in existing - listed - set([self.in_cache_dir('cache.pck')]):
+            matplotlib.verbose.report('ViewVCCachedServer:remove_stale_files: removing %s'%path,
+                                      level='debug')
+            os.remove(path)
+
     def write_cache(self):
         """
         Write the cache data structure into the cache directory.
@@ -424,17 +435,12 @@
         fn = url[len(self.baseurl):]
         fullpath = self.in_cache_dir(fn)
 
-        #while os.path.exists(self.in_cache_dir(fn)):
-        #    fn = rightmost + '.' + str(random.randint(0,9999999))
-
-
-
-        f = open(self.in_cache_dir(fn), 'wb')
+        f = open(fullpath, 'wb')
         f.write(data)
         f.close()
 
         # Update the cache
-        self.cache[url] = (fn, headers.get('ETag'), headers.get('Last-Modified'))
+        self.cache[url] = (fullpath, headers.get('ETag'), headers.get('Last-Modified'))
         self.write_cache()
 
     # These urllib2 entry points are used:
@@ -459,9 +465,9 @@
         """
         url = req.get_full_url()
         fn, _, _ = self.cache[url]
-        cachefile = self.in_cache_dir(fn)
-        matplotlib.verbose.report('ViewVCCachedServer: reading data file from cache file "%s"'%cachefile)
-        file = open(cachefile, 'rb')
+        matplotlib.verbose.report('ViewVCCachedServer: reading data file from cache file "%s"'
+                                  %fn, 'debug')
+        file = open(fn, 'rb')
         handle = urllib2.addinfourl(file, hdrs, url)
         handle.code = 304
         return handle
@@ -470,6 +476,8 @@
         """
         Update the cache with the returned file.
         """
+        matplotlib.verbose.report('ViewVCCachedServer: received response %d: %s'
+                                  % (response.code, response.msg), 'debug')
         if response.code != 200:
             return response
         else:
@@ -489,11 +497,11 @@
         store it in the cachedir.
 
         If asfileobj is True, a file object will be returned.  Else the
-        path to the file as a string will be returned
-
+        path to the file as a string will be returned.
         """
+        # TODO: time out if the connection takes forever
+        # (may not be possible with urllib2 only - spawn a helper process?)
 
-
         # quote is not in python2.4, so check for it and get it from
         # urllib if it is not available
         quote = getattr(urllib2, 'quote', None)
@@ -501,13 +509,25 @@
             import urllib
             quote = urllib.quote
 
+        # retrieve the URL for the side effect of refreshing the cache
         url = self.baseurl + quote(fname)
-        response = self.opener.open(url)
+        error = 'unknown error'
+        matplotlib.verbose.report('ViewVCCachedServer: retrieving %s'
+                                  % url, 'debug')
+        try:
+            response = self.opener.open(url)
+        except urllib2.URLError, e:
+            # could be a missing network connection
+            error = str(e)
 
+        cached = self.cache.get(url)
+        if cached is None:
+            msg = 'file %s not in cache; received %s when trying to retrieve' \
+                % (fname, error)
+            raise KeyError(msg)
+        
+        fname = cached[0]
 
-        relpath = self.cache[url][0]
-        fname = self.in_cache_dir(relpath)
-
         if asfileobj:
             return file(fname)
         else:
@@ -519,7 +539,7 @@
     Check the cachedirectory ~/.matplotlib/sample_data for a sample_data
     file.  If it does not exist, fetch it with urllib from the mpl svn repo
 
-      http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/sample_data/
+      http://matplotlib.svn.sourceforge.net/svnroot/matplotlib/trunk/sample_data/
 
     and store it in the cachedir.
 
@@ -539,7 +559,7 @@
     if myserver is None:
         configdir = matplotlib.get_configdir()
         cachedir = os.path.join(configdir, 'sample_data')
-        baseurl = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/sample_data/'
+        baseurl = 'http://matplotlib.svn.sourceforge.net/svnroot/matplotlib/trunk/sample_data/'
         myserver = get_sample_data.myserver = ViewVCCachedServer(cachedir, baseurl)
 
     return myserver.get_sample_data(fname, asfileobj=asfileobj)


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with 
Crystal Reports now.  http://p.sf.net/sfu/bobj-july
_______________________________________________
Matplotlib-checkins mailing list
Matplotlib-checkins@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/matplotlib-checkins

Reply via email to