Attached is a patch which adds download caching to setuptools.
At TOPP (http://topp.openplans.org/), we use a system called fassembler
to build our opencore stack. It creates approximately a dozen
virtualenvs, each with its own lib/python, and then uses setuptools to
install lots of libraries. Some of these libraries are common among
multiple apps, but we install multiple copies for ease of development.
And every time we rebuild, we start the whole process over again. The
major slowdown in building is downloading a bunch of things that
probably haven't changed since the last time we downloaded them. This
patch lets us maintain a cache of all downloads, and thus do builds
much faster.
Anyway, I hope you'll accept this patch.
Index: setuptools/package_index.py
===================================================================
--- setuptools/package_index.py (revision 65919)
+++ setuptools/package_index.py (working copy)
@@ -6,6 +6,7 @@
from distutils.errors import DistutilsError
from md5 import md5
from fnmatch import translate
+from pickle import dumps, loads
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
@@ -150,9 +151,11 @@
"""A distribution index that scans web pages for download URLs"""
def __init__(self, index_url="http://pypi.python.org/simple", hosts=('*',),
+ cache_directory = None,
*args, **kw
):
Environment.__init__(self,*args,**kw)
+ self.cache_directory = cache_directory
self.index_url = index_url + "/"[:not index_url.endswith('/')]
self.scanned_urls = {}
self.fetched_urls = {}
@@ -161,7 +164,6 @@
self.to_scan = []
-
def process_url(self, url, retrieve=False):
"""Evaluate a URL as a possible download, and maybe retrieve it"""
if url in self.scanned_urls and not retrieve:
@@ -575,8 +577,63 @@
def open_url(self, url, warning=None):
if url.startswith('file:'):
return local_open(url)
+
+ class fake_remote_file:
+ """This class acts as if it were the output from urlopen, by
+ reading a previously stored representation of that output"""
+ def __init__(self, filename):
+ self.fp = open(filename, "r")
+ data_fp = open(filename + '.data', "r")
+ data = loads(data_fp.read())
+ data_fp.close()
+ self.__dict__.update(data)
+
+ def read(self, *a, **kw):
+ return self.fp.read(*a, **kw)
+
+ def seek(self, *a, **kw):
+ return self.fp.seek(*a, **kw)
+
+ def close(self, *a, **kw):
+ return self.fp.close(*a, **kw)
+
+ def __getitem__(self, *a, **kw):
+ return self.fp.__getitem__(*a, **kw)
+
+ def info(self):
+ return self.headers
+
+ def geturl(self):
+ return self.url
+
+ if self.cache_directory:
+ from urllib import quote
+ encoded_url = quote(url).replace("/", "%2f")
+ cached_filename = os.path.join(self.cache_directory, encoded_url)
+ if os.path.exists(cached_filename):
+ return fake_remote_file(cached_filename)
+
+
try:
- return open_with_auth(url)
+ result = open_with_auth(url)
+ if not self.cache_directory:
+ return result
+ page = result.read()
+
+ f = open(cached_filename + ".data", "w")
+ data = {}
+ for attr in ['code', 'headers', 'msg', 'url']:
+ data[attr] = getattr(result, attr)
+
+ f.write(dumps(data))
+ f.close()
+
+ f = open(cached_filename, "w")
+ f.write(page)
+ f.close()
+ return fake_remote_file(cached_filename)
+
+
except urllib2.HTTPError, v:
return v
except urllib2.URLError, v:
Index: setuptools/command/easy_install.py
===================================================================
--- setuptools/command/easy_install.py (revision 65919)
+++ setuptools/command/easy_install.py (working copy)
@@ -71,6 +71,7 @@
('no-deps', 'N', "don't install dependencies"),
('allow-hosts=', 'H', "pattern(s) that hostnames must match"),
('local-snapshots-ok', 'l', "allow building eggs from local checkouts"),
+ ('cache-directory=', 'c', "Use a persistent download cache in the supplied directory -- useful if you have multiple python library directories that need copies of the same library"),
]
boolean_options = [
'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy',
@@ -91,6 +92,8 @@
self.upgrade = self.always_copy = self.multi_version = None
self.editable = self.no_deps = self.allow_hosts = None
self.root = self.prefix = self.no_report = None
+ if not hasattr(self, 'cache_directory'):
+ self.cache_directory = None
# Options not specifiable via command line
self.package_index = None
@@ -169,6 +172,7 @@
if self.package_index is None:
self.package_index = self.create_index(
self.index_url, search_path = self.shadow_path, hosts=hosts,
+ cache_directory = self.cache_directory
)
self.local_index = Environment(self.shadow_path+sys.path)
Index: setuptools.txt
===================================================================
--- setuptools.txt (revision 65919)
+++ setuptools.txt (working copy)
@@ -1899,6 +1899,15 @@
respect to the installation directory. If you use this option when
installing, you must supply the same relative path when uninstalling.
+``--cache-directory=DIR``
+ Use a persistent download cache in the supplied directory. This
+ can be useful if you have multiple Python library directories
+ that need copies of the same library, or if you need to run
+ setup.py or easy_install repeatedly while testing, and you have a
+ slow Internet connection or want to save bandwidth for upstream
+ developers.
+
+
In addition to the above options, the ``develop`` command also accepts all of
the same options accepted by ``easy_install``. If you've configured any
``easy_install`` settings in your ``setup.cfg`` (or other distutils config
_______________________________________________
Distutils-SIG maillist - [email protected]
http://mail.python.org/mailman/listinfo/distutils-sig