Author: danielsh
Date: Thu Dec  2 17:06:29 2010
New Revision: 1041457

URL: http://svn.apache.org/viewvc?rev=1041457&view=rev
Log:
Add a script to populate the PRISTINE dir from a .svn/pristine/ hierarchy.

NOTE: This is not useful yet, since we don't currently know how to avoid
fetching a file if a corresponding SHA-1 is listed in the PRISTINE table.
I wrote the script before I bothered to investigate that minor detail...

* tools/dev/wc-ng/populate-pristine.py:
    New script.

Added:
    subversion/trunk/tools/dev/wc-ng/populate-pristine.py   (with props)

Added: subversion/trunk/tools/dev/wc-ng/populate-pristine.py
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/dev/wc-ng/populate-pristine.py?rev=1041457&view=auto
==============================================================================
--- subversion/trunk/tools/dev/wc-ng/populate-pristine.py (added)
+++ subversion/trunk/tools/dev/wc-ng/populate-pristine.py Thu Dec  2 17:06:29 2010
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+"""
+A script that takes a .svn/pristine/ hierarchy, with its existing
+.svn/wc.db database, and populates the database's PRISTINE table
+accordingly.  (Use 'svn cleanup' to remove unreferenced pristines.)
+
+Usage:
+
+  %s /path/to/wc [...]
+"""
+
+# TODO: resolve the NotImplemented() in __main__
+
+# TODO: increment refcount upon collision
+# TODO: add <given file>, not just argv[1]/.svn/pristine/??/*
+
+import hashlib
+import os
+import re
+import sqlite3
+import sys
+
+# ### This could require any other format that has the same PRISTINE schema
+# ### and semantics.
+# Highest wc.db 'user_version' that open_db() accepts; anything newer makes
+# it raise UnknownFormat.
+FORMAT = 22
+# Number of bytes md5_of() reads from a file per os.read() call.
+BUFFER_SIZE = 4 * 1024
+
+class UnknownFormat(Exception):
+  """Raised by open_db() for a wc.db whose format number is newer than FORMAT.
+
+  The offending format number is kept in the 'formatno' attribute.
+  """
+  def __init__(self, formatno):
+    # Spell out the base-class initialisation; 'args' ends up as (formatno,).
+    Exception.__init__(self, formatno)
+    self.formatno = formatno
+
+def open_db(wc_path):
+  """Open the working copy database of WC_PATH and return the connection.
+
+  WC_PATH is the root of a working copy; the database opened is
+  WC_PATH/.svn/wc.db.
+
+  Return an sqlite3 connection.  The caller owns it and is responsible
+  for closing it.
+
+  Raise UnknownFormat if the database's 'user_version' pragma is greater
+  than FORMAT.  The connection is closed before any exception leaves
+  this function, so a failed open does not leak a database handle.
+  """
+  wc_db = os.path.join(wc_path, '.svn', 'wc.db')
+  conn = sqlite3.connect(wc_db)
+  try:
+    curs = conn.cursor()
+    curs.execute('pragma user_version;')
+    formatno = int(curs.fetchone()[0])
+    if formatno > FORMAT:
+      raise UnknownFormat(formatno)
+  except:
+    # Cleanup only: release the handle, then let the error propagate as-is.
+    conn.close()
+    raise
+  return conn
+
+# Matches a string made of exactly 40 lowercase hexadecimal digits (the
+# textual form of a SHA-1); populate() uses it to pick out pristine files
+# by name.
+_sha1_re = re.compile(r'^[0-9a-f]{40}$')
+
+def md5_of(path):
+  """Return the MD5 digest of the file at PATH as a hexadecimal string.
+
+  The file is read in chunks of BUFFER_SIZE bytes, so its whole content is
+  never held in memory at once.  Errors from opening or reading the file
+  propagate to the caller; the descriptor is closed in every case.
+  """
+  # O_BINARY exists only on platforms that distinguish text from binary
+  # mode; without it the bytes read (and therefore the digest) could be
+  # altered by newline translation there.
+  fd = os.open(path, os.O_RDONLY | getattr(os, 'O_BINARY', 0))
+  try:
+    ctx = hashlib.md5()
+    while True:
+      s = os.read(fd, BUFFER_SIZE)
+      if not s:
+        # An empty read means end of file.
+        break
+      ctx.update(s)
+    return ctx.hexdigest()
+  finally:
+    os.close(fd)
+
+# Statement populate() runs once per pristine file.  The five placeholders
+# are bound, in order, to: checksum, compression, size, refcount and
+# md5_checksum.
+# NOTE(review): 'INSERT OR REPLACE' presumably overwrites an existing row
+# for the same checksum, resetting its refcount to the value passed in --
+# see the "increment refcount upon collision" TODO at the top of the file.
+INSERT_QUERY = """
+  INSERT OR REPLACE
+  INTO pristine(checksum,compression,size,refcount,md5_checksum)
+  VALUES (?,?,?,?,?)
+"""
+  
+def populate(wc_path):
+  """Record every pristine file of the working copy WC_PATH in its wc.db.
+
+  Walk WC_PATH/.svn/pristine/ and, for each file in one of its immediate
+  subdirectories whose name looks like a SHA-1 (see _sha1_re), run
+  INSERT_QUERY with:
+
+    checksum      '$sha1$' + the file name
+    compression   None
+    size          the file's size in bytes
+    refcount      1
+    md5_checksum  '$md5 $' + the MD5 of the file's content
+
+  Progress is written to stdout: the working copy path, then the name of
+  each subdirectory as it is processed, then a final '.'.
+
+  Raise UnknownFormat (from open_db) if the database format is too new.
+  The database connection is closed before returning or raising.
+  """
+  conn = open_db(wc_path)
+  try:
+    sys.stdout.write("Updating '%s': " % wc_path)
+    # No trailing separator on the walk root: with one, the root itself
+    # would pass the "parent is named 'pristine'" test below and be treated
+    # as if it were one of the xx/ shard directories.
+    pristine_root = os.path.join(wc_path, '.svn', 'pristine')
+    for dirname, dirs, files in os.walk(pristine_root):
+      # skip everything but .svn/pristine/xx/
+      if os.path.basename(os.path.dirname(dirname)) != 'pristine':
+        continue
+      sys.stdout.write("'%s', " % os.path.basename(dirname))
+      for f in files:
+        if not _sha1_re.match(f):
+          continue
+        fullpath = os.path.join(dirname, f)
+        conn.execute(INSERT_QUERY,
+                     ('$sha1$'+f, None, os.stat(fullpath).st_size, 1,
+                      '$md5 $'+md5_of(fullpath)))
+      # periodic transaction commits, for efficiency
+      conn.commit()
+    sys.stdout.write(".\n")
+  finally:
+    conn.close()
+
+if __name__ == '__main__':
+  # Deliberate guard (see the TODO at the top of the file): the script is
+  # not useful yet, so refuse to run.  NotImplementedError is the exception
+  # class; the NotImplemented constant is not callable.
+  raise NotImplementedError("""Subversion does not know yet to avoid fetching
+  a file when a file with matching sha1 appears in the PRISTINE table.""")
+
+  # Everything below is unreachable until the guard above is removed.
+  # Each command-line argument is a working copy root; default to the
+  # current directory when none is given.
+  paths = sys.argv[1:]
+  if not paths:
+    paths = ['.']
+  for wc_path in paths:
+    try:
+      populate(wc_path)
+    except UnknownFormat:
+      # Fetch the exception via sys.exc_info() so that this handler parses
+      # under both Python 2 and Python 3.
+      e = sys.exc_info()[1]
+      # Report and carry on with the remaining working copies.
+      sys.stderr.write("Don't know how to handle '%s' (format %d)\n"
+                       % (wc_path, e.formatno))

Propchange: subversion/trunk/tools/dev/wc-ng/populate-pristine.py
------------------------------------------------------------------------------
    svn:executable = *


Reply via email to