Filippo Giunchedi has submitted this change and it was merged.

Change subject: swiftrepl: name-based filter for objects
......................................................................


swiftrepl: name-based filter for objects

This can be used to limit the scope of replication, e.g. to particular
thumbnails only.
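
A hypothetical invocation (the pattern and the rest of the command line are
purely illustrative, not taken from any deployed configuration) would pass
the new flag as:

  swiftrepl ... --filename-regexp 'thumb/'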

Bug: T125791
Change-Id: I88f97f282d9197e78340e05a2849500e311b09e6
---
M swiftrepl/swiftrepl.py
1 file changed, 37 insertions(+), 12 deletions(-)

Approvals:
  Filippo Giunchedi: Verified; Looks good to me, approved
  Mark Bergsma: Looks good to me, but someone else must approve



diff --git a/swiftrepl/swiftrepl.py b/swiftrepl/swiftrepl.py
index 7a180dc..10b8a29 100644
--- a/swiftrepl/swiftrepl.py
+++ b/swiftrepl/swiftrepl.py
@@ -285,10 +285,10 @@
     else:
         print "Created container", name
 
-def sync_container(srccontainer, srcconnpool, dstconnpool):
+def sync_container(srccontainer, srcconnpool, dstconnpool, filename_regexp):
 
     last = ''
-    hits, processed, gets = 0, 0, 0
+    hits, processed, gets, skipped = 0, 0, 0, 0
 
     dstconn = dstconnpool.get()
     try:
@@ -315,8 +315,11 @@
                     break
 
         for srcobj in srcobjects:
-            processed += 1
             objname = srcobj.name.encode("ascii", errors="ignore")
+            if filename_regexp is not None and not filename_regexp.match(objname):
+                skipped += 1
+                continue
+            processed += 1
             last = srcobj.name.encode("utf-8")
             msg = "%s\t%s\t%s\t%s\t%s" % (srccontainer.name, srcobj.etag, 
srcobj.size, objname, srcobj.last_modified)
             try:
@@ -347,11 +350,14 @@
             replicate_object(srcobj, dstobj, srcconnpool, dstconnpool)
 
         pct = lambda x, y: y != 0 and int(float(x) / y * 100) or 0
-        print ("STATS: %s processed: %d/%d (%d%%), hit rate: %d%%" %
+        print ("STATS: %s processed: %d/%d (%d%%), hit rate: %d%%, skipped 
%d/%d (%d%%)" %
                (srccontainer.name,
                 processed, srccontainer.object_count,
                 pct(processed, srccontainer.object_count),
-                pct(hits, processed)))
+                pct(hits, processed),
+                skipped, srccontainer.object_count,
+                pct(skipped, srccontainer.object_count),
+                ))
 
         if len(srcobjects) < NOBJECT:
             break
@@ -408,7 +414,7 @@
         srcconnpool.put(srccontainer.conn)
         srccontainer.conn = None
 
-def sync_deletes(srccontainer, srcconnpool, dstconnpool):
+def sync_deletes(srccontainer, srcconnpool, dstconnpool, filename_regexp):
 
     dstconn = dstconnpool.get()
     try:
@@ -424,7 +430,7 @@
     srclimit = int(srclimit * 1.2)
 
     last = ''
-    deletes, processed = 0, 0
+    deletes, processed, skipped = 0, 0, 0
     while True:
 
         dstobjects = get_container_objects(dstcontainer, limit=dstlimit, marker=last, connpool=dstconnpool)
@@ -440,6 +446,9 @@
         diff = dstset - srcset
 
         for dstname in diff:
+            if filename_regexp is not None and not filename_regexp.match(dstname):
+                skipped += 1
+                continue
             # do a HEAD to make sure it's gone
             srccontainer.conn = srcconnpool.get()
             try:
@@ -463,11 +472,14 @@
         processed += len(dstobjects)
 
         pct = lambda x, y: y != 0 and int(float(x) / y * 100) or 0
-        print ("STATS: %s processed: %d/%d (%d%%), deleted: %d" %
+        print ("STATS: %s processed: %d/%d (%d%%), deleted: %d, skipped %d/%d 
(%d%%)" %
                (srccontainer.name,
                 processed, dstcontainer.object_count,
                 pct(processed, dstcontainer.object_count),
-                deletes))
+                deletes,
+                skipped, dstcontainer.object_count,
+                pct(skipped, dstcontainer.object_count),
+                ))
 
         if len(dstobjects) < dstlimit:
             break
@@ -482,9 +494,11 @@
                 break
 
             if options.sync_deletes:
-                sync_deletes(container, kwargs['srcconnpool'], kwargs['dstconnpool'])
+                sync_deletes(container, kwargs['srcconnpool'],
+                        kwargs['dstconnpool'], kwargs['filename_regexp'])
             else:
-                sync_container(container, kwargs['srcconnpool'], kwargs['dstconnpool'])
+                sync_container(container, kwargs['srcconnpool'],
+                        kwargs['dstconnpool'], kwargs['filename_regexp'])
 
             if not options.once:  # once
                 containers.append(container)
@@ -521,6 +535,7 @@
     parser.add_argument('--sync-deletes', '-d', dest='sync_deletes', action='store_true', default=False)
     parser.add_argument('--container-set', dest='container_set', metavar='SET')
     parser.add_argument('--container-regexp', dest='container_regexp', metavar='REGEXP')
+    parser.add_argument('--filename-regexp', dest='filename_regexp', metavar='REGEXP')
     options = parser.parse_args()
 
     src, dst, container_sets = parse_config(options.config)
@@ -538,6 +553,13 @@
         if not options.container_set in container_sets:
             parser.error('container set %s not found in config' % options.container_set)
         container_regexp = container_sets[options.container_set]
+
+    filename_regexp = None
+    if options.filename_regexp is not None:
+        try:
+            filename_regexp = re.compile(options.filename_regexp)
+        except re.error as e:
+            parser.error('cannot compile %r: %r' % (options.filename_regexp, e))
 
     srcconnpool = connect(src)
     dstconnpool = connect(dst)
@@ -564,7 +586,10 @@
 
     # Start threads
     for i in range(options.threads):
-        t = threading.Thread(target=replicator_thread, kwargs={'srcconnpool': srcconnpool, 'dstconnpool': dstconnpool})
+        t = threading.Thread(target=replicator_thread,
+                             kwargs={'srcconnpool': srcconnpool,
+                                     'dstconnpool': dstconnpool,
+                                     'filename_regexp': filename_regexp})
         t.daemon = True
         t.start()
 

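Worth noting for users of the new flag: the filter is applied with
re.match(), so the pattern is anchored at the beginning of the object name
rather than searched anywhere inside it; a name that only contains the
pattern in the middle would require re.search() semantics instead. A minimal
standalone sketch of that behaviour (the pattern and object names below are
made up for illustration):

    import re

    # Hypothetical pattern: keep only objects whose names start with "thumb/"
    filename_regexp = re.compile(r'thumb/')

    for name in ['thumb/a/ab/Example.jpg', 'archive/Example.jpg']:
        if filename_regexp.match(name):
            print "replicate", name   # name begins with the pattern
        else:
            print "skip", name        # counted as skipped, as in sync_container()
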
-- 
To view, visit https://gerrit.wikimedia.org/r/269387
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I88f97f282d9197e78340e05a2849500e311b09e6
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Faidon Liambotis <fai...@wikimedia.org>
Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org>
Gerrit-Reviewer: Mark Bergsma <m...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
