This is an automated email from the ASF dual-hosted git repository. humbedooh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
The following commit(s) were added to refs/heads/master by this push: new 789c7b6 Create bulk-edit.py 789c7b6 is described below commit 789c7b6e06adf46e69adc69f705cad6f60118d28 Author: Daniel Gruno <humbed...@apache.org> AuthorDate: Thu Jul 7 15:02:52 2022 +0200 Create bulk-edit.py --- tools/bulk-edit.py | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/tools/bulk-edit.py b/tools/bulk-edit.py new file mode 100644 index 0000000..f49d4a3 --- /dev/null +++ b/tools/bulk-edit.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + bulk-edit.py: mbox bulk editor for Apache Pony Mail (Foal) + + Examples: + - Move all email from <foo.bar.example.org> to <bar.baz.example.org>: + python3 bulk-edit.py --search 'list_raw:"<foo.bar.example.org>"' --action move --destination "<bar.baz.example.org>" + - Make all emails from gn...@example.org private: + python3 bulk-edit.py --search 'from:"<gn...@example.org>"' --action private + - Delete all emails on f...@bar.example.org with 'gnomes' in the subject: + python3 bulk-edit.py --search 'list_raw:"<foo.bar.example.org>" AND subject:gnomes' --action delete + + Be sure to always run your query with --test first, to see which documents would be affected! +""" + +import elasticsearch.exceptions +import sys +import asyncio +import argparse +import time +import re +import warnings +from elasticsearch.helpers import async_scan + + +if not __package__: + from plugins import ponymailconfig + from plugins.elastic import Elastic +else: + from .plugins import ponymailconfig + from .plugins.elastic import Elastic + + +def gen_args() -> argparse.Namespace: + """Generate/parse CLI arguments""" + parser = argparse.ArgumentParser(description="Command line options.") + parser.add_argument( + "--search", + dest="search", + nargs=1, + help="""Search parameters (Lucene query string) to narrow down what to edit (for instance: 'list_raw:"<dev.maven.apache.org>"')""", + default="*", + ), + parser.add_argument( + "--action", + dest="action", + type=str, + choices=["move", "delete", "private", "public", "list"], + help="The action to perform on each matching document", + default="list", + ) + parser.add_argument( + "--destination", + dest="destination", + type=str, + help="If action is 'move', this sets the destination list-id to move the matching documents to", + default="", + ), + parser.add_argument( + "--test", + dest="test", + action="store_true", + help="Test mode, only scan database and report, but do not make any changes to it.", + ) + parser.add_argument( + "--warn", + dest="warn", + action="store_true", + help="Enable ElasticSearch Warnings (defaults to disabled to suppress xpack nonsense)", + default=False, + ) + args = parser.parse_args() + return args + + +async def main(): + start_time = time.time() + args = gen_args() + config = ponymailconfig.PonymailConfig() + es = Elastic(is_async=True) + if not args.warn: + warnings.filterwarnings("ignore", category=elasticsearch.exceptions.ElasticsearchWarning) + docs_changed = 0 + if args.action == "move": + if not re.match(r"<([-a-z0-9_]+\.?)+>", args.destination): + sys.stderr.write("ERROR: Destination list (--destination) MUST be using the <foo.bar.baz> format!\n") + exit(-1) + + async for doc in async_scan(client=es.es, q=args.search, index=es.db_mbox): + source = doc["_source"] + if args.action == "list": + docs_changed += 1 + subject = source["subject"].replace("\n", "") + print(f"""found: {doc['_id']} {source['list_raw']}: {subject}""") + elif args.action == "move": + if args.test: + print(f"""[TEST] Would have moved {source["mid"]} from {source["list_raw"]} to {args.destination}""") + else: + sys.stdout.write( + f"""[MOVE] Moving {source["mid"]} from {source["list_raw"]} to {args.destination}...""" + ) + sys.stdout.flush() + await es.es.update( + index=es.db_mbox, + id=doc["_id"], + body={ + "doc": { + "list": args.destination, + "list_raw": args.destination, + } + }, + ) + sys.stdout.write(" [DONE]\n") + sys.stdout.flush() + docs_changed += 1 + elif args.action == "private": + if not source["private"]: + if args.test: + print(f"""[TEST] Would have made {source["mid"]} from {source["list_raw"]} private""") + else: + sys.stdout.write(f"""[HIDE] Turning {source["mid"]} from {source["list_raw"]} private...""") + sys.stdout.flush() + await es.es.update( + index=es.db_mbox, + id=doc["_id"], + body={ + "doc": { + "private": True, + } + }, + ) + sys.stdout.write(" [DONE]\n") + sys.stdout.flush() + docs_changed += 1 + elif args.action == "public": + if source["private"]: + if args.test: + print(f"""[TEST] Would have made {source["mid"]} from {source["list_raw"]} public""") + else: + sys.stdout.write(f"""[SHOW] Turning {source["mid"]} from {source["list_raw"]} public...""") + sys.stdout.flush() + await es.es.update( + index=es.db_mbox, + id=doc["_id"], + body={ + "doc": { + "private": False, + } + }, + ) + sys.stdout.write(" [DONE]\n") + sys.stdout.flush() + docs_changed += 1 + elif args.action == "delete": + if args.test: + print( + f"""[TEST] Would have deleted {source["mid"]} (and source {source["dbid"]}) from {source["list_raw"]}""" + ) + else: + sys.stdout.write( + f"""[DELETE] Removing {source["mid"]} (and source {source["dbid"]}) from {source["list_raw"]}...""" + ) + sys.stdout.flush() + await es.es.delete( + index=es.db_mbox, + id=doc["_id"], + ) + await es.es.delete( + index=es.db_source, + id=source["dbid"], + ) + sys.stdout.write(" [DONE]\n") + sys.stdout.flush() + docs_changed += 1 + stop_time = time.time() + time_taken = int(stop_time - start_time) + print(f"Handled {docs_changed} document(s) in {time_taken} second(s).") + await es.es.close() + +if __name__ == "__main__": + loop = asyncio.get_event_loop() + loop.run_until_complete(main())