janl opened a new issue, #5393: URL: https://github.com/apache/couchdb/issues/5393
Heya, for hot-fixing a cluster I had to write a `_conflicts` and `_deleted_conflicts` finder that I could paste into remsh. I thought this might be a good idea to turn into a couch scanner plugin, but I don’t have time to do this right now. If someone would like to take this on, you’re all very welcome. This is “make it work for me” levels of good code, but I ran this across ~2TB worth of shard files without issue. The “progress bar” can probably be taken out and the docs/s reporting needs to go into stats, but that’s all. Here’s my code: ```erlang rr(couch_db). rr(couch_changes). MinConflicts = 5. ioq:set_io_priority({compaction, self()}). Sorter = fun({_, A}, {_, B}) -> A > B end. MinConflictsFilter = fun ({_, Conflicts}) when Conflicts < MinConflicts -> false; (_) -> true end. MaybeAppend = fun (_Id, 0, Acc) -> Acc; (Id, List, Acc) -> lists:append([{Id, List}], Acc) end. GetDocCount = fun(Db) -> {ok, DbInfo} = couch_db:get_db_info(Db), DocCount = proplists:get_value(doc_count, DbInfo, 0), DelDocCount = proplists:get_value(del_doc_count, DbInfo, 0), DocCount + DelDocCount end. MaybePrintStats = fun(_, 0, _, LastPrinted) -> LastPrinted; (_, _, 0, LastPrinted) -> LastPrinted; (Begin, DocCount, DocsProcessed, LastPrinted) -> Perc = 100 / DocCount * DocsProcessed, DoJump = (Perc - LastPrinted) > 10, case DoJump of true -> End = os:timestamp(), Duration = timer:now_diff(End, Begin) / 1000 / 1000, DocsPerSecond = DocsProcessed / Duration, io:format("~p% (~.2f docs/s) ", [trunc(Perc), DocsPerSecond]), Perc; _ -> LastPrinted end end. 
%% Scanner(DbName) -> {ConflictsSorted, DeletedConflictsSorted}
%%
%% Opens the database DbName as an admin user, folds over its changes from
%% sequence 0 and, for every document, counts the non-first leaf revisions,
%% split into deleted ("deleted conflicts") and non-deleted ("conflicts").
%% Each result list holds {DocId, Count} tuples, ordered by Sorter and
%% filtered by MinConflictsFilter.
%%
%% Side effects: prints the database name, progress (through
%% MaybePrintStats) and the total number of documents processed.
%%
%% The database handle is closed in an `after` clause so it is released even
%% when reading the db info or the fold itself raises.
Scanner = fun(DbName) ->
    OpenOpts = [{user_ctx, #user_ctx{name = <<"admin">>, roles = [<<"_admin">>]}}],
    {ok, Db} = couch_db:open(DbName, OpenOpts),
    ChangesAcc =
        try
            Begin = os:timestamp(),
            io:format("~n ~p: ", [DbName]),
            DocCount = GetDocCount(Db),
            UserFun = fun(FullDocInfo, Acc) ->
                DocInfo = couch_doc:to_doc_info(FullDocInfo),
                DocId = DocInfo#doc_info.id,
                %% NOTE(review): assumes the head of `revs` is the winning
                %% revision, so the tail holds only conflicts -- TODO confirm.
                [_ | Revs] = DocInfo#doc_info.revs,
                {DelRevs, LiveRevs} = lists:partition(
                    fun(RevInfo) -> RevInfo#rev_info.deleted end,
                    Revs
                ),
                {AccDeletedConflicts, AccConflicts, Seen, LastPrinted} =
                    Acc#changes_acc.user_acc,
                NewLastPrinted = MaybePrintStats(Begin, DocCount, Seen, LastPrinted),
                {ok, Acc#changes_acc{
                    user_acc = {
                        MaybeAppend(DocId, length(DelRevs), AccDeletedConflicts),
                        MaybeAppend(DocId, length(LiveRevs), AccConflicts),
                        Seen + 1,
                        NewLastPrinted
                    }
                }}
            end,
            StartSeq = 0,
            %% user_acc = {DeletedConflicts, Conflicts, DocsProcessed, LastPrinted}
            UserAcc = #changes_acc{user_acc = {[], [], 0, 0}},
            %% NOTE(review): options are passed through unchanged; confirm
            %% which of them couch_db:fold_changes/5 actually honours.
            Opts = [{include_docs, true}, {deleted, true}],
            {ok, FoldAcc} = couch_db:fold_changes(Db, StartSeq, UserFun, UserAcc, Opts),
            FoldAcc
        after
            couch_db:close(Db)
        end,
    {DeletedConflicts, Conflicts, DocsProcessed, _} = ChangesAcc#changes_acc.user_acc,
    io:format("Total Docs Processed: ~p ", [DocsProcessed]),
    ConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, Conflicts)),
    DeletedConflictsSorted =
        lists:filter(MinConflictsFilter, lists:sort(Sorter, DeletedConflicts)),
    {ConflictsSorted, DeletedConflictsSorted}
end.
%% Enumerator() -> ok
%%
%% Runs Scanner over every database name returned by
%% couch_server:all_databases/0 and prints both result lists per database.
Enumerator = fun() ->
    {ok, DbNames} = couch_server:all_databases(),
    %% Scan one database and print what was found.
    Report = fun(DbName) ->
        {Found, FoundDeleted} = Scanner(DbName),
        io:format("~n Conflicts: ~n ~p ~n DeletedConflicts: ~n ~p", [Found, FoundDeleted])
    end,
    lists:foreach(Report, DbNames)
end.
Enumerator().
```
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@couchdb.apache.org.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org