janl opened a new issue, #5393:
URL: https://github.com/apache/couchdb/issues/5393

   Heya, for hot-fixing a cluster I had to write a `_conflicts` and 
`_deleted_conflicts` finder that I could paste into remsh. I thought this might 
be a good idea to turn into a couch scanner plugin, but I don’t have time to do 
this right now. If someone would like to take this on, you’re all very welcome.
   
   This is “make it work for me” levels of good code, but I ran this across 
~2TB worth of shard files without issue. The “progress bar” can probably be 
taken out and the doc/s reporting needs to go into stats, but that’s all.
   
   Here’s my code:
   
   ```erlang
   
   % rr/1 is the Erlang shell built-in that reads record definitions from a
   % module; the record syntax used further down (#user_ctx{}, #doc_info{},
   % #rev_info{}, #changes_acc{}) only works in the shell after this.
   rr(couch_db).
   rr(couch_changes).
   
   % Threshold read by MinConflictsFilter: {Id, Count} entries whose Count is
   % below this value are dropped from the report.
   MinConflicts = 5.
   % NOTE(review): presumably tags this shell process's I/O with the
   % compaction priority class so the scan competes less with regular
   % requests -- confirm against the ioq documentation.
   ioq:set_io_priority({compaction, self()}).
    
   % Ordering fun for lists:sort/2 over {Id, Count} pairs: true only when the
   % left-hand count is strictly greater, so larger counts sort first.
   Sorter = fun({_IdA, CountA}, {_IdB, CountB}) -> CountB < CountA end.
   % Predicate for lists:filter/2: rejects {Id, Count} pairs whose Count is
   % below MinConflicts; every other term (including non-pairs) is kept.
   MinConflictsFilter = fun
       ({_Id, Count}) -> Count >= MinConflicts;
       (_Other) -> true
   end.
   
   % MaybeAppend(Id, Count, Acc) -> NewAcc
   % Puts {Id, Count} at the front of Acc, unless Count is exactly 0, in
   % which case Acc is returned untouched.
   MaybeAppend = fun
       (_Id, 0, Acc) -> Acc;
       (Id, Count, Acc) -> [{Id, Count} | Acc]
   end.
   
   % GetDocCount(Db) -> number
   % Sum of the doc_count and del_doc_count entries of the property list
   % returned by couch_db:get_db_info/1; a missing entry counts as 0.
   GetDocCount = fun(Db) ->
       {ok, Info} = couch_db:get_db_info(Db),
       lists:sum([proplists:get_value(Key, Info, 0) || Key <- [doc_count, del_doc_count]])
   end.
   
   % MaybePrintStats(Begin, DocCount, DocsProcessed, LastPrinted) -> NewLastPrinted
   %
   % Progress reporter. Begin is an os:timestamp() tuple taken when the scan
   % started, DocCount the expected total, DocsProcessed the number handled so
   % far and LastPrinted the percentage at which progress was last printed.
   %
   % Nothing is printed (and LastPrinted is returned unchanged) when DocCount
   % or DocsProcessed is 0, or when progress has advanced by 10 percentage
   % points or less since the last print. Otherwise the truncated percentage
   % and the docs/s rate are written to stdout and the new percentage (a
   % float) is returned.
   MaybePrintStats = fun
       (_Begin, 0, _DocsProcessed, LastPrinted) ->
           LastPrinted;
       (_Begin, _DocCount, 0, LastPrinted) ->
           LastPrinted;
       (Begin, DocCount, DocsProcessed, LastPrinted) ->
           Perc = 100 / DocCount * DocsProcessed,
           case (Perc - LastPrinted) > 10 of
               true ->
                   % Elapsed wall-clock time in seconds.
                   Duration = timer:now_diff(os:timestamp(), Begin) / 1000 / 1000,
                   % os:timestamp/0 is wall-clock time, so the elapsed time can
                   % be zero (or negative after a clock step). Dividing by 0.0
                   % raises badarith, so report a rate of 0.0 in that case.
                   DocsPerSecond =
                       case Duration > 0 of
                           true -> DocsProcessed / Duration;
                           false -> 0.0
                       end,
                   io:format("~p% (~.2f docs/s) ", [trunc(Perc), DocsPerSecond]),
                   Perc;
               false ->
                   LastPrinted
           end
   end.
   
   % Scanner(DbName) -> {Conflicts, DeletedConflicts}
   %
   % Opens DbName with an admin user context and folds over its changes from
   % sequence 0. For every document it looks at all revisions except the first
   % one in #doc_info.revs and counts how many of them are deleted and how many
   % are not. Both results are lists of {DocId, Count}, ordered with Sorter and
   % reduced with MinConflictsFilter.
   %
   % Progress and the total number of processed documents are printed to
   % stdout. The database handle is closed in an `after` clause so that a
   % crash in GetDocCount or during the fold does not leak it.
   Scanner = fun(DbName) ->
       OpenOpts = [{user_ctx, #user_ctx{name = <<"admin">>, roles = [<<"_admin">>]}}],
       {ok, Db} = couch_db:open(DbName, OpenOpts),
       ChangesAcc =
           try
               Begin = os:timestamp(),
               io:format("~n ~p: ", [DbName]),
               DocCount = GetDocCount(Db),
               UserFun = fun(FullDocInfo, Acc) ->
                   DocInfo = couch_doc:to_doc_info(FullDocInfo),
                   % NOTE(review): the head of revs is assumed to be the
                   % winning revision and is therefore not a conflict --
                   % confirm against couch_doc:to_doc_info/1.
                   [_ | Revs] = DocInfo#doc_info.revs,
                   {DeletedRevs, LiveRevs} = lists:partition(
                       fun(RevInfo) -> RevInfo#rev_info.deleted end,
                       Revs
                   ),
                   {AccDeleted, AccLive, Seen, LastPrinted} = Acc#changes_acc.user_acc,
                   NewLastPrinted = MaybePrintStats(Begin, DocCount, Seen, LastPrinted),
                   {ok, Acc#changes_acc{
                       user_acc = {
                           MaybeAppend(DocInfo#doc_info.id, length(DeletedRevs), AccDeleted),
                           MaybeAppend(DocInfo#doc_info.id, length(LiveRevs), AccLive),
                           Seen + 1,
                           NewLastPrinted
                       }
                   }}
               end,
               StartSeq = 0,
               % user_acc = {DeletedConflictsAcc, ConflictsAcc, DocsProcessed, LastPrintedPerc}
               UserAcc = #changes_acc{user_acc = {[], [], 0, 0}},
               Opts = [{include_docs, true}, {deleted, true}],
               {ok, FinalAcc} = couch_db:fold_changes(Db, StartSeq, UserFun, UserAcc, Opts),
               FinalAcc
           after
               couch_db:close(Db)
           end,
       {DeletedConflicts, Conflicts, DocsProcessed, _} = ChangesAcc#changes_acc.user_acc,
       io:format("Total Docs Processed: ~p ", [DocsProcessed]),
       ConflictsSorted = lists:filter(MinConflictsFilter, lists:sort(Sorter, Conflicts)),
       DeletedConflictsSorted = lists:filter(
           MinConflictsFilter,
           lists:sort(Sorter, DeletedConflicts)
       ),
       {ConflictsSorted, DeletedConflictsSorted}
   end.
   
   % Enumerator() -> ok
   % Runs Scanner on every name returned by couch_server:all_databases/0 and
   % prints the two result lists for each one to stdout.
   Enumerator = fun() ->
       {ok, AllShards} = couch_server:all_databases(),
       lists:foreach(
           fun(Shard) ->
               {Conflicts, DeletedConflicts} = Scanner(Shard),
               % The format string is written as two adjacent literals (joined
               % by the parser) so that no raw line break ends up inside it.
               io:format(
                   "~n      Conflicts: ~n     ~p ~n      "
                   "DeletedConflicts: ~n     ~p",
                   [Conflicts, DeletedConflicts]
               )
           end,
           AllShards
       )
   end.
   
   % Start the scan: evaluates the Enumerator fun bound above.
   Enumerator().
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@couchdb.apache.org.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to