This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 0abc34d882a5a2d4238c82bec117f6f071fc45be
Author: Nick Vatamaniuc <[email protected]>
AuthorDate: Fri Jan 30 19:10:58 2026 -0500

    Fix intermittent _scheduler/docs 500 error
    
    Previously, if users called the `_scheduler/docs` API at just the right
    moment, when a job would appear in the replicator doc processor ets table as
    `scheduled`, but it would not be in the replicator scheduler's ets table,
    users would see function clause errors that look like this, along with a
    500 HTTP response:
    
    ```
    req_err(2666435525) unknown_error : function_clause [
     <<"couch_replicator_httpd_util:update_db_name/1 L183">>,
     <<"couch_replicator_httpd:handle_scheduler_doc/3 L157">>,
     <<"chttpd:handle_req_after_auth/2 L432">>,
     ...
    ]
    ```
    
    To fix it, explicitly handle this state as a transitional pending state,
    returning all the information we have about the job in the replicator doc
    processor.
---
 src/couch_replicator/src/couch_replicator_doc_processor.erl | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/couch_replicator/src/couch_replicator_doc_processor.erl 
b/src/couch_replicator/src/couch_replicator_doc_processor.erl
index 533f8d754..caf8d4454 100644
--- a/src/couch_replicator/src/couch_replicator_doc_processor.erl
+++ b/src/couch_replicator/src/couch_replicator_doc_processor.erl
@@ -58,7 +58,7 @@
 -define(MIN_START_DELAY_MSEC, 500).
 
 -type filter_type() :: nil | view | user | docids | mango.
--type repstate() :: initializing | error | scheduled | not_owner.
+-type repstate() :: initializing | error | scheduled | not_owner | pending.
 
 -record(st, {
     % Timer reference
@@ -584,7 +584,13 @@ ejson_doc(#rdoc{state = scheduled} = RDoc, 
HealthThreshold) ->
     JobProps = couch_replicator_scheduler:job_summary(RepId, HealthThreshold),
     case JobProps of
         nil ->
-            nil;
+            % The job is in the doc processor table as "scheduled" but it's not
+            % in the scheduler's table. This is a transitional state and may
+            % happen, for example, when jobs complete, but the replicator doc
+            % processor hasn't yet noticed the "completed" changes feed event
+            % from the doc update. Since we know some info about the job,
+            % return that as pending state instead of just returning nil.
+            ejson_doc(RDoc#rdoc{state = pending}, HealthThreshold);
         [{_, _} | _] ->
             {[
                 {doc_id, DocId},

Reply via email to