nickva commented on PR #4729: URL: https://github.com/apache/couchdb/pull/4729#issuecomment-1684533075
> I confirmed directly and via haproxy that when a client disconnects (curl and CTRL-C in the first case, and simply doing something that hits haproxy's server timeout like `_changes?feed=continuous&timeout=10000000` in the second), the mochiweb process is killed, and this triggers the new code above to take down workers. Cleanup happens without this patch already from what I've observed. Here is how I tested on main: ```diff diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index e2de301b2..6c2beba83 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -127,6 +127,8 @@ handle_changes_req1(#httpd{} = Req, Db) -> db_open_options = [{user_ctx, couch_db:get_user_ctx(Db)}] }, Max = chttpd:chunked_response_buffer_size(), + couch_log:error(" +++TRACING _changes++++ ~p:~p@~B REQPID:~p", [?MODULE, ?FUNCTION_NAME, ?LINE, self()]), + dbg:tracer(), dbg:p(self(), p), case ChangesArgs#changes_args.feed of "normal" -> T0 = os:timestamp(), diff --git a/src/fabric/src/fabric_db_update_listener.erl b/src/fabric/src/fabric_db_update_listener.erl index 78ccf5a4d..fb508294a 100644 --- a/src/fabric/src/fabric_db_update_listener.erl +++ b/src/fabric/src/fabric_db_update_listener.erl @@ -37,6 +37,8 @@ }). 
go(Parent, ParentRef, DbName, Timeout) -> + couch_log:error(" +++TRACING UPDATE NOTIFIER+++ ~p:~p@~B ~p Parent:~p", [?MODULE, ?FUNCTION_NAME, ?LINE, DbName, Parent]), + dbg:tracer(), dbg:p(self(), p), Shards = mem3:shards(DbName), Notifiers = start_update_notifiers(Shards), MonRefs = lists:usort([rexi_utils:server_pid(N) || #worker{node = N} <- Notifiers]), @@ -82,6 +84,8 @@ start_update_notifiers(Shards) -> % rexi endpoint start_update_notifier(DbNames) -> + couch_log:error(" +++TRACING UPDATE NOTIFIER WORKER+++ ~p:~p@~B~p", [?MODULE, ?FUNCTION_NAME, ?LINE, DbNames]), + dbg:tracer(), dbg:p(self(), p), {Caller, Ref} = get(rexi_from), Notify = config:get("couchdb", "maintenance_mode", "false") /= "true", State = #cb_state{client_pid = Caller, client_ref = Ref, notify = Notify}, diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index fa6ea5116..64fdbf4b5 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -69,6 +69,8 @@ changes(DbName, Args, StartSeq) -> changes(DbName, #changes_args{} = Args, StartSeq, DbOptions) -> changes(DbName, [Args], StartSeq, DbOptions); changes(DbName, Options, StartVector, DbOptions) -> + couch_log:error(" ++TRACING CHANGES WORKER+++++ ~p:~p@~B~p", [?MODULE, ?FUNCTION_NAME, ?LINE, DbName]), + dbg:tracer(), dbg:p(self(), p), set_io_priority(DbName, DbOptions), Args0 = lists:keyfind(changes_args, 1, Options), #changes_args{dir = Dir, filter_fun = Filter} = Args0, ``` That's setting up process event traces on the request process, worker process, db updater process and db update worker process. The goal is that these should be cleaned as soon as we attempt any write to the closed socket. (`db` is Q=1 empty db) ``` % curl -i $DB/db/_changes'?feed=continuous&since=now' HTTP/1.1 200 OK ... 
^C ``` ``` [error] 2023-08-18T22:54:36.654375Z node1@127.0.0.1 <0.949.0> cc6744542c +++TRACING _changes++++ chttpd_db:handle_changes_req1@130 REQPID:<0.949.0> (<0.949.0>) spawn <0.974.0> as erlang:apply(#Fun<rexi_monitor.1.77517421>,[]) (<0.949.0>) link <0.974.0> (<0.949.0>) getting_unlinked <0.974.0> [error] 2023-08-18T22:54:36.654949Z node1@127.0.0.1 <0.976.0> -------- +++TRACING UPDATE NOTIFIER+++ fabric_db_update_listener:go@40 <<"db">> Parent:<0.949.0> (<0.949.0>) spawn <0.976.0> as fabric_db_update_listener:go(<0.949.0>,#Ref<0.830586435.801374217.85503>,<<"db">>,60000) (<0.949.0>) link <0.976.0> (<0.949.0>) spawn <0.978.0> as erlang:apply(#Fun<rexi_monitor.1.77517421>,[]) (<0.949.0>) link <0.978.0> (<0.949.0>) spawn <0.979.0> as erlang:apply(#Fun<fabric_streams.3.106508571>,[]) [error] 2023-08-18T22:54:36.655018Z node1@127.0.0.1 <0.977.0> cc6744542c ++TRACING CHANGES WORKER+++++ fabric_rpc:changes@72<<"shards/00000000-ffffffff/db.1692392006">> (<0.976.0>) spawn <0.980.0> as erlang:apply(#Fun<rexi_monitor.1.77517421>,[]) [error] 2023-08-18T22:54:36.655123Z node1@127.0.0.1 <0.982.0> -------- +++TRACING UPDATE NOTIFIER WORKER+++ fabric_db_update_listener:start_update_notifier@87[<<"shards/00000000-ffffffff/db.1692392006">>] (<0.976.0>) link <0.980.0> (<0.976.0>) spawn <0.981.0> as erlang:apply(#Fun<fabric_db_update_listener.2.111341222>,[]) (<0.977.0>) exit normal (<0.949.0>) getting_unlinked <0.978.0> (<0.976.0>) exit normal (<0.982.0>) exit killed (<0.949.0>) exit shutdown ``` * `0.949.0` request process is killed with a shutdown as expected. Probably after trying to write to a closed socket. * `0.976.0` db update notifier exits `normal` * `0.982.0` db update notifier worker is killed by the cleaner process Since `db` is empty we're not actually starting any workers but those should also be cleaned by the cleanup helper process in [fabric_streams](https://github.com/apache/couchdb/blob/main/src/fabric/src/fabric_streams.erl). 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@couchdb.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org