URL: https://github.com/SSSD/sssd/pull/107
Author: fidencio
Title: #107: WATCHDOG: Avoid non async-signal-safe from the signal_handler
Action: opened
PR body:
"""
While debugging rhbz#1396912 some deadlock on sssd_be was noticed[0] and
it's been caused by the use of non async-signal-safe functions from the
signal_handler (please, see man 7 signal for more info about which are
the async-signal-safe functions that can be used).
The removal of those functions we lose some debug messages, which is the
least of problems now. Also instead of re-setting the watchdog timeout
in case of time-shift (please, see b8ceaeb for more info), let's just
re-set the timer's time.
Along with this commit, the function to teardown the watchdog has been
removed as it's not used anymore.
[0]:
[root@dusan ~]# pstack 17922
#0 __lll_lock_wait_private () at
../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007fe707d04f93 in _L_lock_14932 () from /lib64/libc.so.6
#2 0x00007fe707d02013 in __GI___libc_malloc (bytes=140630248638304,
bytes@entry=15) at malloc.c:2891
#3 0x00007fe707d0888a in __GI___strdup (s=0x7fe707dff4f7 "/etc/localtime")
at strdup.c:42
#4 0x00007fe707d31b61 in tzset_internal (always=<optimized out>,
explicit=explicit@entry=1) at tzset.c:438
#5 0x00007fe707d32523 in __tz_convert (timer=timer@entry=0x7ffcd5d2b090,
use_localtime=use_localtime@entry=1, tp=tp@entry=0x7fe708041d40 <_tmbuf>) at
tzset.c:621
#6 0x00007fe707d30521 in __GI_localtime (t=t@entry=0x7ffcd5d2b090) at
localtime.c:42
#7 0x00007fe70886c7b0 in sss_vdebug_fn (file=<optimized out>,
line=<optimized out>, function=0x7fe70bff27f0 <__FUNCTION__.9379>
"watchdog_handler", level=16, flags=flags@entry=0,
format=format@entry=0x7fe70bff2760 "Watchdog timer overflow, killing
process!\n", ap=ap@entry=0x7ffcd5d2b130) at src/util/debug.c:248
#8 0x00007fe70886c995 in sss_debug_fn (file=file@entry=0x7fe70bff263b
"src/util/util_watchdog.c", line=line@entry=82,
function=function@entry=0x7fe70bff27f0 <__FUNCTION__.9379> "watchdog_handler",
level=level@entry=16, format=format@entry=0x7fe70bff2760 "Watchdog timer
overflow, killing process!\n") at src/util/debug.c:284
#9 0x00007fe70bfdb409 in watchdog_handler (sig=<optimized out>) at
src/util/util_watchdog.c:81
#10 <signal handler called>
#11 0x00007fe707cff664 in _int_malloc (av=av@entry=0x7fe70803c760
<main_arena>, bytes=bytes@entry=151) at malloc.c:3494
#12 0x00007fe707d01fbc in __GI___libc_malloc (bytes=bytes@entry=151) at
malloc.c:2893
#13 0x00007fe708450749 in __talloc_with_prefix (prefix_len=0, size=55,
context=0x7fe718373210) at ../talloc.c:668
#14 __talloc (size=55, context=0x7fe718373210) at ../talloc.c:708
#15 _talloc_named_const (name=0x7fe70bb7015d "../common/ldb_pack.c:425",
size=55, context=0x7fe718373210) at ../talloc.c:865
#16 talloc_named_const (context=<optimized out>, size=size@entry=55,
name=name@entry=0x7fe70bb7015d "../common/ldb_pack.c:425") at ../talloc.c:1606
#17 0x00007fe70bb61803 in ldb_unpack_data_only_attr_list
(ldb=ldb@entry=0x7fe70e4d52c0, data=data@entry=0x7ffcd5d2b990,
message=0x7fe7184aa1e0, list=list@entry=0x0, list_size=list_size@entry=0,
nb_elements_in_db=nb_elements_in_db@entry=0x0) at ../common/ldb_pack.c:425
#18 0x00007fe70bb61a7d in ldb_unpack_data (ldb=ldb@entry=0x7fe70e4d52c0,
data=data@entry=0x7ffcd5d2b990, message=<optimized out>) at
../common/ldb_pack.c:470
#19 0x00007fe6fdc29b46 in ltdb_parse_data_unpack (key=..., data=...,
private_data=0x7ffcd5d2ba70) at ../ldb_tdb/ldb_search.c:249
#20 0x00007fe70a5e0a24 in tdb_parse_data (tdb=tdb@entry=0x7fe70e4eaa10,
key=..., offset=15619748, len=414772, parser=parser@entry=0x7fe6fdc29b10
<ltdb_parse_data_unpack>, private_data=private_data@entry=0x7ffcd5d2ba70) at
../common/io.c:637
#21 0x00007fe70a5dc1fc in tdb_parse_record (tdb=0x7fe70e4eaa10, key=...,
parser=parser@entry=0x7fe6fdc29b10 <ltdb_parse_data_unpack>,
private_data=private_data@entry=0x7ffcd5d2ba70) at ../common/tdb.c:253
#22 0x00007fe6fdc29e7b in ltdb_search_dn1
(module=module@entry=0x7fe70e4eab50, dn=dn@entry=0x7fe7183c4940,
msg=msg@entry=0x7fe7184aa1e0) at ../ldb_tdb/ldb_search.c:287
#23 0x00007fe6fdc2acbb in ltdb_dn_list_load
(module=module@entry=0x7fe70e4eab50, dn=dn@entry=0x7fe7183c4940,
list=list@entry=0x7fe7183c3a30) at ../ldb_tdb/ldb_index.c:181
#24 0x00007fe6fdc2bbbb in ltdb_index_add1
(module=module@entry=0x7fe70e4eab50, dn=dn@entry=0x7fe7183bf3e0
"[email protected],cn=users,cn=DOMAIN.COM,cn=sysdb",
v_idx=v_idx@entry=0, el=<optimized out>, el=<optimized out>) at
../ldb_tdb/ldb_index.c:1134
#25 0x00007fe6fdc2c62c in ltdb_index_add_el (el=0x7fe7184aa3e0,
dn=0x7fe7183bf3e0
"[email protected],cn=users,cn=DOMAIN.COM,cn=sysdb",
module=0x7fe70e4eab50) at ../ldb_tdb/ldb_index.c:1180
#26 ltdb_index_add_element (module=module@entry=0x7fe70e4eab50,
dn=<optimized out>, el=el@entry=0x7fe7184aa3e0) at ../ldb_tdb/ldb_index.c:1290
#27 0x00007fe6fdc290bb in ltdb_modify_internal
(module=module@entry=0x7fe70e4eab50, msg=0x7fe7183bf0c0,
req=req@entry=0x7fe7183bdc10) at ../ldb_tdb/ldb_tdb.c:903
#28 0x00007fe6fdc2958a in ltdb_modify (ctx=0x7fe7183c2950,
ctx=0x7fe7183c2950) at ../ldb_tdb/ldb_tdb.c:998
#29 ltdb_callback (ev=<optimized out>, te=<optimized out>, t=...,
private_data=<optimized out>) at ../ldb_tdb/ldb_tdb.c:1380
#30 0x00007fe708664b4f in tevent_common_loop_timer_delay
(ev=ev@entry=0x7fe70e4d2890) at ../tevent_timed.c:341
#31 0x00007fe708665b5a in epoll_event_loop_once (ev=0x7fe70e4d2890,
location=<optimized out>) at ../tevent_epoll.c:911
#32 0x00007fe708664257 in std_event_loop_once (ev=0x7fe70e4d2890,
location=0x7fe70bb72ec5 "../common/ldb.c:631") at ../tevent_standard.c:114
#33 0x00007fe70866040d in _tevent_loop_once (ev=ev@entry=0x7fe70e4d2890,
location=location@entry=0x7fe70bb72ec5 "../common/ldb.c:631") at ../tevent.c:533
#34 0x00007fe70bb6bc4f in ldb_wait (handle=0x7fe7183c4530, type=<optimized
out>) at ../common/ldb.c:631
#35 0x00007fe70bb6c793 in ldb_autotransaction_request (ldb=0x7fe70e4d52c0,
req=0x7fe7183bdc10) at ../common/ldb.c:573
#36 0x00007fe70bb6d263 in ldb_modify (ldb=ldb@entry=0x7fe70e4d52c0,
message=<optimized out>) at ../common/ldb.c:1655
#37 0x00007fe70bfa2ab5 in sysdb_set_cache_entry_attr (ldb=0x7fe70e4d52c0,
entry_dn=entry_dn@entry=0x7fe7183c4760, attrs=attrs@entry=0x7fe7183bf680,
mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1159
#38 0x00007fe70bfa304d in sysdb_rep_ts_entry_attr (sysdb=0x7fe70e4eadd0,
attrs=0x7fe7183bf680, entry_dn=0x7fe7183c4760) at src/db/sysdb_ops.c:1218
#39 sysdb_set_ts_entry_attr (sysdb=sysdb@entry=0x7fe70e4eadd0,
entry_dn=entry_dn@entry=0x7fe7183c4760, attrs=attrs@entry=0x7fe7183bb840,
mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1248
#40 0x00007fe70bfa4aa9 in sysdb_set_entry_attr (sysdb=0x7fe70e4eadd0,
entry_dn=0x7fe7183c4760, attrs=attrs@entry=0x7fe7183bb840,
mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1199
#41 0x00007fe70bfa4b5f in sysdb_set_user_attr
(domain=domain@entry=0x7fe70e4d62f0, name=name@entry=0x7fe7183c01f0
"[email protected]", attrs=attrs@entry=0x7fe7183bb840,
mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1285
#42 0x00007fe70bfa58c3 in sysdb_add_user
(domain=domain@entry=0x7fe70e4d62f0, name=name@entry=0x7fe7183c01f0
"[email protected]", uid=uid@entry=1415408147, gid=<optimized out>,
gid@entry=1415400513, gecos=gecos@entry=0x7fe710465d00 "Test User7045",
homedir=homedir@entry=0x0, shell=shell@entry=0x0,
orig_dn=orig_dn@entry=0x7fe710465940 "CN=Test
User7045,OU=Sales,DC=DOMAIN,DC=COM", attrs=attrs@entry=0x7fe7183bb840,
cache_timeout=cache_timeout@entry=5400, now=now@entry=1481105315) at
src/db/sysdb_ops.c:1928
#43 0x00007fe70bfab271 in sysdb_store_new_user (now=1481105315,
cache_timeout=5400, attrs=0x7fe7183bb840, orig_dn=0x7fe710465940 "CN=Test
User7045,OU=Sales,DC=DOMAIN,DC=COM", shell=0x0, homedir=0x0,
gecos=0x7fe710465d00 "Test User7045", gid=1415400513, uid=1415408147,
name=0x7fe7183c01f0 "[email protected]", domain=0x7fe70e4d62f0) at
src/db/sysdb_ops.c:2549
#44 sysdb_store_user (domain=domain@entry=0x7fe70e4d62f0,
name=0x7fe7183c01f0 "[email protected]", pwd=pwd@entry=0x0,
uid=1415408147, gid=1415400513, gecos=gecos@entry=0x7fe710465d00 "Test
User7045", homedir=homedir@entry=0x0, shell=shell@entry=0x0,
orig_dn=orig_dn@entry=0x7fe710465940 "CN=Test
User7045,OU=Sales,DC=DOMAIN,DC=COM", attrs=attrs@entry=0x7fe7183bb840,
remove_attrs=0x7fe7183c08a0, cache_timeout=cache_timeout@entry=5400,
now=now@entry=1481105315) at src/db/sysdb_ops.c:2499
#45 0x00007fe6fba0d9f9 in sdap_save_user
(memctx=memctx@entry=0x7fe70e544ee0, opts=opts@entry=0x7fe70e518400,
dom=dom@entry=0x7fe70e4d62f0, attrs=<optimized out>,
_usn_value=_usn_value@entry=0x7ffcd5d2c260, now=now@entry=1481105315) at
src/providers/ldap/sdap_async_users.c:509
#46 0x00007fe6fba0df9a in sdap_save_users
(memctx=memctx@entry=0x7fe70e544e40, sysdb=0x7fe70e4eadd0, dom=0x7fe70e4d62f0,
opts=0x7fe70e518400, users=<optimized out>, num_users=10006,
_usn_value=_usn_value@entry=0x7fe70e544e60) at
src/providers/ldap/sdap_async_users.c:572
#47 0x00007fe6fba0e460 in sdap_get_users_done (subreq=<optimized out>) at
src/providers/ldap/sdap_async_users.c:938
#48 0x00007fe6fba0c9d5 in sdap_search_user_process (subreq=0x0) at
src/providers/ldap/sdap_async_users.c:814
#49 0x00007fe6fba07379 in generic_ext_search_handler (subreq=0x0,
opts=<optimized out>) at src/providers/ldap/sdap_async.c:1689
#50 0x00007fe6fba0991b in sdap_get_generic_op_finished (op=<optimized out>,
reply=<optimized out>, error=<optimized out>, pvt=<optimized out>) at
src/providers/ldap/sdap_async.c:1621
#51 0x00007fe6fba083cd in sdap_process_message (ev=<optimized out>,
sh=<optimized out>, msg=0x7fe70e5f9ce0) at src/providers/ldap/sdap_async.c:353
#52 sdap_process_result (ev=<optimized out>, pvt=<optimized out>) at
src/providers/ldap/sdap_async.c:197
#53 0x00007fe708664b4f in tevent_common_loop_timer_delay
(ev=ev@entry=0x7fe70e4cbc30) at ../tevent_timed.c:341
#54 0x00007fe708665b5a in epoll_event_loop_once (ev=0x7fe70e4cbc30,
location=<optimized out>) at ../tevent_epoll.c:911
#55 0x00007fe708664257 in std_event_loop_once (ev=0x7fe70e4cbc30,
location=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent_standard.c:114
#56 0x00007fe70866040d in _tevent_loop_once (ev=ev@entry=0x7fe70e4cbc30,
location=location@entry=0x7fe70bfee8e7 "src/util/server.c:702") at
../tevent.c:533
#57 0x00007fe7086605ab in tevent_common_loop_wait (ev=0x7fe70e4cbc30,
location=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent.c:637
#58 0x00007fe7086641f7 in std_event_loop_wait (ev=0x7fe70e4cbc30,
location=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent_standard.c:140
#59 0x00007fe70bfd1993 in server_loop (main_ctx=0x7fe70e4cd080) at
src/util/server.c:702
#60 0x00007fe70c84cb82 in main (argc=8, argv=<optimized out>) at
src/providers/data_provider_be.c:587
Resolves:
https://fedorahosted.org/sssd/ticket/3266
Signed-off-by: Fabiano FidĂȘncio <[email protected]>
"""
To pull the PR as Git branch:
git remote add ghsssd https://github.com/SSSD/sssd
git fetch ghsssd pull/107/head:pr107
git checkout pr107
From 80723231b9ae02373408dd9e2a0b1a615ab51e19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= <[email protected]>
Date: Wed, 7 Dec 2016 21:36:56 +0100
Subject: [PATCH] WATCHDOG: Avoid non async-signal-safe from the signal_handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
While debugging rhbz#1396912 some deadlock on sssd_be was noticed[0] and
it's been caused by the use of non async-signal-safe functions from the
signal_handler (please, see man 7 signal for more info about which are
the async-signal-safe functions that can be used).
The removal of those functions we lose some debug messages, which is the
least of problems now. Also instead of re-setting the watchdog timeout
in case of time-shift (please, see b8ceaeb for more info), let's just
re-set the timer's time.
Along with this commit, the function to teardown the watchdog has been
removed as it's not used anymore.
[0]:
[root@dusan ~]# pstack 17922
#0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007fe707d04f93 in _L_lock_14932 () from /lib64/libc.so.6
#2 0x00007fe707d02013 in __GI___libc_malloc (bytes=140630248638304, bytes@entry=15) at malloc.c:2891
#3 0x00007fe707d0888a in __GI___strdup (s=0x7fe707dff4f7 "/etc/localtime") at strdup.c:42
#4 0x00007fe707d31b61 in tzset_internal (always=<optimized out>, explicit=explicit@entry=1) at tzset.c:438
#5 0x00007fe707d32523 in __tz_convert (timer=timer@entry=0x7ffcd5d2b090, use_localtime=use_localtime@entry=1, tp=tp@entry=0x7fe708041d40 <_tmbuf>) at tzset.c:621
#6 0x00007fe707d30521 in __GI_localtime (t=t@entry=0x7ffcd5d2b090) at localtime.c:42
#7 0x00007fe70886c7b0 in sss_vdebug_fn (file=<optimized out>, line=<optimized out>, function=0x7fe70bff27f0 <__FUNCTION__.9379> "watchdog_handler", level=16, flags=flags@entry=0, format=format@entry=0x7fe70bff2760 "Watchdog timer overflow, killing process!\n", ap=ap@entry=0x7ffcd5d2b130) at src/util/debug.c:248
#8 0x00007fe70886c995 in sss_debug_fn (file=file@entry=0x7fe70bff263b "src/util/util_watchdog.c", line=line@entry=82, function=function@entry=0x7fe70bff27f0 <__FUNCTION__.9379> "watchdog_handler", level=level@entry=16, format=format@entry=0x7fe70bff2760 "Watchdog timer overflow, killing process!\n") at src/util/debug.c:284
#9 0x00007fe70bfdb409 in watchdog_handler (sig=<optimized out>) at src/util/util_watchdog.c:81
#10 <signal handler called>
#11 0x00007fe707cff664 in _int_malloc (av=av@entry=0x7fe70803c760 <main_arena>, bytes=bytes@entry=151) at malloc.c:3494
#12 0x00007fe707d01fbc in __GI___libc_malloc (bytes=bytes@entry=151) at malloc.c:2893
#13 0x00007fe708450749 in __talloc_with_prefix (prefix_len=0, size=55, context=0x7fe718373210) at ../talloc.c:668
#14 __talloc (size=55, context=0x7fe718373210) at ../talloc.c:708
#15 _talloc_named_const (name=0x7fe70bb7015d "../common/ldb_pack.c:425", size=55, context=0x7fe718373210) at ../talloc.c:865
#16 talloc_named_const (context=<optimized out>, size=size@entry=55, name=name@entry=0x7fe70bb7015d "../common/ldb_pack.c:425") at ../talloc.c:1606
#17 0x00007fe70bb61803 in ldb_unpack_data_only_attr_list (ldb=ldb@entry=0x7fe70e4d52c0, data=data@entry=0x7ffcd5d2b990, message=0x7fe7184aa1e0, list=list@entry=0x0, list_size=list_size@entry=0, nb_elements_in_db=nb_elements_in_db@entry=0x0) at ../common/ldb_pack.c:425
#18 0x00007fe70bb61a7d in ldb_unpack_data (ldb=ldb@entry=0x7fe70e4d52c0, data=data@entry=0x7ffcd5d2b990, message=<optimized out>) at ../common/ldb_pack.c:470
#19 0x00007fe6fdc29b46 in ltdb_parse_data_unpack (key=..., data=..., private_data=0x7ffcd5d2ba70) at ../ldb_tdb/ldb_search.c:249
#20 0x00007fe70a5e0a24 in tdb_parse_data (tdb=tdb@entry=0x7fe70e4eaa10, key=..., offset=15619748, len=414772, parser=parser@entry=0x7fe6fdc29b10 <ltdb_parse_data_unpack>, private_data=private_data@entry=0x7ffcd5d2ba70) at ../common/io.c:637
#21 0x00007fe70a5dc1fc in tdb_parse_record (tdb=0x7fe70e4eaa10, key=..., parser=parser@entry=0x7fe6fdc29b10 <ltdb_parse_data_unpack>, private_data=private_data@entry=0x7ffcd5d2ba70) at ../common/tdb.c:253
#22 0x00007fe6fdc29e7b in ltdb_search_dn1 (module=module@entry=0x7fe70e4eab50, dn=dn@entry=0x7fe7183c4940, msg=msg@entry=0x7fe7184aa1e0) at ../ldb_tdb/ldb_search.c:287
#23 0x00007fe6fdc2acbb in ltdb_dn_list_load (module=module@entry=0x7fe70e4eab50, dn=dn@entry=0x7fe7183c4940, list=list@entry=0x7fe7183c3a30) at ../ldb_tdb/ldb_index.c:181
#24 0x00007fe6fdc2bbbb in ltdb_index_add1 (module=module@entry=0x7fe70e4eab50, dn=dn@entry=0x7fe7183bf3e0 "[email protected],cn=users,cn=DOMAIN.COM,cn=sysdb", v_idx=v_idx@entry=0, el=<optimized out>, el=<optimized out>) at ../ldb_tdb/ldb_index.c:1134
#25 0x00007fe6fdc2c62c in ltdb_index_add_el (el=0x7fe7184aa3e0, dn=0x7fe7183bf3e0 "[email protected],cn=users,cn=DOMAIN.COM,cn=sysdb", module=0x7fe70e4eab50) at ../ldb_tdb/ldb_index.c:1180
#26 ltdb_index_add_element (module=module@entry=0x7fe70e4eab50, dn=<optimized out>, el=el@entry=0x7fe7184aa3e0) at ../ldb_tdb/ldb_index.c:1290
#27 0x00007fe6fdc290bb in ltdb_modify_internal (module=module@entry=0x7fe70e4eab50, msg=0x7fe7183bf0c0, req=req@entry=0x7fe7183bdc10) at ../ldb_tdb/ldb_tdb.c:903
#28 0x00007fe6fdc2958a in ltdb_modify (ctx=0x7fe7183c2950, ctx=0x7fe7183c2950) at ../ldb_tdb/ldb_tdb.c:998
#29 ltdb_callback (ev=<optimized out>, te=<optimized out>, t=..., private_data=<optimized out>) at ../ldb_tdb/ldb_tdb.c:1380
#30 0x00007fe708664b4f in tevent_common_loop_timer_delay (ev=ev@entry=0x7fe70e4d2890) at ../tevent_timed.c:341
#31 0x00007fe708665b5a in epoll_event_loop_once (ev=0x7fe70e4d2890, location=<optimized out>) at ../tevent_epoll.c:911
#32 0x00007fe708664257 in std_event_loop_once (ev=0x7fe70e4d2890, location=0x7fe70bb72ec5 "../common/ldb.c:631") at ../tevent_standard.c:114
#33 0x00007fe70866040d in _tevent_loop_once (ev=ev@entry=0x7fe70e4d2890, location=location@entry=0x7fe70bb72ec5 "../common/ldb.c:631") at ../tevent.c:533
#34 0x00007fe70bb6bc4f in ldb_wait (handle=0x7fe7183c4530, type=<optimized out>) at ../common/ldb.c:631
#35 0x00007fe70bb6c793 in ldb_autotransaction_request (ldb=0x7fe70e4d52c0, req=0x7fe7183bdc10) at ../common/ldb.c:573
#36 0x00007fe70bb6d263 in ldb_modify (ldb=ldb@entry=0x7fe70e4d52c0, message=<optimized out>) at ../common/ldb.c:1655
#37 0x00007fe70bfa2ab5 in sysdb_set_cache_entry_attr (ldb=0x7fe70e4d52c0, entry_dn=entry_dn@entry=0x7fe7183c4760, attrs=attrs@entry=0x7fe7183bf680, mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1159
#38 0x00007fe70bfa304d in sysdb_rep_ts_entry_attr (sysdb=0x7fe70e4eadd0, attrs=0x7fe7183bf680, entry_dn=0x7fe7183c4760) at src/db/sysdb_ops.c:1218
#39 sysdb_set_ts_entry_attr (sysdb=sysdb@entry=0x7fe70e4eadd0, entry_dn=entry_dn@entry=0x7fe7183c4760, attrs=attrs@entry=0x7fe7183bb840, mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1248
#40 0x00007fe70bfa4aa9 in sysdb_set_entry_attr (sysdb=0x7fe70e4eadd0, entry_dn=0x7fe7183c4760, attrs=attrs@entry=0x7fe7183bb840, mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1199
#41 0x00007fe70bfa4b5f in sysdb_set_user_attr (domain=domain@entry=0x7fe70e4d62f0, name=name@entry=0x7fe7183c01f0 "[email protected]", attrs=attrs@entry=0x7fe7183bb840, mod_op=mod_op@entry=2) at src/db/sysdb_ops.c:1285
#42 0x00007fe70bfa58c3 in sysdb_add_user (domain=domain@entry=0x7fe70e4d62f0, name=name@entry=0x7fe7183c01f0 "[email protected]", uid=uid@entry=1415408147, gid=<optimized out>, gid@entry=1415400513, gecos=gecos@entry=0x7fe710465d00 "Test User7045", homedir=homedir@entry=0x0, shell=shell@entry=0x0, orig_dn=orig_dn@entry=0x7fe710465940 "CN=Test User7045,OU=Sales,DC=DOMAIN,DC=COM", attrs=attrs@entry=0x7fe7183bb840, cache_timeout=cache_timeout@entry=5400, now=now@entry=1481105315) at src/db/sysdb_ops.c:1928
#43 0x00007fe70bfab271 in sysdb_store_new_user (now=1481105315, cache_timeout=5400, attrs=0x7fe7183bb840, orig_dn=0x7fe710465940 "CN=Test User7045,OU=Sales,DC=DOMAIN,DC=COM", shell=0x0, homedir=0x0, gecos=0x7fe710465d00 "Test User7045", gid=1415400513, uid=1415408147, name=0x7fe7183c01f0 "[email protected]", domain=0x7fe70e4d62f0) at src/db/sysdb_ops.c:2549
#44 sysdb_store_user (domain=domain@entry=0x7fe70e4d62f0, name=0x7fe7183c01f0 "[email protected]", pwd=pwd@entry=0x0, uid=1415408147, gid=1415400513, gecos=gecos@entry=0x7fe710465d00 "Test User7045", homedir=homedir@entry=0x0, shell=shell@entry=0x0, orig_dn=orig_dn@entry=0x7fe710465940 "CN=Test User7045,OU=Sales,DC=DOMAIN,DC=COM", attrs=attrs@entry=0x7fe7183bb840, remove_attrs=0x7fe7183c08a0, cache_timeout=cache_timeout@entry=5400, now=now@entry=1481105315) at src/db/sysdb_ops.c:2499
#45 0x00007fe6fba0d9f9 in sdap_save_user (memctx=memctx@entry=0x7fe70e544ee0, opts=opts@entry=0x7fe70e518400, dom=dom@entry=0x7fe70e4d62f0, attrs=<optimized out>, _usn_value=_usn_value@entry=0x7ffcd5d2c260, now=now@entry=1481105315) at src/providers/ldap/sdap_async_users.c:509
#46 0x00007fe6fba0df9a in sdap_save_users (memctx=memctx@entry=0x7fe70e544e40, sysdb=0x7fe70e4eadd0, dom=0x7fe70e4d62f0, opts=0x7fe70e518400, users=<optimized out>, num_users=10006, _usn_value=_usn_value@entry=0x7fe70e544e60) at src/providers/ldap/sdap_async_users.c:572
#47 0x00007fe6fba0e460 in sdap_get_users_done (subreq=<optimized out>) at src/providers/ldap/sdap_async_users.c:938
#48 0x00007fe6fba0c9d5 in sdap_search_user_process (subreq=0x0) at src/providers/ldap/sdap_async_users.c:814
#49 0x00007fe6fba07379 in generic_ext_search_handler (subreq=0x0, opts=<optimized out>) at src/providers/ldap/sdap_async.c:1689
#50 0x00007fe6fba0991b in sdap_get_generic_op_finished (op=<optimized out>, reply=<optimized out>, error=<optimized out>, pvt=<optimized out>) at src/providers/ldap/sdap_async.c:1621
#51 0x00007fe6fba083cd in sdap_process_message (ev=<optimized out>, sh=<optimized out>, msg=0x7fe70e5f9ce0) at src/providers/ldap/sdap_async.c:353
#52 sdap_process_result (ev=<optimized out>, pvt=<optimized out>) at src/providers/ldap/sdap_async.c:197
#53 0x00007fe708664b4f in tevent_common_loop_timer_delay (ev=ev@entry=0x7fe70e4cbc30) at ../tevent_timed.c:341
#54 0x00007fe708665b5a in epoll_event_loop_once (ev=0x7fe70e4cbc30, location=<optimized out>) at ../tevent_epoll.c:911
#55 0x00007fe708664257 in std_event_loop_once (ev=0x7fe70e4cbc30, location=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent_standard.c:114
#56 0x00007fe70866040d in _tevent_loop_once (ev=ev@entry=0x7fe70e4cbc30, location=location@entry=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent.c:533
#57 0x00007fe7086605ab in tevent_common_loop_wait (ev=0x7fe70e4cbc30, location=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent.c:637
#58 0x00007fe7086641f7 in std_event_loop_wait (ev=0x7fe70e4cbc30, location=0x7fe70bfee8e7 "src/util/server.c:702") at ../tevent_standard.c:140
#59 0x00007fe70bfd1993 in server_loop (main_ctx=0x7fe70e4cd080) at src/util/server.c:702
#60 0x00007fe70c84cb82 in main (argc=8, argv=<optimized out>) at src/providers/data_provider_be.c:587
Resolves:
https://fedorahosted.org/sssd/ticket/3266
Signed-off-by: Fabiano FidĂȘncio <[email protected]>
---
src/util/util.h | 1 -
src/util/util_watchdog.c | 55 +++++++++++++++++++++---------------------------
2 files changed, 24 insertions(+), 32 deletions(-)
diff --git a/src/util/util.h b/src/util/util.h
index 4bfccfa..f06bfab 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -644,6 +644,5 @@ int sss_unique_filename(TALLOC_CTX *owner, char *path_tmpl);
/* from util_watchdog.c */
int setup_watchdog(struct tevent_context *ev, int interval);
-void teardown_watchdog(void);
#endif /* __SSSD_UTIL_H__ */
diff --git a/src/util/util_watchdog.c b/src/util/util_watchdog.c
index c184fbd..9bd0e19 100644
--- a/src/util/util_watchdog.c
+++ b/src/util/util_watchdog.c
@@ -41,20 +41,29 @@ static bool watchdog_detect_timeshift(void)
{
time_t prev_time;
time_t cur_time;
- errno_t ret;
prev_time = watchdog_ctx.timestamp;
cur_time = watchdog_ctx.timestamp = time(NULL);
if (cur_time < prev_time) {
- /* Time shift detected. We need to restart watchdog. */
- DEBUG(SSSDBG_IMPORTANT_INFO, "Time shift detected, "
- "restarting watchdog!\n");
- teardown_watchdog();
- ret = setup_watchdog(watchdog_ctx.ev, watchdog_ctx.input_interval);
- if (ret != EOK) {
- DEBUG(SSSDBG_FATAL_FAILURE, "Unable to restart watchdog "
- "[%d]: %s\n", ret, sss_strerror(ret));
- orderly_shutdown(1);
+ struct itimerspec its;
+ errno_t ret;
+ /* Time shift detected.
+ *
+ * As restarting the watchdog from the signal_handler may
+ * lead sssd_be to a deadlock due to the use of non async-
+ * signal-safe function. let's just (re)set the timer's
+ * time.
+ */
+
+ /* As done when setting up the watchdog handler,
+ * we give 1 second head start to the watchdog event */
+ its.it_value.tv_sec = watchdog_ctx.interval.tv_sec + 1;
+ its.it_value.tv_nsec = 0;
+ its.it_interval.tv_sec = watchdog_ctx.interval.tv_sec;
+ its.it_interval.tv_nsec = 0;
+ ret = timer_settime(watchdog_ctx.timerid, 0, &its, NULL);
+ if (ret == -1) {
+ _exit(1);
}
return true;
@@ -78,9 +87,11 @@ static void watchdog_handler(int sig)
/* if a pre-defined number of ticks passed by kills itself */
if (__sync_add_and_fetch(&watchdog_ctx.ticks, 1) > WATCHDOG_MAX_TICKS) {
- DEBUG(SSSDBG_FATAL_FAILURE,
- "Watchdog timer overflow, killing process!\n");
- orderly_shutdown(1);
+ /* A debug message here would be welcome but has been purposely
+ * avoided as it's (way down in the stack) a non async-signal-safe
+ * function.
+ */
+ _exit(1);
}
}
@@ -163,21 +174,3 @@ int setup_watchdog(struct tevent_context *ev, int interval)
return EOK;
}
-
-void teardown_watchdog(void)
-{
- int ret;
-
- /* Disarm the timer */
- errno = 0;
- ret = timer_delete(watchdog_ctx.timerid);
- if (ret == -1) {
- ret = errno;
- DEBUG(SSSDBG_FATAL_FAILURE,
- "Failed to destroy watchdog timer (%d) [%s]\n",
- ret, strerror(ret));
- }
-
- /* and kill the watchdog event */
- talloc_free(watchdog_ctx.te);
-}
_______________________________________________
sssd-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]