The installed version of 389* is 1.3.7.10-1.fc27 for armv7hl, which appears to be the latest available version.
On Wed, May 16, 2018 at 2:38 PM, Jonathan Vaughn <jonat...@creatuity.com> wrote: > (gdb) up > #1 0xb6926b40 in cvt_s (flags=0, prec=<optimized out>, width=0, str=0x4 > <error: Cannot access memory at address 0x4>, ss=<optimized out>) > at ../../.././nspr/pr/src/io/prprf.c:374 > 374 slen = strlen(str); > (gdb) up > #2 dosprintf (ss=ss@entry=0x9e06e4bc, fmt=0xb34b0df2 "", fmt@entry=0xb34da770 > <agmt_set> "\360\317p\002", ap=...) at ../../.././nspr/pr/src/io/ > prprf.c:1018 > 1018 rv = cvt_s(ss, u.s, width, prec, flags); > (gdb) up > #3 0xb6926c8c in PR_vsmprintf (fmt=fmt@entry=0xb34da770 <agmt_set> > "\360\317p\002", ap=..., ap@entry=...) at ../../.././nspr/pr/src/io/ > prprf.c:1184 > 1184 rv = dosprintf(&ss, fmt, ap); > (gdb) up > #4 0xb6de9d18 in slapi_ch_smprintf (fmt=0xb34b0de0 "%s;%s;%s;%ld;%d;%s") > at ldap/servers/slapd/ch_malloc.c:331 > 331 p = PR_vsmprintf(fmt, ap); > (gdb) up > #5 0xb34625a4 in agmt_update_maxcsn (r=r@entry=0x2737810, sdn=0x38d9a40, > sdn@entry=0x10, op=<optimized out>, mods=0x10, mods@entry=0x27d0100, > csn=csn@entry=0x38de350) > at ldap/servers/plugins/replication/repl5_agmt.c:3036 > 3036 agmt->maxcsn = slapi_ch_smprintf("%s;%s;%s;%ld;%d;%s", > slapi_sdn_get_dn(agmt->replarea), > (gdb) p *agmt > $1 = {hostname = 0x2757be0 "ipa-12.company.internal", port = 389, > transport_flags = 0, binddn = 0x26ed650 "", creds = 0x26ed7a0, bindmethod = > 3, replarea = 0x2757480, > frac_attrs = 0x27579c0, frac_attrs_total = 0x2757a40, > frac_attr_total_defined = 1, schedule = 0x22dd0c0, auto_initialize = 502, > dn = 0x2756d00, rdn = 0x2756c20, > long_name = 0x22dd100 "agmt=\"cn=meToipa-12.company.internal\" > (ipa-12:5)", protocol = 0x2220930, changecounters = 0x20cb180, > num_changecounters = 0, > max_changecounters = 256, last_update_start_time = 1526499214, > last_update_end_time = 1526499214, > last_update_status = "Error (0) Replica acquired successfully: > Incremental update succeeded", '\000' <repeats 954 times>, > update_in_progress = 0, is_enabled = 1, > last_init_start_time = 0, last_init_end_time = 0, last_init_status = > '\000' <repeats 1023 times>, lock = 0x2741740, consumerRUV = 0x2772e50, > consumerSchemaCSN = 0x39da520, consumerRID = 4, tmpConsumerRID = 0, > timeout = 120, stop_in_progress = 0, busywaittime = 0, pausetime = 0, priv > = 0x0, > attrs_to_strip = 0x2757ba0, agreement_type = 0, protocol_timeout = > 0x26ed5f0, maxcsn = 0x0, flowControlWindow = 1000, flowControlPause = 2000, > ignoreMissingChange = 0, > attr_lock = 0x2757c20, WaitForAsyncResults = 100} > > > On Tue, May 15, 2018 at 5:36 PM, Mark Reynolds <mreyno...@redhat.com> > wrote: > >> This looks really familiar and I thought it was fixed. It should have >> been fixed in 1.3.7.10-1 (https://pagure.io/389-ds-base/issue/49618). >> In your debug session go "up" into agmt_maxcsn_update() and do: >> >> (gdb) p *agmt >> >> Then send us that output please. >> >> Thanks, >> Mark >> >> >> On 05/15/2018 05:29 PM, Jonathan Vaughn via FreeIPA-users wrote: >> >> Here is a backtrace from live gdb after the segfault. Looks like things >> went wrong somewhere during in the replication code ? >> >> Thread 36 "ns-slapd" received signal SIGSEGV, Segmentation fault. >> [Switching to Thread 0x9e0bc280 (LWP 4662)] >> strlen () at ../sysdeps/arm/armv6t2/strlen.S:142 >> 142 ldrd data1a, data1b, [src] >> (gdb) bt >> #0 0xb6728f2e in strlen () at ../sysdeps/arm/armv6t2/strlen.S:142 >> #1 0xb6973b40 in cvt_s (flags=0, prec=<optimized out>, width=0, str=0x4 >> <error: Cannot access memory at address 0x4>, ss=<optimized out>) >> at ../../.././nspr/pr/src/io/prprf.c:374 >> #2 0xb6973b40 in dosprintf (ss=ss@entry=0x9e0bb4bc, fmt=0xb34fddf2 "", >> fmt@entry=0xb3527770 <agmt_set> "\360\357\305\002", ap=...) >> at ../../.././nspr/pr/src/io/prprf.c:1018 >> #3 0xb6973c8c in PR_vsmprintf (fmt=fmt@entry=0xb3527770 <agmt_set> >> "\360\357\305\002", ap=..., ap@entry=...) at >> ../../.././nspr/pr/src/io/prprf.c:1184 >> #4 0xb6e36d18 in slapi_ch_smprintf (fmt=0xb34fdde0 "%s;%s;%s;%ld;%d;%s") >> at ldap/servers/slapd/ch_malloc.c:331 >> #5 0xb34af5a4 in agmt_update_maxcsn (r=r@entry=0x2c89810, >> sdn=0x3e2bac0, sdn@entry=0x10, op=<optimized out>, mods=0x10, >> mods@entry=0x3eec100, >> csn=csn@entry=0x3e30220) >> at ldap/servers/plugins/replication/repl5_agmt.c:3036 >> #6 0xb34bd424 in write_changelog_and_ruv (pb=pb@entry=0x2cb54a0) at >> ldap/servers/plugins/replication/repl5_plugins.c:1124 >> #7 0xb34be7ec in multimaster_be_betxnpostop_add (pb=0x2cb54a0) at >> ldap/servers/plugins/replication/repl5_plugins.c:855 >> #8 0xb34be880 in multimaster_mmr_postop (pb=<optimized out>, flags=560) >> at ldap/servers/plugins/replication/repl5_plugins.c:616 >> #9 0xb6e8fcf0 in plugin_call_mmr_plugin_postop (pb=pb@entry=0x2cb54a0, >> e=e@entry=0x0, flags=flags@entry=560) at ldap/servers/slapd/plugin_mmr. >> c:65 >> #10 0xb35ba870 in ldbm_back_add (pb=0x2cb54a0) at >> ldap/servers/slapd/back-ldbm/ldbm_add.c:1218 >> #11 0xb6e2ce4c in op_shared_add (pb=pb@entry=0x2cb54a0) at >> ldap/servers/slapd/add.c:679 >> #12 0xb6e2d35c in add_internal_pb (pb=pb@entry=0x2cb54a0) at >> ldap/servers/slapd/add.c:407 >> #13 0xb6e2e0f8 in slapi_add_internal_pb (pb=pb@entry=0x2cb54a0) at >> ldap/servers/slapd/add.c:332 >> #14 0xb368db50 in ipa_topo_util_segment_write (tconf=tconf@entry=0x2cb5860, >> tsegm=tsegm@entry=0x3f2d9a0) at topology_util.c:1251 >> #15 0xb368e01c in ipa_topo_util_update_agmt_list (conf=0x2cb5860, >> repl_segments=<optimized out>) at topology_util.c:696 >> #16 0xb368891c in ipa_topo_apply_shared_config () at topology_init.c:165 >> #17 0xb6e4e65c in eq_call_all () at ldap/servers/slapd/eventq.c:278 >> #18 0xb6e4e65c in eq_loop (arg=<optimized out>) at >> ldap/servers/slapd/eventq.c:323 >> #19 0xb6982d68 in _pt_root (arg=0x2c8da40) at >> ../../.././nspr/pr/src/pthreads/ptthread.c:201 >> #20 0xb6906ee8 in start_thread (arg=0x9e0bc280) at pthread_create.c:465 >> #21 0xb6785da8 in None () at ../sysdeps/unix/sysv/linux/arm/clone.S:73 >> >> >> On Tue, May 15, 2018 at 3:01 AM, thierry bordaz <tbor...@redhat.com> >> wrote: >> >>> Hi Jonathan, >>> >>> This problem looks new to me and has something specific to your >>> environment. >>> I think the best approach is to continue to debug on your system if you >>> have the possibility to do so. >>> >>> From strace we can see that DS started smoothly (created its pid file >>> then notified systemd it was running fine). According to the pstack >>> nunc-stans was running and was able to accept network events even if it >>> appears it detected no incoming connection. >>> So the server should be ready to serve for some seconds (more than a >>> minute), then it crashed with one thread dereferencing likely a wrong >>> pointer. >>> >>> Could you attach a debugger when the server is started and wait for the >>> sigsegv to occur. Then confirm the crashing thread backstack. >>> If it is confirmed, I am afraid this is a stack corruption and valgrind >>> could help (http://www.port389.org/docs/389ds/FAQ/faq.html#debugging-me >>> mory-growthinvalid-access-with-valgrind). >>> >>> best regards >>> thierry >>> >>> >>> On 05/14/2018 10:20 PM, Jonathan Vaughn wrote: >>> >>> Here's a strace from before it dies. Most of the elapsed time is it >>> waiting on some futex call it looks like near the end, when it finally >>> "returns" (from lack of strace output for duration of call I assume it >>> didn't actually return but SIGSEGV in that call) and strace prints ' = ?' >>> on the futex it then immediately reports SIGSEGV after. So maybe the >>> problem is that futex call, which may mean the problem is not directly in >>> 389DS / FreeIPA itself? >>> >>> >>> >>> 15:13:31.626587 (+ 0.000630) listen(8, 128) = 0 <0.000068> >>> 15:13:31.626857 (+ 0.000235) listen(9, 128) = 0 <0.000048> >>> 15:13:31.627111 (+ 0.000251) clock_gettime(CLOCK_MONOTONIC, >>> {tv_sec=1464932, tv_nsec=41120614}) = 0 <0.000085> >>> 15:13:31.627457 (+ 0.000356) clock_gettime(CLOCK_REALTIME, >>> {tv_sec=1526328811, tv_nsec=627560772}) = 0 <0.000043> >>> 15:13:31.631233 (+ 0.003839) clock_gettime(CLOCK_MONOTONIC, >>> {tv_sec=1464932, tv_nsec=45286796}) = 0 <0.000077> >>> 15:13:31.631720 (+ 0.000427) clock_gettime(CLOCK_MONOTONIC, >>> {tv_sec=1464932, tv_nsec=45661430}) = 0 <0.000042> >>> 15:13:31.631955 (+ 0.000220) clock_gettime(CLOCK_REALTIME, >>> {tv_sec=1526328811, tv_nsec=632049036}) = 0 <0.000047> >>> 15:13:31.635669 (+ 0.003785) clock_gettime(CLOCK_MONOTONIC, >>> {tv_sec=1464932, tv_nsec=49725840}) = 0 <0.000146> >>> 15:13:31.636484 (+ 0.000784) write(16, "a", 1) = 1 <0.000118> >>> 15:13:31.636855 (+ 0.000341) sched_yield() = 0 <0.000252> >>> 15:13:31.637322 (+ 0.000470) futex(0x1cb57a0, FUTEX_WAKE_PRIVATE, 1) >>> = 1 <0.000088> >>> 15:13:31.637897 (+ 0.000610) write(16, "a", 1) = 1 <0.000221> >>> 15:13:31.638394 (+ 0.000467) sched_yield() = 0 <0.000047> >>> 15:13:31.638619 (+ 0.000202) futex(0x1cb5710, FUTEX_WAKE_PRIVATE, 1) >>> = 1 <0.000065> >>> 15:13:31.638908 (+ 0.000298) openat(AT_FDCWD, >>> "/var/run/dirsrv/slapd-COMPANY-INTERNAL.pid", O_WRONLY|O_CREAT|O_TRUNC, >>> 0666) = 33 <0.000831> >>> 15:13:31.640260 (+ 0.001387) getpid() = 32353 <0.000077> >>> 15:13:31.640558 (+ 0.000256) fstat64(33, {st_mode=S_IFREG|0644, >>> st_size=0, ...}) = 0 <0.000119> >>> 15:13:31.641106 (+ 0.000556) write(33, "32353\n", 6) = 6 <0.000127> >>> 15:13:31.641472 (+ 0.000362) close(33) = 0 <0.000519> >>> 15:13:31.642216 (+ 0.000758) >>> chmod("/var/run/dirsrv/slapd-COMPANY-INTERNAL.pid", >>> 0644) = 0 <0.000152> >>> 15:13:31.642900 (+ 0.000679) clock_gettime(CLOCK_REALTIME, >>> {tv_sec=1526328811, tv_nsec=643020294}) = 0 <0.000056> >>> 15:13:31.643495 (+ 0.000590) write(2, "[14/May/2018:15:13:31.643020294 >>> "..., 134) = 134 <0.002697> >>> 15:13:31.646515 (+ 0.003052) clock_gettime(CLOCK_REALTIME, >>> {tv_sec=1526328811, tv_nsec=646694394}) = 0 <0.000075> >>> 15:13:31.646892 (+ 0.000337) write(4, "[14/May/2018:15:13:31.646694394 >>> "..., 134) = 134 <0.000522> >>> 15:13:31.647841 (+ 0.000973) fsync(4) = 0 <0.005967> >>> 15:13:31.654425 (+ 0.006617) clock_gettime(CLOCK_REALTIME, >>> {tv_sec=1526328811, tv_nsec=654598946}) = 0 <0.000253> >>> 15:13:31.655137 (+ 0.000717) write(2, "[14/May/2018:15:13:31.654598946 >>> "..., 136) = 136 <0.002427> >>> 15:13:31.658312 (+ 0.003165) clock_gettime(CLOCK_REALTIME, >>> {tv_sec=1526328811, tv_nsec=658486117}) = 0 <0.000251> >>> 15:13:31.659032 (+ 0.000682) write(4, "[14/May/2018:15:13:31.658486117 >>> "..., 136) = 136 <0.000346> >>> 15:13:31.659623 (+ 0.000595) fsync(4) = 0 <0.003311> >>> 15:13:31.663230 (+ 0.003642) getpid() = 32353 <0.000045> >>> 15:13:31.663732 (+ 0.000454) socket(AF_UNIX, >>> SOCK_DGRAM|SOCK_CLOEXEC, 0) = 33 <0.000296> >>> 15:13:31.664760 (+ 0.001048) getsockopt(33, SOL_SOCKET, SO_SNDBUF, >>> [163840], [4]) = 0 <0.000108> >>> 15:13:31.665141 (+ 0.000386) setsockopt(33, SOL_SOCKET, >>> SO_SNDBUFFORCE, [8388608], 4) = -1 EPERM (Operation not permitted) >>> <0.000051> >>> 15:13:31.665500 (+ 0.000334) setsockopt(33, SOL_SOCKET, SO_SNDBUF, >>> [8388608], 4) = 0 <0.000229> >>> 15:13:31.665973 (+ 0.000468) sendmsg(33, >>> {msg_name={sa_family=AF_UNIX, sun_path="/run/systemd/notify"}, >>> msg_namelen=21, msg_iov=[{iov_base="READY=1\nSTATUS=slapd started: >>> Re"..., iov_len=69}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, >>> MSG_NOSIGNAL) = 69 <0.000600> >>> 15:13:31.667270 (+ 0.001334) close(33) = 0 <0.003140> >>> 15:13:31.677320 (+ 0.010146) futex(0xb31122e8, FUTEX_WAIT, 32357, >>> NULL) = ? >>> 15:14:27.661809 (+ 55.984448) +++ killed by SIGSEGV (core dumped) +++ >>> >>> On Mon, May 14, 2018 at 10:27 AM, thierry bordaz <tbor...@redhat.com> >>> wrote: >>> >>>> Hi Jonathan, >>>> >>>> This is weird as the crashing thread stack looks truncated (did you >>>> copy/paste all of it ?) >>>> >>>> Thread 1 (Thread 0x9e13c280 (LWP 17245)): >>>> #0 0xb67bbf2e in strlen () at /lib/libc.so.6 >>>> #1 0xb6a06b40 in dosprintf () at /lib/libnspr4.so >>>> #2 0x00000000 in None () >>>> >>>> Did you install 389-ds-base-debuginfo ? >>>> How did you get that backtrace ? from a core dumped, pstack ? Can you >>>> attach a debugger before the crash occurs ? >>>> >>>> It looks it crashed soon at startup, could it be related to a broken >>>> dse.ldif. It should exists a dse.ldif.OK, is it possibly to try to start >>>> with it ? >>>> >>>> best regards >>>> thierry >>>> >>>> On 05/12/2018 01:22 AM, Jonathan Vaughn via FreeIPA-users wrote: >>>> >>>> Not sure if it makes a difference... I was looking into this again and >>>> realized I had a bunch of messages from gdb telling me to install more >>>> debuginfo. I've done that now, here it is again freshly run through gdb >>>> >>>> GNU gdb (GDB) Fedora 8.0.1-36.fc27 >>>> Copyright (C) 2017 Free Software Foundation, Inc. >>>> License GPLv3+: GNU GPL version 3 or later < >>>> http://gnu.org/licenses/gpl.html> >>>> This is free software: you are free to change and redistribute it. >>>> There is NO WARRANTY, to the extent permitted by law. Type "show >>>> copying" >>>> and "show warranty" for details. >>>> This GDB was configured as "armv7hl-redhat-linux-gnueabi". >>>> Type "show configuration" for configuration details. >>>> For bug reporting instructions, please see: >>>> <http://www.gnu.org/software/gdb/bugs/>. >>>> Find the GDB manual and other documentation resources online at: >>>> <http://www.gnu.org/software/gdb/documentation/>. >>>> For help, type "help". >>>> Type "apropos word" to search for commands related to "word"... >>>> Reading symbols from /usr/sbin/ns-slapd...Reading symbols from >>>> /usr/lib/debug/usr/sbin/ns-slapd-1.3.7.10-1.fc27.arm.debug...done. >>>> done. >>>> ... >>>> >>>> Thread 1 (Thread 0x9e13c280 (LWP 17245)): >>>> #0 0xb67bbf2e in strlen () at /lib/libc.so.6 >>>> #1 0xb6a06b40 in dosprintf () at /lib/libnspr4.so >>>> #2 0x00000000 in None () >>>> >>>> >>>> >>>> On Tue, May 8, 2018 at 7:52 AM, Rob Crittenden <rcrit...@redhat.com> >>>>> wrote: >>>>> >>>>>> Jonathan Vaughn via FreeIPA-users wrote: >>>>>> >>>>>>> Still trying to figure this out. It looks like slapd is dying, I >>>>>>> thought it was still running for some reason. >>>>>>> >>>>>>> slapd is dying to segfault. strace of it happening doesn't seem to >>>>>>> reveal much: >>>>>>> >>>>>> >>>>>> A stack trace would very much help trying to track down the cause. >>>>>> >>>>>> http://directory.fedoraproject.org/docs/389ds/FAQ/faq.html#d >>>>>> ebugging-crashes >>>>>> >>>>>> rob >>>>>> >>>>>> >>>>>>> 18:32:41.543717 (+ 0.000801) openat(AT_FDCWD, >>>>>>> "/var/run/dirsrv/slapd-COMPANY-INTERNAL.pid", >>>>>>> O_WRONLY|O_CREAT|O_TRUNC, 0666) = 32 >>>>>>> 18:32:41.544907 (+ 0.001195) getpid() = 16014 >>>>>>> 18:32:41.545269 (+ 0.000329) fstat64(32, {st_mode=S_IFREG|0644, >>>>>>> st_size=0, ...}) = 0 >>>>>>> 18:32:41.545799 (+ 0.000536) write(32, "16014\n", 6) = 6 >>>>>>> 18:32:41.546603 (+ 0.000818) close(32) = 0 >>>>>>> 18:32:41.547061 (+ 0.000448) >>>>>>> chmod("/var/run/dirsrv/slapd-COMPANY-INTERNAL.pid", >>>>>>> 0644) = 0 >>>>>>> 18:32:41.547741 (+ 0.000676) clock_gettime(CLOCK_REALTIME, >>>>>>> {tv_sec=1525735961, tv_nsec=548030641}) = 0 >>>>>>> 18:32:41.548324 (+ 0.000587) write(2, >>>>>>> "[07/May/2018:18:32:41.548030641 "..., 134) = 134 >>>>>>> 18:32:41.551096 (+ 0.002840) clock_gettime(CLOCK_REALTIME, >>>>>>> {tv_sec=1525735961, tv_nsec=551287555}) = 0 >>>>>>> 18:32:41.551568 (+ 0.000406) write(4, >>>>>>> "[07/May/2018:18:32:41.551287555 "..., 134) = 134 >>>>>>> 18:32:41.552360 (+ 0.000811) fsync(4) = 0 >>>>>>> 18:32:41.558499 (+ 0.006170) clock_gettime(CLOCK_REALTIME, >>>>>>> {tv_sec=1525735961, tv_nsec=558678099}) = 0 >>>>>>> 18:32:41.558901 (+ 0.000350) write(2, >>>>>>> "[07/May/2018:18:32:41.558678099 "..., 136) = 136 >>>>>>> 18:32:41.561537 (+ 0.002680) clock_gettime(CLOCK_REALTIME, >>>>>>> {tv_sec=1525735961, tv_nsec=561718659}) = 0 >>>>>>> 18:32:41.562357 (+ 0.000793) write(4, >>>>>>> "[07/May/2018:18:32:41.561718659 "..., 136) = 136 >>>>>>> 18:32:41.563293 (+ 0.001148) fsync(4) = 0 >>>>>>> 18:32:41.566928 (+ 0.003452) getpid() = 16014 >>>>>>> 18:32:41.567712 (+ 0.000752) socket(AF_UNIX, >>>>>>> SOCK_DGRAM|SOCK_CLOEXEC, 0) = 32 >>>>>>> 18:32:41.568628 (+ 0.000912) getsockopt(32, SOL_SOCKET, >>>>>>> SO_SNDBUF, [163840], [4]) = 0 >>>>>>> 18:32:41.568972 (+ 0.000319) setsockopt(32, SOL_SOCKET, >>>>>>> SO_SNDBUFFORCE, [8388608], 4) = -1 EPERM (Operation not permitted) >>>>>>> 18:32:41.569548 (+ 0.000589) setsockopt(32, SOL_SOCKET, >>>>>>> SO_SNDBUF, [8388608], 4) = 0 >>>>>>> 18:32:41.570064 (+ 0.000513) sendmsg(32, >>>>>>> {msg_name={sa_family=AF_UNIX, sun_path="/run/systemd/notify"}, >>>>>>> msg_namelen=21, msg_iov=[{iov_base="READY=1\nSTATUS=slapd started: >>>>>>> Re"..., iov_len=69}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, >>>>>>> MSG_NOSIGNAL) = 69 >>>>>>> 18:32:41.570845 (+ 0.000789) close(32) = 0 >>>>>>> 18:32:41.576358 (+ 0.005575) futex(0xb30ed2e8, FUTEX_WAIT, >>>>>>> 16016, NULL) = ? >>>>>>> 18:33:01.730774 (+ 20.154428) +++ killed by SIGSEGV +++ >>>>>>> >>>>>>> >>>> >>> >>> >> >> >> _______________________________________________ >> FreeIPA-users mailing list -- freeipa-users@lists.fedorahosted.org >> To unsubscribe send an email to freeipa-users-le...@lists.fedorahosted.org >> >> >> >
_______________________________________________ FreeIPA-users mailing list -- freeipa-users@lists.fedorahosted.org To unsubscribe send an email to freeipa-users-le...@lists.fedorahosted.org