Hi.

About a month ago I was experimenting with HAST on my servers, and, though I did have some complications with signal 6 during the init phase, I was able to start it and it was working in test mode for a couple of weeks. After that I had to reboot both of them, and now it doesn't start at all - hastd is crashing on signal 6 on both nodes, and I'm unable to launch it as primary on either one. As soon as I switch from init or secondary to primary on either node, bad things start to happen - cyclic signal 6 for hastd and hangs for hastctl.


Both nodes are running FreeBSD 11.1-RELEASE-pX (p1 and p6).

Here's an excerpt from the PR https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=227461 I just created:

===Cut===

Node A (11.1-RELEASE-p1):
========

[root@gw0:/var/log]# service hastd start
Starting hastd.
[root@gw0:/var/log]# hastctl status
Name    Status   Role           Components
hasta   -        init           /dev/gpt/hasta  tcp4://192.168.0.247
hastb   -        init           /dev/gpt/hastb  tcp4://192.168.0.247
[root@gw0:/var/log]# hastctl role secondary hasta
[root@gw0:/var/log]# hastctl role secondary hastb
[root@gw0:/var/log]# hastctl status
Name    Status   Role           Components
hasta   -        secondary      /dev/gpt/hasta  tcp4://192.168.0.247
hastb   -        secondary      /dev/gpt/hastb  tcp4://192.168.0.247

Node B (11.1-RELEASE-p6):
========
[root@gw1:/var/log]# service hastd start
Starting hastd.
[root@gw1:/var/log]# hastctl status
Name    Status   Role           Components
hasta   -        init           /dev/gpt/hasta  tcp4://192.168.0.248
hastb   -        init           /dev/gpt/hastb  tcp4://192.168.0.248
[root@gw1:/var/log]# hastctl role promary hasta
usage: hastctl create [-d] [-c config] [-e extentsize] [-k keepdirty]
                [-m mediasize] name ...
       hastctl role [-d] [-c config] <init | primary | secondary> all | name ...
       hastctl list [-d] [-c config] [all | name ...]
       hastctl status [-d] [-c config] [all | name ...]
       hastctl dump [-d] [-c config] [all | name ...]
[root@gw1:/var/log]# hastctl role primary hasta
[root@gw1:/var/log]# hastctl role primary hastb
[root@gw1:/var/log]# hastctl status
(hangs)

Node B dmesg:
pid 26813 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26814 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26815 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26816 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26817 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26822 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26825 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26828 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26829 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26830 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26831 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26833 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26836 (hastd), uid 0: exited on signal 6 (core dumped)
pid 26837 (hastd), uid 0: exited on signal 6 (core dumped)

Node B messages:
Apr 12 15:02:49 gw1 kernel: pid 26891 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:02:50 gw1 hastd[26679]: [hastb] (primary) Worker process killed 
(pid=26891, signal=6).
Apr 12 15:02:50 gw1 hastd[26893]: [hasta] (primary) Descriptor 7 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:02:50 gw1 hastd[26893]: [hasta] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:02:50 gw1 kernel: pid 26893 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:02:51 gw1 hastd[26679]: [hasta] (primary) Worker process killed 
(pid=26893, signal=6).
Apr 12 15:02:51 gw1 hastd[26896]: [hastb] (primary) Descriptor 7 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:02:51 gw1 hastd[26896]: [hastb] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:02:52 gw1 kernel: pid 26896 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:02:52 gw1 hastd[26679]: [hastb] (primary) Worker process killed 
(pid=26896, signal=6).
Apr 12 15:02:52 gw1 hastd[26900]: [hasta] (primary) Descriptor 7 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:02:52 gw1 hastd[26900]: [hasta] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:02:53 gw1 kernel: pid 26900 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:02:54 gw1 hastd[26679]: [hasta] (primary) Worker process killed 
(pid=26900, signal=6).
Apr 12 15:02:54 gw1 hastd[26904]: [hastb] (primary) Descriptor 7 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:02:54 gw1 hastd[26904]: [hastb] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:02:54 gw1 kernel: pid 26904 (hastd), uid 0: exited on signal 6 (core 
dumped)

Now when I'm trying to switch A to primary:

[root@gw0:/var/log]# hastctl role primary hastb
[root@gw0:/var/log]# hastctl role primary hasta
[root@gw0:/var/log]# hastctl status
(hangs)

Node A dmesg:

pid 72301 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72328 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72355 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72389 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72412 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72436 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72467 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72496 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72514 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72530 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72554 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72584 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72620 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72656 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72708 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72759 (hastd), uid 0: exited on signal 6 (core dumped)
pid 72799 (hastd), uid 0: exited on signal 6 (core dumped)

Apr 12 15:04:40 gw0 kernel: pid 72530 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:41 gw0 hastd[63097]: [hasta] (primary) Worker process killed 
(pid=72530, signal=6).
Apr 12 15:04:41 gw0 hastd[72554]: [hastb] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:41 gw0 hastd[72554]: [hastb] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:41 gw0 kernel: pid 72554 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:42 gw0 hastd[63097]: [hastb] (primary) Worker process killed 
(pid=72554, signal=6).
Apr 12 15:04:42 gw0 hastd[72584]: [hasta] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:42 gw0 hastd[72584]: [hasta] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:42 gw0 kernel: pid 72584 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:43 gw0 hastd[63097]: [hasta] (primary) Worker process killed 
(pid=72584, signal=6).
Apr 12 15:04:43 gw0 hastd[72620]: [hastb] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:43 gw0 hastd[72620]: [hastb] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:43 gw0 kernel: pid 72620 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:44 gw0 hastd[63097]: [hastb] (primary) Worker process killed 
(pid=72620, signal=6).
Apr 12 15:04:44 gw0 hastd[72656]: [hasta] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:44 gw0 hastd[72656]: [hasta] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:44 gw0 kernel: pid 72656 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:45 gw0 hastd[63097]: [hasta] (primary) Worker process killed 
(pid=72656, signal=6).
Apr 12 15:04:45 gw0 hastd[72708]: [hastb] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:45 gw0 hastd[72708]: [hastb] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:45 gw0 kernel: pid 72708 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:46 gw0 hastd[63097]: [hastb] (primary) Worker process killed 
(pid=72708, signal=6).
Apr 12 15:04:46 gw0 hastd[72759]: [hasta] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:46 gw0 hastd[72759]: [hasta] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:46 gw0 kernel: pid 72759 (hastd), uid 0: exited on signal 6 (core 
dumped)
Apr 12 15:04:47 gw0 hastd[63097]: [hasta] (primary) Worker process killed 
(pid=72759, signal=6).
Apr 12 15:04:47 gw0 hastd[72799]: [hastb] (primary) Descriptor 8 is open (pipe 
or FIFO), but should be closed.
Apr 12 15:04:47 gw0 hastd[72799]: [hastb] (primary) Aborted at function 
descriptors_assert, file /usr/src/sbin/hastd/hastd.c, line 303.
Apr 12 15:04:47 gw0 kernel: pid 72799 (hastd), uid 0: exited on signal 6 (core 
dumped)

Node A config:
==============
resource hasta {
    local /dev/gpt/hasta
    on gw0 {
        remote tcp4://192.168.0.247
        source tcp4://192.168.0.248
    }
    on gw1 {
        remote tcp4://192.168.0.248
        source tcp4://192.168.0.247
    }
}

resource hastb {
    local /dev/gpt/hastb
    on gw0 {
        remote tcp4://192.168.0.247
        source tcp4://192.168.0.248
    }
    on gw1 {
        remote tcp4://192.168.0.248
        source tcp4://192.168.0.247
    }
}


Node B config:
==============

resource hasta {
    local /dev/gpt/hasta
    on gw0 {
        remote tcp4://192.168.0.247
        source tcp4://192.168.0.248
    }
    on gw1 {
        remote tcp4://192.168.0.248
        source tcp4://192.168.0.247
    }
}

resource hastb {
    local /dev/gpt/hastb
    on gw0 {
        remote tcp4://192.168.0.247
        source tcp4://192.168.0.248
    }
    on gw1 {
        remote tcp4://192.168.0.248
        source tcp4://192.168.0.247
    }
}

Backtrace:

(gdb) bt
#0  0x000000080155a84a in thr_kill () from /lib/libc.so.7
#1  0x000000080155a814 in __raise (s=6) at /usr/src/lib/libc/gen/raise.c:52
#2  0x000000080155a789 in abort () at /usr/src/lib/libc/stdlib/abort.c:65
#3  0x0000000000414579 in pjdlog_abort (func=0x420e3f "descriptors_assert",
    file=0x420aeb "/usr/src/sbin/hastd/hastd.c", line=303, failedexpr=0x0, 
fmt=<value optimized out>)
    at /usr/src/sbin/hastd/pjdlog.c:613
#4  0x0000000000408267 in descriptors_assert (res=0x80204b400, pjdlogmode=<value 
optimized out>)
    at /usr/src/sbin/hastd/hastd.c:303
#5  0x00000000004146eb in hastd_primary (res=0x80204b400) at 
/usr/src/sbin/hastd/primary.c:1030
#6  0x000000000040a55a in check_signals () at /usr/src/sbin/hastd/hastd.c:359
#7  0x0000000000408852 in main (argc=<value optimized out>, argv=<value optimized 
out>)
    at /usr/src/sbin/hastd/hastd.c:1138
#8  0x0000000000403b0f in _start ()
#9  0x000000080064f000 in ?? ()
#10 0x0000000000000000 in ?? ()
(gdb)

===Cut===

If somebody has any idea how I can bring it up - please let me know.

Thanks.

Eugene.

_______________________________________________
freebsd-stable@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to