OK, so, I reinstalled with debug binaries, having figured out how to
override global make.conf for specific builds.  This morning, three jobs
were hung, and once again it seemed to be random clients hung on random
files.  There was nothing much of interest showing in the client trace
files (at d200) except for heartbeat messages one after another.

On trying to kill the first stuck job, nothing happened except that it
did not terminate.  On trying to kill the second, the director crashed
again.

Upon restarting the Director and re-queueing the failed jobs, all ran
successfully.


Here's the traceback:


minbar:root:~:9 # cat /var/lib/bacula/bacula.15819.traceback
[New LWP 15832]
[New LWP 15833]
[New LWP 23804]
[New LWP 23810]
[New LWP 23811]
[New LWP 30797]
[New LWP 22128]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
0x00007f74d89434c5 in nanosleep () from /lib64/libpthread.so.0
$1 = "17-Jun-2020 08:24:13\000\000\000\000\000\000\000\000\000"
$2 = 0x5587048c6e40 <my_name> "minbar-dir"
$3 = 0x55870660d0f8 "bacula-dir"
$4 = 0x55870660d138 "/usr/sbin/bacula-dir"
$5 = 0x0
$6 = "MySQL", '\000' <repeats 44 times>
$7 = 0x7f74d89b44f4 "9.6.5 (11 June 2020)"
$8 = 0x7f74d89b44d8 "x86_64-pc-linux-gnu"
$9 = 0x7f74d89b44d1 "gentoo"
$10 = 0x7f74d89b3ff3 ""
$11 = "minbar", '\000' <repeats 43 times>
$12 = 0x7f74d89b44ec "gentoo "
Environment variable "TestName" not defined.
#0  0x00007f74d89434c5 in nanosleep () from /lib64/libpthread.so.0
#1  0x00007f74d896c4e6 in bmicrosleep (sec=sec@entry=60,
usec=usec@entry=0) at bsys.c:192
#2  0x000055870485c0d4 in wait_for_next_job
(one_shot_job_to_run=<optimized out>) at scheduler.c:121
#3  0x000055870482e5f5 in main (argc=<optimized out>, argv=<optimized
out>) at dird.c:387

Thread 8 (Thread 0x7f74c4ff9700 (LWP 22128)):
#0  0x00007f74d8942dfc in read () from /lib64/libpthread.so.0
#1  0x00007f74d8996ed3 in BSOCKCORE::socketRead (this=0x7f74d0000d78,
len=4, buf=0x7f74c4ff8df4, fd=<optimized out>) at ../lib/bsockcore.h:202
#2  BSOCKCORE::read_nbytes (nbytes=<optimized out>, ptr=0x7f74c4ff8df4
"t\177", this=<optimized out>) at bsockcore.c:1144
#3  BSOCKCORE::read_nbytes (this=0x7f74d0000d78, ptr=<optimized out>,
nbytes=4) at bsockcore.c:1130
#4  0x00007f74d89703bd in BSOCK::recv (this=this@entry=0x7f74d0000d78)
at bsock.c:441
#5  0x00005587048846d6 in handle_UA_client_request (arg=0x7f74d0000d78)
at ua_server.c:144
#6  0x00007f74d89a55b5 in workq_server (arg=0x5587048c79c0 <ua_workq>)
at workq.c:372
#7  0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#8  0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 7 (Thread 0x7f74c77fe700 (LWP 30797)):
#0  0x00007f74d89434c5 in nanosleep () from /lib64/libpthread.so.0
#1  0x00007f74d896c4e6 in bmicrosleep (sec=sec@entry=2,
usec=usec@entry=0) at bsys.c:192
#2  0x000055870484fd92 in jobq_server (arg=0x5587048c76a0 <job_queue>)
at jobq.c:616
#3  0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#4  0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 6 (Thread 0x7f74c5ffb700 (LWP 23811)):
#0  0x00007f74d8942dfc in read () from /lib64/libpthread.so.0
#1  0x00007f74d8996ed3 in BSOCKCORE::socketRead (this=0x7f74b00045c8,
len=4, buf=0x7f74c5ffa9b4, fd=<optimized out>) at ../lib/bsockcore.h:202
#2  BSOCKCORE::read_nbytes (nbytes=<optimized out>, ptr=0x7f74c5ffa9b4
"\372\377\377\377", this=<optimized out>) at bsockcore.c:1144
#3  BSOCKCORE::read_nbytes (this=0x7f74b00045c8, ptr=<optimized out>,
nbytes=4) at bsockcore.c:1130
#4  0x00007f74d89703bd in BSOCK::recv (this=this@entry=0x7f74b00045c8)
at bsock.c:441
#5  0x00005587048466e7 in bget_dirmsg (bs=bs@entry=0x7f74b00045c8) at
getmsg.c:150
#6  0x0000558704834d78 in wait_for_job_termination
(jcr=jcr@entry=0x55870667a8c8, timeout=timeout@entry=0) at backup.c:685
#7  0x0000558704837009 in do_backup (jcr=jcr@entry=0x55870667a8c8) at
backup.c:633
#8  0x0000558704849318 in job_thread (arg=0x55870667a8c8) at job.c:453
#9  0x000055870484f7fb in jobq_server (arg=0x5587048c76a0 <job_queue>)
at jobq.c:468
#10 0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#11 0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 5 (Thread 0x7f74c67fc700 (LWP 23810)):
#0  0x00007f74d89439ae in waitpid () from /lib64/libpthread.so.0
#1  0x00007f74d899a74c in signal_handler (sig=6) at signal.c:233
#2  <signal handler called>
#3  0x00007f74d8537621 in raise () from /lib64/libc.so.6
#4  0x00007f74d852155b in abort () from /lib64/libc.so.6
#5  0x00007f74d7e88c01 in ?? () from /usr/lib64/libmysqlclient.so.21
#6  0x00007f74d7e8b5c7 in ?? () from /usr/lib64/libmysqlclient.so.21
#7  0x00007f74d7e398d0 in ?? () from /usr/lib64/libmysqlclient.so.21
#8  0x00007f74d7e39e1d in ?? () from /usr/lib64/libmysqlclient.so.21
#9  0x00007f74d7e35146 in mysql_send_query () from
/usr/lib64/libmysqlclient.so.21
#10 0x00007f74d7e35385 in mysql_real_query () from
/usr/lib64/libmysqlclient.so.21
#11 0x00007f74d89e0138 in BDB_MYSQL::sql_query (this=0x558706673108,
query=0x7f74c801b320 "UPDATE Job SET JobStatus='f',EndTime='2020-06-17
08:24:13',ClientId=31,JobBytes=15644859,ReadBytes=0,JobFiles=19,JobErrors=1,VolSessionId=33,VolSessionTime=1592158122,PoolId=5,FileSetId=31,JobTDate=15"...,
flags=0) at mysql.c:537
#12 0x00007f74d89f323c in BDB::UpdateDB (this=this@entry=0x558706673108,
jcr=jcr@entry=0x558706672128, cmd=0x7f74c801b320 "UPDATE Job SET
JobStatus='f',EndTime='2020-06-17
08:24:13',ClientId=31,JobBytes=15644859,ReadBytes=0,JobFiles=19,JobErrors=1,VolSessionId=33,VolSessionTime=1592158122,PoolId=5,FileSetId=31,JobTDate=15"...,
can_be_empty=can_be_empty@entry=false, file=file@entry=0x7f74d8a040d7
"bdb.h", line=line@entry=140) at sql.c:474
#13 0x00007f74d8a0292c in BDB::bdb_update_job_end_record
(this=0x558706673108, jcr=jcr@entry=0x558706672128,
jr=jr@entry=0x558706672618) at sql_update.c:190
#14 0x000055870484aff1 in update_job_end_record (jcr=0x558706672128) at
job.c:1369
#15 0x0000558704835c1c in backup_cleanup (jcr=jcr@entry=0x558706672128,
TermCode=TermCode@entry=69) at backup.c:898
#16 0x00005587048491aa in job_thread (arg=0x558706672128) at job.c:455
#17 0x000055870484f7fb in jobq_server (arg=0x5587048c76a0 <job_queue>)
at jobq.c:468
#18 0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#19 0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 4 (Thread 0x7f74d561c700 (LWP 23804)):
#0  0x00007f74d8942dfc in read () from /lib64/libpthread.so.0
#1  0x00007f74d8996ed3 in BSOCKCORE::socketRead (this=0x7f74cc000fd8,
len=4, buf=0x7f74d561b9b4, fd=<optimized out>) at ../lib/bsockcore.h:202
#2  BSOCKCORE::read_nbytes (nbytes=<optimized out>, ptr=0x7f74d561b9b4
"\372\377\377\377", this=<optimized out>) at bsockcore.c:1144
#3  BSOCKCORE::read_nbytes (this=0x7f74cc000fd8, ptr=<optimized out>,
nbytes=4) at bsockcore.c:1130
#4  0x00007f74d89703bd in BSOCK::recv (this=this@entry=0x7f74cc000fd8)
at bsock.c:441
#5  0x00005587048466e7 in bget_dirmsg (bs=bs@entry=0x7f74cc000fd8) at
getmsg.c:150
#6  0x0000558704834d78 in wait_for_job_termination
(jcr=jcr@entry=0x558706683928, timeout=timeout@entry=0) at backup.c:685
#7  0x0000558704837009 in do_backup (jcr=jcr@entry=0x558706683928) at
backup.c:633
#8  0x0000558704849318 in job_thread (arg=0x558706683928) at job.c:453
#9  0x000055870484f7fb in jobq_server (arg=0x5587048c76a0 <job_queue>)
at jobq.c:468
#10 0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#11 0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 3 (Thread 0x7f74d664d700 (LWP 15833)):
#0  0x00007f74d893f878 in pthread_cond_timedwait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1  0x00007f74d89a4be9 in watchdog_thread (arg=<optimized out>) at
watchdog.c:299
#2  0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#3  0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 2 (Thread 0x7f74d6e4e700 (LWP 15832)):
#0  0x00007f74d85ef9d3 in select () from /lib64/libc.so.6
#1  0x00007f74d896fa38 in bnet_thread_server
(addrs=addrs@entry=0x55870660c718, max_clients=20,
client_wq=client_wq@entry=0x5587048c79c0 <ua_workq>,
handle_client_request=handle_client_request@entry=0x558704884650
<handle_UA_client_request(void*)>) at bnet_server.c:166
#2  0x0000558704884296 in connect_thread (arg=0x55870660c718) at
ua_server.c:85
#3  0x00007f74d8938ea7 in start_thread () from /lib64/libpthread.so.0
#4  0x00007f74d85f7c6f in clone () from /lib64/libc.so.6

Thread 1 (Thread 0x7f74d79110c0 (LWP 15819)):
#0  0x00007f74d89434c5 in nanosleep () from /lib64/libpthread.so.0
#1  0x00007f74d896c4e6 in bmicrosleep (sec=sec@entry=60,
usec=usec@entry=0) at bsys.c:192
#2  0x000055870485c0d4 in wait_for_next_job
(one_shot_job_to_run=<optimized out>) at scheduler.c:121
#3  0x000055870482e5f5 in main (argc=<optimized out>, argv=<optimized
out>) at dird.c:387
#0  0x00007f74d89434c5 in nanosleep () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007f74d896c4e6 in bmicrosleep (sec=sec@entry=60,
usec=usec@entry=0) at bsys.c:192
192     bsys.c: No such file or directory.
timeout = {tv_sec = 60, tv_nsec = 0}
tv = {tv_sec = 1, tv_usec = 1578147840}
tz = {tz_minuteswest = -661166920, tz_dsttime = 32628}
stat = <optimized out>
#2  0x000055870485c0d4 in wait_for_next_job
(one_shot_job_to_run=<optimized out>) at scheduler.c:121
121     scheduler.c: No such file or directory.
jcr = <optimized out>
job = <optimized out>
run = <optimized out>
now = <optimized out>
prev = <optimized out>
first = false
next_job = <optimized out>
tjcr = <optimized out>
#3  0x000055870482e5f5 in main (argc=<optimized out>, argv=<optimized
out>) at dird.c:387
387     dird.c: No such file or directory.
ch = <optimized out>
jcr = <optimized out>
no_signals = false
uid = 0x7fff69664b24 "root"
gid = 0x7fff69664b2c "bacula"
mode = <optimized out>
/usr/libexec/bacula/btraceback.gdb:24: Error in sourced command file:
No frame at level 4.
[Inferior 1 (process 15819) detached]
Attempt to dump current JCRs. njcrs=6
threadid=0x7f74d79110c0 JobId=0 JobStatus=R jcr=0x55870664f4c8
name=*JobMonitor*.2020-06-16_11.24.20_01
        use_count=1 killable=0
        JobType=I JobLevel=
        sched_time=16-Jun-2020 11:24 start_time=16-Jun-2020 11:24
        end_time=31-Dec-1969 19:00 wait_time=31-Dec-1969 19:00
        db=(nil) db_batch=(nil) batch_started=0
        wstore=0x558706615b78 rstore=(nil) wjcr=(nil)
client=0x558706617798 reschedule_count=0 SD_msg_chan_started=0
threadid=0x7f74c67fc700 JobId=24993 JobStatus=f jcr=0x558706672128
name=Fisherprice_Backup.2020-06-17_04.30.00_11
        use_count=1 killable=1
        JobType=B JobLevel=I
        sched_time=17-Jun-2020 04:30 start_time=17-Jun-2020 04:30
        end_time=17-Jun-2020 08:24 wait_time=31-Dec-1969 19:00
        db=0x558706673108 db_batch=(nil) batch_started=0
        wstore=0x558706615b78 rstore=(nil) wjcr=(nil)
client=0x558706617c78 reschedule_count=0 SD_msg_chan_started=0
BDB=0x558706673108 db_name=bacula db_user=bacula connected=true
        cmd="UPDATE Job SET JobStatus='f',EndTime='2020-06-17
08:24:13',ClientId=31,JobBytes=15644859,ReadBytes=0,JobFiles=19,JobErrors=1,VolSessionId=33,VolSessionTime=1592158122,PoolId=5,FileSetId=31,JobTDate=1592396653,RealEndTime='2020-06-17
08:24:13',PriorJobId=0,HasBase=0,PurgedFiles=0 WHERE JobId=24993" changes=15
        RWLOCK=0x558706673120 w_active=1 w_wait=0
threadid=0x7f74c5ffb700 JobId=24994 JobStatus=f jcr=0x55870667a8c8
name=Minbar_Backup.2020-06-17_04.30.00_12
        use_count=1 killable=1
        JobType=B JobLevel=I
        sched_time=17-Jun-2020 04:30 start_time=17-Jun-2020 04:30
        end_time=31-Dec-1969 19:00 wait_time=31-Dec-1969 19:00
        db=0x55870667b8a8 db_batch=(nil) batch_started=0
        wstore=0x558706615b78 rstore=(nil) wjcr=(nil)
client=0x558706617ee8 reschedule_count=0 SD_msg_chan_started=0
BDB=0x55870667b8a8 db_name=bacula db_user=bacula connected=true
        cmd="INSERT INTO File
(FileIndex,JobId,PathId,FilenameId,LStat,MD5,DeltaSeq) VALUES
(129,24994,4939,19751,'gC BgB9rk InJ B A A A I9a4 BAA BHw Be6PZK Be6PZK
Be6PZN A A C','hm7Q72q+MloeGARz9VAJfw',0)" changes=161
        RWLOCK=0x55870667b8c0 w_active=0 w_wait=0
threadid=0x7f74d561c700 JobId=24995 JobStatus=f jcr=0x558706683928
name=Narn_Backup.2020-06-17_04.30.00_13
        use_count=1 killable=1
        JobType=B JobLevel=I
        sched_time=17-Jun-2020 04:30 start_time=17-Jun-2020 04:30
        end_time=31-Dec-1969 19:00 wait_time=31-Dec-1969 19:00
        db=0x5587066854e8 db_batch=(nil) batch_started=0
        wstore=0x558706615b78 rstore=(nil) wjcr=(nil)
client=0x558706618158 reschedule_count=0 SD_msg_chan_started=0
BDB=0x5587066854e8 db_name=bacula db_user=bacula connected=true
        cmd="INSERT INTO File
(FileIndex,JobId,PathId,FilenameId,LStat,MD5,DeltaSeq) VALUES
(33,24995,2366,109,'c E EH9 R Po Bk A R CAA F Be6dSJ Bel6xw Bel6xw A A
C','0',0)" changes=34
        RWLOCK=0x558706685500 w_active=0 w_wait=0
threadid=0x7f74d79110c0 JobId=24996 JobStatus=p jcr=0x55870664d0c8
name=MySQL_Backup_New.2020-06-17_04.55.00_17
        use_count=1 killable=0
        JobType=B JobLevel=I
        sched_time=17-Jun-2020 04:55 start_time=17-Jun-2020 04:55
        end_time=31-Dec-1969 19:00 wait_time=17-Jun-2020 04:55
        db=0x55870664c688 db_batch=(nil) batch_started=0
        wstore=0x558706615b78 rstore=(nil) wjcr=(nil)
client=0x558706617ee8 reschedule_count=0 SD_msg_chan_started=0
BDB=0x55870664c688 db_name=bacula db_user=bacula connected=true
        cmd="SELECT
PoolId,Name,NumVols,MaxVols,UseOnce,UseCatalog,AcceptAnyVolume,AutoPrune,Recycle,VolRetention,VolUseDuration,MaxVolJobs,MaxVolFiles,MaxVolBytes,PoolType,LabelType,LabelFormat,RecyclePoolId,ScratchPoolId,ActionOnPurge,CacheRetention
FROM Pool WHERE Pool.Name='Incr-Disk'" changes=1
        RWLOCK=0x55870664c6a0 w_active=0 w_wait=0
threadid=0x7f74c4ff9700 JobId=0 JobStatus=R jcr=0x7f74b40044f8
name=-Console-.2020-06-17_08.15.46_18
        use_count=1 killable=0
        JobType=U JobLevel=
        sched_time=17-Jun-2020 08:15 start_time=17-Jun-2020 08:15
        end_time=31-Dec-1969 19:00 wait_time=31-Dec-1969 19:00
        db=0x7f74b4001e28 db_batch=(nil) batch_started=0
        wstore=0x558706615b78 rstore=(nil) wjcr=(nil)
client=0x558706617ee8 reschedule_count=0 SD_msg_chan_started=0
BDB=0x7f74b4001e28 db_name=bacula db_user=bacula connected=true
        cmd="UPDATE Client SET
AutoPrune=1,FileRetention=31104000,JobRetention=31104000,Uname='9.4.4
(28May19) x86_64-redhat-linux-gnu,redhat,' WHERE Name='fisherprice'"
changes=81
        RWLOCK=0x7f74b4001e40 w_active=0 w_wait=0
List plugins. Hook count=0



-- 
  Phil Stracchino
  Babylon Communications
  ph...@caerllewys.net
  p...@co.ordinate.org
  Landline: +1.603.293.8485
  Mobile:   +1.603.998.6958


_______________________________________________
Bacula-devel mailing list
Bacula-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-devel

Reply via email to