On Sat 26 May 2018 it segfaulted again. Identical pattern:
(gdb) bt
#0 0x0000000000006811 in ?? ()
#1 0x0000000000412b1c in TCP_Client::unblock_handler (closure=0xdb1870) at
TCP_Client.c++:270
#2 0x00000000004105dc in Scheduler::handle_io (fds=0xaf3eb0,
fds@entry=0x7ffc71013620,
iotype=&Scheduler::FDInfo::read, iotype@entry=&Scheduler::FDInfo::write) at
Scheduler.c++:315
#3 0x0000000000410811 in Scheduler::select () at Scheduler.c++:342
#4 0x0000000000402fa5 in loop () at Scheduler.h:89
#5 main (argc=<optimized out>, argv=0x7ffc71013898) at main.c++:306
Clearly, the set contains a pointer to an object of type Interest which was
deleted.
Since ClientInterest issues a dequeue_from_scan when it is deleted, the culprit
must be a DirEntry. In fact, a call to its scan() function would not be
detectable from the core dump, because the call is tail-call optimized (it is
compiled to a jump, so it leaves no stack frame of its own):
bool
DirEntry::scan(Interest *ip)
{
assert(!ip);
return parent->scan(this);
}
(gdb) info address DirEntry::scan
Symbol "DirEntry::scan(Interest*)" is a function at address 0x404750.
(gdb) disass 0x404750
Dump of assembler code for function DirEntry::scan(Interest*):
0x0000000000404750 <+0>: test %rsi,%rsi
0x0000000000404753 <+3>: jne 0x40476b <DirEntry::scan(Interest*)+27>
0x0000000000404755 <+5>: mov %rdi,%rax
0x0000000000404758 <+8>: mov 0xc8(%rdi),%rdi
0x000000000040475f <+15>: mov %rax,%rsi
0x0000000000404762 <+18>: mov (%rdi),%rdx
0x0000000000404765 <+21>: mov 0x20(%rdx),%rdx
0x0000000000404769 <+25>: jmpq *%rdx <--- this jumps to a deleted
ClientInterest
0x000000000040476b <+27>: push %rax
0x000000000040476c <+28>: mov $0x414c80,%ecx
0x0000000000404771 <+33>: mov $0x3b,%edx
0x0000000000404776 <+38>: mov $0x414b8e,%esi
0x000000000040477b <+43>: mov $0x414b9b,%edi
0x0000000000404780 <+48>: callq 0x4024b0 <__assert_fail@plt>
End of assembler dump.
I have now modified ClientInterest.h and Directory.c++, and further modified
ClientInterest.c++. The resulting changes are:
--- orig/src/ClientInterest.h 2018-02-06 18:05:59.000000000 +0100
+++ ./src/ClientInterest.h 2018-06-16 10:25:18.000000000 +0200
@@ -73,6 +73,8 @@
virtual FileSystem * get_filesystem() { return myfilesystem; }
+ void dequeue_from_scan(Interest *ip);
+
private:
enum { ACTIVE_STATE = 1 << 0 };
--- orig/src/ClientInterest.c++ 2003-01-18 15:18:12.000000000 +0100
+++ ./src/ClientInterest.c++ 2018-06-16 10:29:08.000000000 +0200
@@ -62,6 +62,8 @@
ClientInterest::~ClientInterest()
{
myfilesystem->cancel(this, fs_request);
+ if (myclient)
+ myclient->dequeue_from_scan(this);
}
void
@@ -162,6 +164,13 @@
myclient->dequeue_from_scan(ip);
}
+void
+ClientInterest::dequeue_from_scan(Interest *ip)
+{
+ if (myclient)
+ myclient->dequeue_from_scan(ip);
+}
+
bool
ClientInterest::do_scan()
{
--- orig/src/Directory.c++ 2003-01-18 15:18:12.000000000 +0100
+++ ./src/Directory.c++ 2018-06-16 10:17:32.000000000 +0200
@@ -77,6 +77,7 @@
{ (void) chdir();
while (p)
{ q = p->next;
+ dequeue_from_scan(p);
delete p;
p = q;
}
Waiting for the next core dump...