From: Elena Ufimtseva <elena.ufimts...@oracle.com> In order to detect remote processes which are hung, the proxy periodically sends heartbeat messages to confirm if the remote process is alive
Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> --- v4 -> v5: - Heart beat messages are sent asynchronously hw/proxy/qemu-proxy.c | 115 ++++++++++++++++++++++++++++++++++++++++++ include/hw/proxy/qemu-proxy.h | 2 + include/io/mpqemu-link.h | 1 + 3 files changed, 118 insertions(+) diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c index c888677..bcb24f7 100644 --- a/hw/proxy/qemu-proxy.c +++ b/hw/proxy/qemu-proxy.c @@ -24,11 +24,104 @@ #include "hw/boards.h" #include "include/qemu/log.h" +QEMUTimer *hb_timer; static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp); static void setup_irqfd(PCIProxyDev *dev); +static void pci_dev_exit(PCIDevice *dev); +static void start_broadcast_timer(void); +static void stop_broadcast_timer(void); +static void childsig_handler(int sig, siginfo_t *siginfo, void *ctx); +static void broadcast_init(void); static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l, unsigned int op); +static void childsig_handler(int sig, siginfo_t *siginfo, void *ctx) +{ + /* TODO: Add proper handler. */ + printf("Child (pid %d) is dead? Signal is %d, Exit code is %d.\n", + siginfo->si_pid, siginfo->si_signo, siginfo->si_code); +} + +static void remote_ping_handler(void *opaque) +{ + PCIProxyDev *pdev = opaque; + + if (!event_notifier_test_and_clear(&pdev->en_ping)) { + /* + * TODO: Is retry needed? Add the handling of the + * non-responsive process. How its done in case + * of managed process? + */ + printf("No reply from remote process, pid %d\n", pdev->remote_pid); + event_notifier_cleanup(&pdev->en_ping); + } +} + +static void broadcast_msg(void) +{ + MPQemuMsg msg; + PCIProxyDev *entry; + + QLIST_FOREACH(entry, &proxy_dev_list.devices, next) { + if (event_notifier_get_fd(&entry->en_ping) == -1) { + continue; + } + + memset(&msg, 0, sizeof(MPQemuMsg)); + + msg.num_fds = 1; + msg.cmd = PROXY_PING; + msg.bytestream = 0; + msg.size = 0; + msg.fds[0] = event_notifier_get_fd(&entry->en_ping); + + mpqemu_msg_send(&msg, entry->mpqemu_link->com); + } +} + +static void broadcast_init(void) +{ + PCIProxyDev *entry; + + QLIST_FOREACH(entry, &proxy_dev_list.devices, next) { + event_notifier_init(&entry->en_ping, 0); + qemu_set_fd_handler(event_notifier_get_fd(&entry->en_ping), + remote_ping_handler, NULL, entry); + } +} + +#define NOP_INTERVAL 1000000 + +static void remote_ping(void *opaque) +{ + broadcast_msg(); + timer_mod(hb_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL); +} + +static void start_broadcast_timer(void) +{ + hb_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + remote_ping, + &proxy_dev_list); + timer_mod(hb_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL); + +} + +static void stop_broadcast_timer(void) +{ + timer_del(hb_timer); + timer_free(hb_timer); +} + +static void set_sigchld_handler(void) +{ + struct sigaction sa_sigterm; + memset(&sa_sigterm, 0, sizeof(sa_sigterm)); + sa_sigterm.sa_sigaction = childsig_handler; + sa_sigterm.sa_flags = SA_SIGINFO | SA_NOCLDWAIT | SA_NOCLDSTOP; + sigaction(SIGCHLD, &sa_sigterm, NULL); +} + static void probe_pci_info(PCIDevice *dev) { PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); @@ -111,6 +204,9 @@ static void proxy_ready(PCIDevice *dev) setup_irqfd(pdev); probe_pci_info(dev); + set_sigchld_handler(); + broadcast_init(); + start_broadcast_timer(); } static int set_remote_opts(PCIDevice *dev, QDict *qdict, unsigned int cmd) @@ -325,6 +421,7 @@ static void pci_proxy_dev_class_init(ObjectClass *klass, void *data) PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); k->realize = pci_proxy_dev_realize; + k->exit = pci_dev_exit; k->config_read = pci_proxy_read_config; k->config_write = pci_proxy_write_config; } @@ -455,6 +552,24 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) dev->proxy_ready = proxy_ready; } +static void pci_dev_exit(PCIDevice *pdev) +{ + PCIProxyDev *entry, *sentry; + PCIProxyDev *dev = PCI_PROXY_DEV(pdev); + + stop_broadcast_timer(); + + QLIST_FOREACH_SAFE(entry, &proxy_dev_list.devices, next, sentry) { + if (entry->remote_pid == dev->remote_pid) { + QLIST_REMOVE(entry, next); + } + } + + if (!QLIST_EMPTY(&proxy_dev_list.devices)) { + start_broadcast_timer(); + } +} + static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr, bool write, hwaddr addr, uint64_t *val, unsigned size, bool memory) diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h index 7c6bddd..f7666fb 100644 --- a/include/hw/proxy/qemu-proxy.h +++ b/include/hw/proxy/qemu-proxy.h @@ -55,6 +55,8 @@ struct PCIProxyDev { EventNotifier resample; pid_t remote_pid; + EventNotifier en_ping; + int socket; char *rid; diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h index 2ca89c5..aaaf1a4 100644 --- a/include/io/mpqemu-link.h +++ b/include/io/mpqemu-link.h @@ -60,6 +60,7 @@ typedef enum { DEVICE_DEL, GET_PCI_INFO, RET_PCI_INFO, + PROXY_PING, MAX, } mpqemu_cmd_t; -- 1.8.3.1