Quoting Dwight Engen (dwight.en...@oracle.com):
> Thanks for the review Serge, I believe this fixes all your comments.
> 
> Changes from v1
> - made fd array dynamic, fix lxc-monitord when run by hand
> - removed BUGS section of lxc-monitor manpage
> 
> ---
> 
> Signed-off-by: Dwight Engen <dwight.en...@oracle.com>

Well Huh.  I would have expected the instantiation of mon before the
definition of struct lxc_monitor to be a problem, but a quick test shows
that it works.

Thanks, Dwight, this is awesome!

Acked-by: Serge E. Hallyn <serge.hal...@ubuntu.com>

> ---
>  .gitignore              |   1 +
>  doc/lxc-monitor.sgml.in |  12 --
>  src/lxc/Makefile.am     |   2 +
>  src/lxc/lxc_console.c   |   4 +-
>  src/lxc/lxc_monitor.c   |   2 +
>  src/lxc/lxc_monitord.c  | 409 
> ++++++++++++++++++++++++++++++++++++++++++++++++
>  src/lxc/lxccontainer.c  |   6 +-
>  src/lxc/mainloop.c      |   7 +-
>  src/lxc/mainloop.h      |   7 +-
>  src/lxc/monitor.c       | 196 ++++++++++++++++-------
>  src/lxc/monitor.h       |  10 +-
>  src/lxc/start.c         |   4 +-
>  src/lxc/utils.h         |  26 +++
>  13 files changed, 610 insertions(+), 76 deletions(-)
>  create mode 100644 src/lxc/lxc_monitord.c
> 
> diff --git a/.gitignore b/.gitignore
> index 905a2dc..c614a75 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -52,6 +52,7 @@ src/lxc/lxc-info
>  src/lxc/lxc-init
>  src/lxc/lxc-kill
>  src/lxc/lxc-monitor
> +src/lxc/lxc-monitord
>  src/lxc/lxc-netstat
>  src/lxc/lxc-ps
>  src/lxc/lxc-restart
> diff --git a/doc/lxc-monitor.sgml.in b/doc/lxc-monitor.sgml.in
> index f9760a5..eae6f82 100644
> --- a/doc/lxc-monitor.sgml.in
> +++ b/doc/lxc-monitor.sgml.in
> @@ -68,18 +68,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 
> 02111-1307 USA
>    &commonoptions;
>  
>    <refsect1>
> -    <title>Bugs</title>
> -
> -    <para>
> -      Only one <command>lxc-monitor</command> can run at a time. Other
> -      invocations will fail with the following error:
> -    </para>
> -    <para>
> -      lxc-monitor: bind : Address already in use
> -    </para>
> -
> -  </refsect1>
> -  <refsect1>
>      <title>Examples</title>
>      <variablelist>
>        <varlistentry>
> diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
> index ebeca466..1fa0fa8 100644
> --- a/src/lxc/Makefile.am
> +++ b/src/lxc/Makefile.am
> @@ -150,6 +150,7 @@ bin_PROGRAMS = \
>       lxc-start \
>       lxc-execute \
>       lxc-monitor \
> +     lxc-monitord \
>       lxc-wait \
>       lxc-console \
>       lxc-freeze \
> @@ -181,6 +182,7 @@ lxc_freeze_SOURCES = lxc_freeze.c
>  lxc_info_SOURCES = lxc_info.c
>  lxc_init_SOURCES = lxc_init.c
>  lxc_monitor_SOURCES = lxc_monitor.c
> +lxc_monitord_SOURCES = lxc_monitord.c
>  lxc_restart_SOURCES = lxc_restart.c
>  lxc_start_SOURCES = lxc_start.c
>  lxc_stop_SOURCES = lxc_stop.c
> diff --git a/src/lxc/lxc_console.c b/src/lxc/lxc_console.c
> index 643c442..f6659f6 100644
> --- a/src/lxc/lxc_console.c
> +++ b/src/lxc/lxc_console.c
> @@ -241,7 +241,7 @@ Type <Ctrl+%1$c q> to exit the console, \
>               goto out_mainloop_open;
>       }
>  
> -     err = lxc_mainloop(&descr);
> +     err = lxc_mainloop(&descr, -1);
>       if (err) {
>               ERROR("mainloop returned an error");
>               goto out_mainloop_open;
> @@ -255,7 +255,7 @@ out_mainloop_open:
>  out:
>       /* Restore previous terminal parameter */
>       tcsetattr(0, TCSAFLUSH, &oldtios);
> -     
> +
>       /* Return to line it is */
>       printf("\n");
>  
> diff --git a/src/lxc/lxc_monitor.c b/src/lxc/lxc_monitor.c
> index 8c15869..0ca829f 100644
> --- a/src/lxc/lxc_monitor.c
> +++ b/src/lxc/lxc_monitor.c
> @@ -87,6 +87,8 @@ int main(int argc, char *argv[])
>               return -1;
>       }
>  
> +     lxc_monitord_spawn(my_args.lxcpath);
> +
>       fd = lxc_monitor_open(my_args.lxcpath);
>       if (fd < 0)
>               return -1;
> diff --git a/src/lxc/lxc_monitord.c b/src/lxc/lxc_monitord.c
> new file mode 100644
> index 0000000..3b5cf53
> --- /dev/null
> +++ b/src/lxc/lxc_monitord.c
> @@ -0,0 +1,409 @@
> +/*
> + * lxc: linux Container library
> + *
> + * Copyright © 2012 Oracle.
> + *
> + * Authors:
> + * Dwight Engen <dwight.en...@oracle.com>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <signal.h>
> +#include <errno.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <fcntl.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/param.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <netinet/in.h>
> +#include <net/if.h>
> +
> +#include <lxc/af_unix.h>
> +#include <lxc/log.h>
> +#include <lxc/mainloop.h>
> +#include <lxc/monitor.h>
> +#include <lxc/utils.h>
> +
> +#define CLIENTFDS_CHUNK 64
> +
> +lxc_log_define(lxc_monitord, lxc);
> +
> +static struct lxc_monitor mon;

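You are defining mon here, before struct lxc_monitor itself is defined
further down (this is the spot I was referring to above).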
> +
> +static void lxc_monitord_cleanup(void);
> +
> +/*
> + * Defines the structure to store the monitor information
> + * @lxcpath        : the path being monitored
> + * @fifofd         : the file descriptor for publishers (containers) to 
> write state
> + * @listenfd       : the file descriptor for subscribers (lxc-monitors) to 
> connect
> + * @clientfds      : accepted client file descriptors
> + * @clientfds_size : number of file descriptors clientfds can hold
> + * @clientfds_cnt  : the count of valid fds in clientfds
> + * @descr          : the lxc_mainloop state
> + */
> +struct lxc_monitor {
> +     const char *lxcpath;
> +     int fifofd;
> +     int listenfd;
> +     int *clientfds;
> +     int clientfds_size;
> +     int clientfds_cnt;
> +     struct lxc_epoll_descr descr;
> +};
> +
> +static int lxc_monitord_fifo_create(struct lxc_monitor *mon)
> +{
> +     char fifo_path[PATH_MAX];
> +     int ret;
> +
> +     ret = snprintf(fifo_path, sizeof(fifo_path), "%s/monitor-fifo", 
> mon->lxcpath);
> +     if (ret < 0 || ret >= sizeof(fifo_path)) {
> +             ERROR("lxcpath too long to monitor fifo");
> +             return -1;
> +     }
> +
> +     ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0);
> +     if (ret < 0) {
> +             INFO("monitor fifo %s exists, already running?", fifo_path);
> +             return -1;
> +     }
> +
> +     mon->fifofd = open(fifo_path, O_RDWR);
> +     if (mon->fifofd < 0) {
> +             unlink(fifo_path);
> +             ERROR("failed to open monitor fifo");
> +             return -1;
> +     }
> +     return 0;
> +}
> +
> +static int lxc_monitord_fifo_delete(struct lxc_monitor *mon)
> +{
> +     char fifo_path[PATH_MAX];
> +     int ret;
> +
> +     ret = snprintf(fifo_path, sizeof(fifo_path), "%s/monitor-fifo", 
> mon->lxcpath);
> +     if (ret < 0 || ret >= sizeof(fifo_path)) {
> +             ERROR("lxcpath too long to monitor fifo");
> +             return -1;
> +     }
> +     unlink(fifo_path);
> +     return 0;
> +}
> +
> +static void lxc_monitord_sockfd_remove(struct lxc_monitor *mon, int fd) {
> +     int i;
> +
> +     if (lxc_mainloop_del_handler(&mon->descr, fd))
> +             CRIT("fd:%d not found in mainloop", fd);
> +     close(fd);
> +
> +     for (i = 0; i < mon->clientfds_cnt; i++) {
> +             if (mon->clientfds[i] == fd)
> +                     break;
> +     }
> +     if (i >= mon->clientfds_cnt) {
> +             CRIT("fd:%d not found in clients array", fd);
> +             lxc_monitord_cleanup();
> +             exit(EXIT_FAILURE);
> +     }
> +
> +     memmove(&mon->clientfds[i], &mon->clientfds[i+1],
> +             (mon->clientfds_cnt - i - 1) * sizeof(mon->clientfds[0]));
> +     mon->clientfds_cnt--;
> +}
> +
> +static int lxc_monitord_sock_handler(int fd, void *data,
> +                                  struct lxc_epoll_descr *descr)
> +{
> +     struct lxc_monitor *mon = data;
> +
> +     lxc_monitord_sockfd_remove(mon, fd);
> +     return 0;
> +}
> +
> +static int lxc_monitord_sock_accept(int fd, void *data,
> +                                 struct lxc_epoll_descr *descr)
> +{
> +     int ret,clientfd;
> +     struct lxc_monitor *mon = data;
> +     struct ucred cred;
> +     socklen_t credsz = sizeof(cred);
> +
> +     ret = -1;
> +     clientfd = accept(fd, NULL, 0);
> +     if (clientfd < 0) {
> +             SYSERROR("failed to accept connection");
> +             goto out;
> +     }
> +
> +     if (fcntl(clientfd, F_SETFD, FD_CLOEXEC)) {
> +             SYSERROR("failed to set close-on-exec on incoming connection");
> +             goto err1;
> +     }
> +
> +     if (getsockopt(clientfd, SOL_SOCKET, SO_PEERCRED, &cred, &credsz))
> +     {
> +             ERROR("failed to get credentials on socket");
> +             goto err1;
> +     }
> +     if (cred.uid && cred.uid != geteuid()) {
> +             WARN("monitor denied for uid:%d", cred.uid);
> +             ret = -EACCES;
> +             goto err1;
> +     }
> +
> +     if (mon->clientfds_cnt + 1 > mon->clientfds_size) {
> +             int *clientfds;
> +             DEBUG("realloc space for %d clientfds",
> +                   mon->clientfds_size + CLIENTFDS_CHUNK);
> +             clientfds = realloc(mon->clientfds,
> +                                 (mon->clientfds_size + CLIENTFDS_CHUNK) *
> +                                  sizeof(mon->clientfds[0]));
> +             if (clientfds == NULL) {
> +                     ERROR("failed to realloc memory for clientfds");
> +                     goto err1;
> +             }
> +             mon->clientfds = clientfds;
> +             mon->clientfds_size += CLIENTFDS_CHUNK;
> +     }
> +
> +     ret = lxc_mainloop_add_handler(&mon->descr, clientfd,
> +                                    lxc_monitord_sock_handler, mon);
> +     if (ret) {
> +             ERROR("failed to add socket handler");
> +             goto err1;
> +     }
> +
> +     mon->clientfds[mon->clientfds_cnt++] = clientfd;
> +     INFO("accepted client fd:%d clients:%d", clientfd, mon->clientfds_cnt);
> +     goto out;
> +
> +err1:
> +     close(clientfd);
> +out:
> +     return ret;
> +}
> +
> +static int lxc_monitord_sock_create(struct lxc_monitor *mon)
> +{
> +     struct sockaddr_un addr;
> +     int fd;
> +
> +     if (lxc_monitor_sock_name(mon->lxcpath, &addr) < 0)
> +             return -1;
> +
> +     fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, O_TRUNC);
> +     if (fd < 0) {
> +             ERROR("failed to open unix socket : %s", strerror(errno));
> +             return -1;
> +     }
> +
> +     mon->listenfd = fd;
> +     return 0;
> +}
> +
> +static int lxc_monitord_sock_delete(struct lxc_monitor *mon)
> +{
> +     struct sockaddr_un addr;
> +
> +     if (lxc_monitor_sock_name(mon->lxcpath, &addr) < 0)
> +             return -1;
> +     if (addr.sun_path[0])
> +             unlink(addr.sun_path);
> +     return 0;
> +}
> +
> +static int lxc_monitord_create(struct lxc_monitor *mon)
> +{
> +     int ret;
> +
> +     ret = lxc_monitord_fifo_create(mon);
> +     if (ret < 0)
> +             return ret;
> +
> +     ret = lxc_monitord_sock_create(mon);
> +     return ret;
> +}
> +
> +static void lxc_monitord_delete(struct lxc_monitor *mon)
> +{
> +     int i;
> +
> +     lxc_mainloop_del_handler(&mon->descr, mon->listenfd);
> +     close(mon->listenfd);
> +     lxc_monitord_sock_delete(mon);
> +
> +     lxc_mainloop_del_handler(&mon->descr, mon->fifofd);
> +     close(mon->fifofd);
> +     lxc_monitord_fifo_delete(mon);
> +
> +     for (i = 0; i < mon->clientfds_cnt; i++) {
> +             lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]);
> +             close(mon->clientfds[i]);
> +     }
> +     mon->clientfds_cnt = 0;
> +}
> +
> +static int lxc_monitord_fifo_handler(int fd, void *data,
> +                                  struct lxc_epoll_descr *descr)
> +{
> +     int ret,i;
> +     struct lxc_msg msglxc;
> +     struct lxc_monitor *mon = data;
> +
> +     ret = read(fd, &msglxc, sizeof(msglxc));
> +     if (ret != sizeof(msglxc)) {
> +             SYSERROR("read fifo failed : %s", strerror(errno));
> +             return 1;
> +     }
> +
> +     for (i = 0; i < mon->clientfds_cnt; i++) {
> +             DEBUG("writing client fd:%d", mon->clientfds[i]);
> +             ret = write(mon->clientfds[i], &msglxc, sizeof(msglxc));
> +             if (ret < 0) {
> +                     ERROR("write failed to client sock:%d %d %s",
> +                           mon->clientfds[i], errno, strerror(errno));
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +static int lxc_monitord_mainloop_add(struct lxc_monitor *mon)
> +{
> +     int ret;
> +
> +     ret = lxc_mainloop_add_handler(&mon->descr, mon->fifofd,
> +                                    lxc_monitord_fifo_handler, mon);
> +     if (ret < 0) {
> +             ERROR("failed to add to mainloop monitor handler for fifo");
> +             return -1;
> +     }
> +
> +     ret = lxc_mainloop_add_handler(&mon->descr, mon->listenfd,
> +                                    lxc_monitord_sock_accept, mon);
> +     if (ret < 0) {
> +             ERROR("failed to add to mainloop monitor handler for listen 
> socket");
> +             return -1;
> +     }
> +
> +     return 0;
> +}
> +
> +static void lxc_monitord_cleanup(void)
> +{
> +     lxc_monitord_delete(&mon);
> +}
> +
> +static void lxc_monitord_sig_handler(int sig)
> +{
> +     INFO("caught signal %d", sig);
> +     lxc_monitord_cleanup();
> +     exit(EXIT_SUCCESS);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +     int ret,pipefd;
> +     char *lxcpath = argv[1];
> +     char logpath[PATH_MAX];
> +     sigset_t mask;
> +
> +     if (argc != 3) {
> +             fprintf(stderr,
> +                     "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n"
> +                     "NOTE: lxc-monitord is intended for use by lxc 
> internally\n"
> +                     "      and does not need to be run by hand\n\n");
> +             exit(EXIT_FAILURE);
> +     }
> +
> +     ret = snprintf(logpath, sizeof(logpath), "%s/lxc-monitord.log",
> +                    lxcpath);
> +     if (ret < 0 || ret >= sizeof(logpath))
> +             return EXIT_FAILURE;
> +
> +     ret = lxc_log_init(NULL, logpath, "NOTICE", "lxc-monitord", 0);
> +     if (ret)
> +             return ret;
> +
> +     pipefd = atoi(argv[2]);
> +
> +     if (sigfillset(&mask) ||
> +         sigdelset(&mask, SIGILL)  ||
> +         sigdelset(&mask, SIGSEGV) ||
> +         sigdelset(&mask, SIGBUS)  ||
> +         sigdelset(&mask, SIGTERM) ||
> +         sigprocmask(SIG_BLOCK, &mask, NULL)) {
> +             SYSERROR("failed to set signal mask");
> +             return -1;
> +     }
> +
> +     signal(SIGILL,  lxc_monitord_sig_handler);
> +     signal(SIGSEGV, lxc_monitord_sig_handler);
> +     signal(SIGBUS,  lxc_monitord_sig_handler);
> +     signal(SIGTERM, lxc_monitord_sig_handler);
> +
> +     ret = EXIT_FAILURE;
> +     memset(&mon, 0, sizeof(mon));
> +     mon.lxcpath = lxcpath;
> +     if (lxc_mainloop_open(&mon.descr)) {
> +             ERROR("failed to create mainloop");
> +             goto out;
> +     }
> +
> +     if (lxc_monitord_create(&mon)) {
> +             goto out;
> +     }
> +
> +     /* sync with parent, we're ignoring the return from write
> +      * because regardless if it works or not, the following
> +      * close will sync us with the parent process. the
> +      * if-empty-statement construct is to quiet the
> +      * warn-unused-result warning.
> +      */
> +     if (write(pipefd, "S", 1)) ;
> +     close(pipefd);
> +
> +     if (lxc_monitord_mainloop_add(&mon)) {
> +             ERROR("failed to add mainloop handlers");
> +             goto out;
> +     }
> +
> +     NOTICE("monitoring lxcpath %s", mon.lxcpath);
> +     for(;;) {
> +             ret = lxc_mainloop(&mon.descr, 1000 * 30);
> +             if (mon.clientfds_cnt <= 0)
> +             {
> +                     NOTICE("no clients for 30 seconds, exiting");
> +                     break;
> +             }
> +     }
> +
> +     lxc_mainloop_close(&mon.descr);
> +     lxc_monitord_cleanup();
> +     ret = EXIT_SUCCESS;
> +     NOTICE("monitor exiting");
> +out:
> +     return ret;
> +}
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index 1df6a98..53765b0 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -32,6 +32,7 @@
>  #include <sys/wait.h>
>  #include <errno.h>
>  #include <lxc/utils.h>
> +#include <lxc/monitor.h>
>  
>  lxc_log_define(lxc_container, lxc);
>  
> @@ -370,6 +371,7 @@ static bool lxcapi_start(struct lxc_container *c, int 
> useinit, char * const argv
>       if (daemonize) {
>               if (!lxc_container_get(c))
>                       return false;
> +             lxc_monitord_spawn(c->config_path);
>               pid_t pid = fork();
>               if (pid < 0) {
>                       lxc_container_put(c);
> @@ -560,7 +562,7 @@ static bool lxcapi_create(struct lxc_container *c, char 
> *t, char *const argv[])
>       }
>  
>       /* container is already created if we have a config and rootfs.path is 
> accessible */
> -     if (lxcapi_is_defined(c) && c->lxc_conf && c->lxc_conf->rootfs.path && 
> access(c->lxc_conf->rootfs.path, F_OK) == 0) 
> +     if (lxcapi_is_defined(c) && c->lxc_conf && c->lxc_conf->rootfs.path && 
> access(c->lxc_conf->rootfs.path, F_OK) == 0)
>               goto out;
>  
>       /* we're going to fork.  but since we'll wait for our child, we
> @@ -826,7 +828,7 @@ static bool lxcapi_destroy(struct lxc_container *c)
>               return false;
>  
>       /* container is already destroyed if we don't have a config and 
> rootfs.path is not accessible */
> -     if (!lxcapi_is_defined(c) && (!c->lxc_conf || !c->lxc_conf->rootfs.path 
> || access(c->lxc_conf->rootfs.path, F_OK) != 0)) 
> +     if (!lxcapi_is_defined(c) && (!c->lxc_conf || !c->lxc_conf->rootfs.path 
> || access(c->lxc_conf->rootfs.path, F_OK) != 0))
>               return false;
>  
>       pid = fork();
> diff --git a/src/lxc/mainloop.c b/src/lxc/mainloop.c
> index 975215d..d9ab5d1 100644
> --- a/src/lxc/mainloop.c
> +++ b/src/lxc/mainloop.c
> @@ -38,7 +38,7 @@ struct mainloop_handler {
>  
>  #define MAX_EVENTS 10
>  
> -int lxc_mainloop(struct lxc_epoll_descr *descr)
> +int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms)
>  {
>       int i, nfds;
>       struct mainloop_handler *handler;
> @@ -46,7 +46,7 @@ int lxc_mainloop(struct lxc_epoll_descr *descr)
>  
>       for (;;) {
>  
> -             nfds = epoll_wait(descr->epfd, events, MAX_EVENTS, -1);
> +             nfds = epoll_wait(descr->epfd, events, MAX_EVENTS, timeout_ms);
>               if (nfds < 0) {
>                       if (errno == EINTR)
>                               continue;
> @@ -64,6 +64,9 @@ int lxc_mainloop(struct lxc_epoll_descr *descr)
>                               return 0;
>               }
>  
> +             if (nfds == 0 && timeout_ms != 0)
> +                     return 0;
> +
>               if (lxc_list_empty(&descr->handlers))
>                       return 0;
>       }
> diff --git a/src/lxc/mainloop.h b/src/lxc/mainloop.h
> index 6b16242..ec87569 100644
> --- a/src/lxc/mainloop.h
> +++ b/src/lxc/mainloop.h
> @@ -21,6 +21,9 @@
>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>   */
>  
> +#ifndef _mainloop_h
> +#define _mainloop_h
> +
>  #include "list.h"
>  
>  struct lxc_epoll_descr {
> @@ -31,7 +34,7 @@ struct lxc_epoll_descr {
>  typedef int (*lxc_mainloop_callback_t)(int fd, void *data,
>                                      struct lxc_epoll_descr *descr);
>  
> -extern int lxc_mainloop(struct lxc_epoll_descr *descr);
> +extern int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms);
>  
>  extern int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd,
>                                   lxc_mainloop_callback_t callback,
> @@ -42,3 +45,5 @@ extern int lxc_mainloop_del_handler(struct lxc_epoll_descr 
> *descr, int fd);
>  extern int lxc_mainloop_open(struct lxc_epoll_descr *descr);
>  
>  extern int lxc_mainloop_close(struct lxc_epoll_descr *descr);
> +
> +#endif
> diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c
> index afdaf67..e108eb7 100644
> --- a/src/lxc/monitor.c
> +++ b/src/lxc/monitor.c
> @@ -5,6 +5,7 @@
>   *
>   * Authors:
>   * Daniel Lezcano <daniel.lezcano at free.fr>
> + * Dwight Engen <dwight.en...@oracle.com>
>   *
>   * This library is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU Lesser General Public
> @@ -20,6 +21,7 @@
>   * License along with this library; if not, write to the Free Software
>   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>   */
> +
>  #include <stdio.h>
>  #include <errno.h>
>  #include <unistd.h>
> @@ -30,7 +32,7 @@
>  #include <sys/stat.h>
>  #include <sys/param.h>
>  #include <sys/socket.h>
> -#include <sys/un.h>
> +#include <sys/wait.h>
>  #include <netinet/in.h>
>  #include <net/if.h>
>  
> @@ -40,37 +42,36 @@
>  #include <lxc/log.h>
>  #include <lxc/state.h>
>  #include <lxc/monitor.h>
> +#include <lxc/utils.h>
>  
>  lxc_log_define(lxc_monitor, lxc);
>  
> -#ifndef UNIX_PATH_MAX
> -#define UNIX_PATH_MAX 108
> -#endif
> -
> -static void lxc_monitor_send(struct lxc_msg *msg, const char *lxcpath)
> +/* routines used by monitor publishers (containers) */
> +static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath)
>  {
> -     int fd;
> -     struct sockaddr_un addr = { .sun_family = AF_UNIX };
> -     char *offset = &addr.sun_path[1];
> -     size_t ret, len;
> -
> -     /*
> -      * addr.sun_path is only 108 bytes.
> -      * should we take a hash of lxcpath?  a subset of it?
> -      */
> -     len = sizeof(addr.sun_path) - 1;
> -     ret = snprintf(offset, len, "%s/lxc-monitor", lxcpath);
> -     if (ret < 0 || ret >= len) {
> -             ERROR("lxcpath too long to open monitor");
> +     int fd,ret;
> +     char fifo_path[PATH_MAX];
> +
> +     BUILD_BUG_ON(sizeof(*msg) > PIPE_BUF); /* write not guaranteed atomic */
> +     ret = snprintf(fifo_path, sizeof(fifo_path), "%s/monitor-fifo", 
> lxcpath);
> +     if (ret < 0 || ret >= sizeof(fifo_path)) {
> +             ERROR("lxcpath too long to open monitor fifo");
>               return;
>       }
>  
> -     fd = socket(PF_UNIX, SOCK_DGRAM, 0);
> -     if (fd < 0)
> +     fd = open(fifo_path, O_WRONLY);
> +     if (fd < 0) {
> +             /* it is normal for this open to fail when there is no monitor
> +              * running, so we don't log it
> +              */
>               return;
> +     }
>  
> -     sendto(fd, msg, sizeof(*msg), 0,
> -            (const struct sockaddr *)&addr, sizeof(addr));
> +     ret = write(fd, msg, sizeof(*msg));
> +     if (ret != sizeof(*msg)) {
> +             SYSERROR("failed to write monitor fifo %s", fifo_path);
> +             return;
> +     }
>  
>       close(fd);
>  }
> @@ -82,50 +83,74 @@ void lxc_monitor_send_state(const char *name, lxc_state_t 
> state, const char *lxc
>       strncpy(msg.name, name, sizeof(msg.name));
>       msg.name[sizeof(msg.name) - 1] = 0;
>  
> -     lxc_monitor_send(&msg, lxcpath);
> +     lxc_monitor_fifo_send(&msg, lxcpath);
>  }
>  
> -int lxc_monitor_open(const char *lxcpath)
> +
> +/* routines used by monitor subscribers (lxc-monitor) */
> +int lxc_monitor_close(int fd)
>  {
> -     struct sockaddr_un addr = { .sun_family = AF_UNIX };
> -     char *offset = &addr.sun_path[1];
> -     int fd;
> -     size_t ret, len;
> -
> -     /*
> -      * addr.sun_path is only 108 bytes.
> -      * should we take a hash of lxcpath?  a subset of it?
> +     return close(fd);
> +}
> +
> +int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) {
> +     size_t len;
> +     int ret;
> +     char *sockname = &addr->sun_path[0]; // 1 for abstract
> +
> +     /* addr.sun_path is only 108 bytes.
> +      * should we take a hash of lxcpath? a subset of it? ftok()? we need
> +      * to make sure it is unique.
>        */
> -     len = sizeof(addr.sun_path) - 1;
> -     ret = snprintf(offset, len, "%s/lxc-monitor", lxcpath);
> +     memset(addr, 0, sizeof(*addr));
> +     addr->sun_family = AF_UNIX;
> +     len = sizeof(addr->sun_path) - 1;
> +     ret = snprintf(sockname, len, "%s/monitor-sock", lxcpath);
>       if (ret < 0 || ret >= len) {
> -             ERROR("lxcpath too long to open monitor");
> +             ERROR("lxcpath too long for unix socket");
>               return -1;
>       }
> +     return 0;
> +}
>  
> -     fd = socket(PF_UNIX, SOCK_DGRAM, 0);
> +int lxc_monitor_open(const char *lxcpath)
> +{
> +     struct sockaddr_un addr;
> +     int fd,ret;
> +     int retry,backoff_ms[] = {10, 50, 100};
> +
> +     if (lxc_monitor_sock_name(lxcpath, &addr) < 0)
> +             return -1;
> +
> +     fd = socket(PF_UNIX, SOCK_STREAM, 0);
>       if (fd < 0) {
>               ERROR("socket : %s", strerror(errno));
>               return -1;
>       }
>  
> -     if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
> -             ERROR("bind : %s", strerror(errno));
> -             close(fd);
> -             return -1;
> +     for (retry = 0; retry < sizeof(backoff_ms)/sizeof(backoff_ms[0]); 
> retry++) {
> +             ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
> +             if (ret == 0 || errno != ECONNREFUSED)
> +                     break;
> +             ERROR("connect : backing off %d", backoff_ms[retry]);
> +             usleep(backoff_ms[retry] * 1000);
>       }
>  
> +     if (ret < 0) {
> +             ERROR("connect : %s", strerror(errno));
> +             goto err1;
> +     }
>       return fd;
> +err1:
> +     close(fd);
> +     return ret;
>  }
>  
> -/* timeout of 0 means return immediately;  -1 means wait forever */
> -int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout)
> +int lxc_monitor_read_timeout(int fd, struct lxc_msg *msglxc, int timeout)
>  {
> -     struct sockaddr_un from;
> -     socklen_t len = sizeof(from);
> -     int ret;
>       fd_set rfds;
>       struct timeval tv;
> +     int ret;
>  
>       if (timeout != -1) {
>               FD_ZERO(&rfds);
> @@ -141,13 +166,12 @@ int lxc_monitor_read_timeout(int fd, struct lxc_msg 
> *msg, int timeout)
>                       return -2;  // timed out
>       }
>  
> -     ret = recvfrom(fd, msg, sizeof(*msg), 0,
> -                    (struct sockaddr *)&from, &len);
> -     if (ret < 0) {
> -             SYSERROR("failed to receive state");
> +     ret = recv(fd, msglxc, sizeof(*msglxc), 0);
> +     if (ret <= 0) {
> +             SYSERROR("client failed to recv (monitord died?) %s",
> +                      strerror(errno));
>               return -1;
>       }
> -
>       return ret;
>  }
>  
> @@ -156,7 +180,73 @@ int lxc_monitor_read(int fd, struct lxc_msg *msg)
>       return lxc_monitor_read_timeout(fd, msg, -1);
>  }
>  
> -int lxc_monitor_close(int fd)
> +
> +
> +/* used to spawn a monitord either on startup of a daemon container, or when
> + * lxc-monitor starts
> + */
> +int lxc_monitord_spawn(const char *lxcpath)
>  {
> -     return close(fd);
> +     pid_t pid1,pid2;
> +     int pipefd[2];
> +     char pipefd_str[11];
> +
> +     char * const args[] = {
> +             "/usr/bin/lxc-monitord",
> +             (char *)lxcpath,
> +             pipefd_str,
> +             NULL,
> +     };
> +
> +     /* double fork to avoid zombies when monitord exits */
> +     pid1 = fork();
> +     if (pid1 < 0) {
> +             SYSERROR("failed to fork");
> +             return -1;
> +     }
> +
> +     if (pid1) {
> +             waitpid(pid1, NULL, 0);
> +             return 0;
> +     }
> +
> +     if (pipe(pipefd) < 0) {
> +             SYSERROR("failed to create pipe");
> +             exit(EXIT_FAILURE);
> +     }
> +
> +     pid2 = fork();
> +     if (pid2 < 0) {
> +             SYSERROR("failed to fork");
> +             exit(EXIT_FAILURE);
> +     }
> +     if (pid2) {
> +             char c;
> +             /* wait for daemon to create socket */
> +             close(pipefd[1]);
> +             /* sync with child, we're ignoring the return from read
> +              * because regardless if it works or not, either way we've
> +              * synced with the child process. the if-empty-statement
> +              * construct is to quiet the warn-unused-result warning.
> +              */
> +             if (read(pipefd[0], &c, 1)) ;
> +             close(pipefd[0]);
> +             exit(EXIT_SUCCESS);
> +     }
> +
> +     umask(0);
> +     if (setsid() < 0) {
> +             SYSERROR("failed to setsid");
> +             exit(EXIT_FAILURE);
> +     }
> +     close(0);
> +     close(1);
> +     close(2);
> +     open("/dev/null", O_RDONLY);
> +     open("/dev/null", O_RDWR);
> +     open("/dev/null", O_RDWR);
> +     close(pipefd[0]);
> +     sprintf(pipefd_str, "%d", pipefd[1]);
> +     execvp(args[0], args);
> +     exit(EXIT_FAILURE);
>  }
> diff --git a/src/lxc/monitor.h b/src/lxc/monitor.h
> index 8bef4c7..cd59ee8 100644
> --- a/src/lxc/monitor.h
> +++ b/src/lxc/monitor.h
> @@ -24,6 +24,9 @@
>  #define __monitor_h
>  
>  #include <sys/param.h>
> +#include <sys/un.h>
> +
> +#include <lxc/conf.h>
>  
>  typedef enum {
>       lxc_msg_state,
> @@ -32,11 +35,14 @@ typedef enum {
>  
>  struct lxc_msg {
>       lxc_msg_type_t type;
> -     char name[MAXPATHLEN];
> +     char name[NAME_MAX+1];
>       int value;
>  };
>  
> -void lxc_monitor_send_state(const char *name, lxc_state_t state,
> +extern int lxc_monitor_open(const char *lxcpath);
> +extern int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un 
> *addr);
> +extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
>                           const char *lxcpath);
> +extern int lxc_monitord_spawn(const char *lxcpath);
>  
>  #endif
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 0a0cc40..fd96d4f 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -390,7 +390,7 @@ int lxc_poll(const char *name, struct lxc_handler 
> *handler)
>               #endif
>       }
>  
> -     return lxc_mainloop(&descr);
> +     return lxc_mainloop(&descr, -1);
>  
>  out_mainloop_open:
>       lxc_mainloop_close(&descr);
> @@ -808,7 +808,7 @@ int lxc_spawn(struct lxc_handler *handler)
>       /* TODO - pass lxc.cgroup.dir (or user's pam cgroup) in for first 
> argument */
>       if ((handler->cgroup = lxc_cgroup_path_create(NULL, name)) == NULL)
>               goto out_delete_net;
> -     
> +
>       if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0)
>               goto out_delete_net;
>  
> diff --git a/src/lxc/utils.h b/src/lxc/utils.h
> index 8954503..8e6a748 100644
> --- a/src/lxc/utils.h
> +++ b/src/lxc/utils.h
> @@ -32,4 +32,30 @@ extern int mkdir_p(const char *dir, mode_t mode);
>   */
>  extern const char *default_lxc_path(void);
>  
> +/**
> + * BUILD_BUG_ON - break compile if a condition is true.
> + * @condition: the condition which the compiler should know is false.
> + *
> + * If you have some code which relies on certain constants being equal, or
> + * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
> + * detect if someone changes it.
> + *
> + * The implementation uses gcc's reluctance to create a negative array, but
> + * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments
> + * to inline functions).  So as a fallback we use the optimizer; if it can't
> + * prove the condition is false, it will cause a link error on the undefined
> + * "__build_bug_on_failed".  This error message can be harder to track down
> + * though, hence the two different methods.
> + */
> +#ifndef __OPTIMIZE__
> +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
> +#else
> +extern int __build_bug_on_failed;
> +#define BUILD_BUG_ON(condition)                                      \
> +     do {                                                    \
> +             ((void)sizeof(char[1 - 2*!!(condition)]));      \
> +             if (condition) __build_bug_on_failed = 1;       \
> +     } while(0)
> +#endif
> +
>  #endif
> -- 
> 1.8.1.4
> 

------------------------------------------------------------------------------
Try New Relic Now & We'll Send You this Cool Shirt
New Relic is the only SaaS-based application performance monitoring service 
that delivers powerful full stack analytics. Optimize and monitor your
browser, app, & servers with just a few lines of code. Try New Relic
and get this awesome Nerd Life shirt! http://p.sf.net/sfu/newrelic_d2d_apr
_______________________________________________
Lxc-devel mailing list
Lxc-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/lxc-devel

Reply via email to