On Mon, Mar 03, 2014 at 04:39:00PM -0600, Serge Hallyn wrote:
> 1. remove the cgm_dbus_disconnected handler.  We're using a proxy
>    anyway, and not keeping it around.
> 
> 2. comment most of the cgm functions to describe when they are called, to
>    ease locking review
> 
> 3. the cgmanager mutex is now held for the duration of a connection, from
>    cgm_dbus_connect to cgm_dbus_disconnect.
> 
> 3b. so remove the mutex lock/unlock from functions which are called during
>    container startup with the cgmanager connection already up
> 
> 4. remove the cgroup_restart().  It's no longer needed since we don't
>    daemonize while we have the cgmanager socket open.
> 
> 5. report errors and return early if cgm_dbus_connect() fails
> 
> 6. don't keep the cgm connection open after cgm_ops_init.  I'm a bit torn
>    on this one as it means that things like lxc-start will always connect
>    twice.  But if we do this there is no good answer, given threaded API
>    users, on when to drop that initial connection.
> 
> 7. cgm_unfreeze and nrtasks: grab the dbus connection, as we'll never
>    have it at that point.  (technically i doubt anyone will use
>    cgmanager and utmp helper on the same host :)
> 
> 8. lxc_spawn: make sure we only disconnect cgroups if they were already
>    connected.
> 

The change looks fine to me though it still looks fragile and I think
I'd be much happier with all cgm_ functions always calling
cgm_dbus_connect and cgm_dbus_disconnect and just have those do the
right thing.

cgm_dbus_{dis}connect would just look for a global cgm_keep_connected
bool which we'd set to true before initialization and set to false after
the container is ready.
So long as this is set to true, any call to cgm_dbus_connect will check
whether we have a connection, if we do, it'll just return, if we don't,
it'll establish one. Calls to cgm_dbus_disconnect would turn into no-ops
unless cgm_keep_connected is set to false, which we'd do just before
calling a final cgm_dbus_disconnect once we're done setting up the
container.

That way if at some random point in the future we decide to use one of
the cgm_ functions somewhere else in the code, we won't get into weird
behaviors that we didn't think about.

> Signed-off-by: Serge Hallyn <[email protected]>
> ---
>  src/lxc/cgmanager.c    | 143 
> ++++++++++++++++++++++++++++++-------------------
>  src/lxc/cgroup.c       |   8 ---
>  src/lxc/cgroup.h       |   1 -
>  src/lxc/lxccontainer.c |   1 -
>  src/lxc/start.c        |   6 +++
>  5 files changed, 95 insertions(+), 64 deletions(-)
> 
> diff --git a/src/lxc/cgmanager.c b/src/lxc/cgmanager.c
> index 7be94a7..c95b3d0 100644
> --- a/src/lxc/cgmanager.c
> +++ b/src/lxc/cgmanager.c
> @@ -95,14 +95,13 @@ static char **subsystems;
>  static DBusConnection *connection;
>  
>  #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
> -static void cgm_dbus_disconnected(DBusConnection *connection);
>  static bool cgm_dbus_connect(void)
>  {
>       DBusError dbus_error;
>       dbus_error_init(&dbus_error);
>  
>       lock_mutex(&thread_mutex);
> -     connection = nih_dbus_connect(CGMANAGER_DBUS_SOCK, 
> cgm_dbus_disconnected);
> +     connection = nih_dbus_connect(CGMANAGER_DBUS_SOCK, NULL);
>       if (!connection) {
>               NihError *nerr;
>               nerr = nih_error_get();
> @@ -134,36 +133,20 @@ static bool cgm_dbus_connect(void)
>               ERROR("Error pinging cgroup manager: %s", nerr->message);
>               nih_free(nerr);
>       }
> -     unlock_mutex(&thread_mutex);
>       return true;
>  }
>  
>  static void cgm_dbus_disconnect(void)
>  {
> -     lock_mutex(&thread_mutex);
>       if (cgroup_manager)
>               nih_free(cgroup_manager);
>       cgroup_manager = NULL;
> -     dbus_connection_unref(connection);
> +     if (connection)
> +             dbus_connection_unref(connection);
>       connection = NULL;
>       unlock_mutex(&thread_mutex);
>  }
>  
> -static void cgm_dbus_disconnected(DBusConnection *connection)
> -{
> -     lock_mutex(&thread_mutex);
> -     WARN("Cgroup manager connection was terminated");
> -     cgroup_manager = NULL;
> -     dbus_connection_unref(connection);
> -     connection = NULL;
> -     unlock_mutex(&thread_mutex);
> -     if (cgm_dbus_connect()) {
> -             INFO("New cgroup manager connection was opened");
> -     } else {
> -             WARN("Cgroup manager unable to re-open connection");
> -     }
> -}
> -
>  static int send_creds(int sock, int rpid, int ruid, int rgid)
>  {
>       struct msghdr msg = { 0 };
> @@ -200,9 +183,11 @@ static int send_creds(int sock, int rpid, int ruid, int 
> rgid)
>       return 0;
>  }
>  
> +/*
> + * Called during container startup.  The cgmanager socket is open
> + */
>  static bool lxc_cgmanager_create(const char *controller, const char 
> *cgroup_path, int32_t *existed)
>  {
> -     lock_mutex(&thread_mutex);
>       if ( cgmanager_create_sync(NULL, cgroup_manager, controller,
>                                      cgroup_path, existed) != 0) {
>               NihError *nerr;
> @@ -210,11 +195,9 @@ static bool lxc_cgmanager_create(const char *controller, 
> const char *cgroup_path
>               ERROR("call to cgmanager_create_sync failed: %s", 
> nerr->message);
>               nih_free(nerr);
>               ERROR("Failed to create %s:%s", controller, cgroup_path);
> -             unlock_mutex(&thread_mutex);
>               return false;
>       }
>  
> -     unlock_mutex(&thread_mutex);
>       return true;
>  }
>  
> @@ -222,7 +205,6 @@ static bool lxc_cgmanager_escape(void)
>  {
>       pid_t me = getpid();
>       int i;
> -     lock_mutex(&thread_mutex);
>       for (i = 0; i < nr_subsystems; i++) {
>               if (cgmanager_move_pid_abs_sync(NULL, cgroup_manager,
>                                       subsystems[i], "/", me) != 0) {
> @@ -231,12 +213,10 @@ static bool lxc_cgmanager_escape(void)
>                       ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: 
> %s",
>                                       subsystems[i], nerr->message);
>                       nih_free(nerr);
> -                     unlock_mutex(&thread_mutex);
>                       return false;
>               }
>       }
>  
> -     unlock_mutex(&thread_mutex);
>       return true;
>  }
>  
> @@ -254,7 +234,6 @@ static int do_chown_cgroup(const char *controller, const 
> char *cgroup_path,
>  
>       uid_t caller_nsuid = get_ns_uid(origuid);
>  
> -     lock_mutex(&thread_mutex);
>       if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
>               SYSERROR("Error creating socketpair");
>               goto out;
> @@ -316,7 +295,6 @@ static int do_chown_cgroup(const char *controller, const 
> char *cgroup_path,
>  out:
>       close(sv[0]);
>       close(sv[1]);
> -     unlock_mutex(&thread_mutex);
>       if (ret == 1 && *buf == '1')
>               return 0;
>       return -1;
> @@ -335,23 +313,26 @@ static int chown_cgroup_wrapper(void *data)
>       return do_chown_cgroup(arg->controller, arg->cgroup_path, arg->origuid);
>  }
>  
> +/*
> + * Called during container startup.  The cgmanager socket is open
> + */
>  static bool lxc_cgmanager_chmod(const char *controller,
>               const char *cgroup_path, const char *file, int mode)
>  {
> -     lock_mutex(&thread_mutex);
>       if (cgmanager_chmod_sync(NULL, cgroup_manager, controller,
>                       cgroup_path, file, mode) != 0) {
>               NihError *nerr;
>               nerr = nih_error_get();
>               ERROR("call to cgmanager_chmod_sync failed: %s", nerr->message);
>               nih_free(nerr);
> -             unlock_mutex(&thread_mutex);
>               return false;
>       }
> -     unlock_mutex(&thread_mutex);
>       return true;
>  }
>  
> +/*
> + * Called during container startup.  The cgmanager socket is open
> + */
>  static bool chown_cgroup(const char *controller, const char *cgroup_path,
>                       struct lxc_conf *conf)
>  {
> @@ -384,26 +365,30 @@ static bool chown_cgroup(const char *controller, const 
> char *cgroup_path,
>  static void cgm_remove_cgroup(const char *controller, const char *path)
>  {
>       int existed;
> -     lock_mutex(&thread_mutex);
>       if ( cgmanager_remove_sync(NULL, cgroup_manager, controller,
>                                  path, CG_REMOVE_RECURSIVE, &existed) != 0) {
>               NihError *nerr;
>               nerr = nih_error_get();
>               ERROR("call to cgmanager_remove_sync failed: %s", 
> nerr->message);
>               nih_free(nerr);
> -             unlock_mutex(&thread_mutex);
>               ERROR("Error removing %s:%s", controller, path);
>       }
> -     unlock_mutex(&thread_mutex);
>       if (existed == -1)
>               INFO("cgroup removal attempt: %s:%s did not exist", controller, 
> path);
>  }
>  
> +/*
> + * We are starting up a container.  We open the cgmanager socket, and leave 
> it
> + * open (and mutex held) for the rest of container startup.
> + */
>  static void *cgm_init(const char *name)
>  {
>       struct cgm_data *d;
>  
> -     cgm_dbus_connect();
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             return NULL;
> +     }
>       d = malloc(sizeof(*d));
>       if (!d)
>               return NULL;
> @@ -429,6 +414,10 @@ err1:
>       return NULL;
>  }
>  
> +/*
> + * Called after a failed container startup.  The cgmanager socket was just
> + * closed at end of lxc_spawn.  We need to re-open
> + */
>  static void cgm_destroy(void *hdata)
>  {
>       struct cgm_data *d = hdata;
> @@ -436,7 +425,10 @@ static void cgm_destroy(void *hdata)
>  
>       if (!d)
>               return;
> -     cgm_dbus_connect();
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             return;
> +     }
>       for (i = 0; i < nr_subsystems; i++)
>               cgm_remove_cgroup(subsystems[i], d->cgroup_path);
>  
> @@ -457,6 +449,9 @@ static inline void cleanup_cgroups(char *path)
>               cgm_remove_cgroup(subsystems[i], path);
>  }
>  
> +/*
> + * Called during container startup.  The cgmanager socket is open
> + */
>  static inline bool cgm_create(void *hdata)
>  {
>       struct cgm_data *d = hdata;
> @@ -527,17 +522,14 @@ next:
>  static bool lxc_cgmanager_enter(pid_t pid, const char *controller,
>               const char *cgroup_path)
>  {
> -     lock_mutex(&thread_mutex);
>       if (cgmanager_move_pid_sync(NULL, cgroup_manager, controller,
>                       cgroup_path, pid) != 0) {
>               NihError *nerr;
>               nerr = nih_error_get();
>               ERROR("call to cgmanager_move_pid_sync failed: %s", 
> nerr->message);
>               nih_free(nerr);
> -             unlock_mutex(&thread_mutex);
>               return false;
>       }
> -     unlock_mutex(&thread_mutex);
>       return true;
>  }
>  
> @@ -552,6 +544,9 @@ static bool do_cgm_enter(pid_t pid, const char 
> *cgroup_path)
>       return true;
>  }
>  
> +/*
> + * called during container startup.  cgmanager socket is open
> + */
>  static inline bool cgm_enter(void *hdata, pid_t pid)
>  {
>       struct cgm_data *d = hdata;
> @@ -570,6 +565,11 @@ static const char *cgm_get_cgroup(void *hdata, const 
> char *subsystem)
>       return d->cgroup_path;
>  }
>  
> +/*
> + * nrtasks is called by the utmp helper by the container monitor.
> + * cgmanager socket was closed after cgroup setup was complete, so we need
> + * to reopen here.
> + */
>  static int cgm_get_nrtasks(void *hdata)
>  {
>       struct cgm_data *d = hdata;
> @@ -579,21 +579,28 @@ static int cgm_get_nrtasks(void *hdata)
>       if (!d || !d->cgroup_path)
>               return false;
>  
> -     lock_mutex(&thread_mutex);
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             return false;
> +     }
>       if (cgmanager_get_tasks_sync(NULL, cgroup_manager, subsystems[0],
>                                    d->cgroup_path, &pids, &pids_len) != 0) {
>               NihError *nerr;
>               nerr = nih_error_get();
>               ERROR("call to cgmanager_get_tasks_sync failed: %s", 
> nerr->message);
>               nih_free(nerr);
> -             unlock_mutex(&thread_mutex);
> +             cgm_dbus_disconnect();
>               return -1;
>       }
> -     unlock_mutex(&thread_mutex);
>       nih_free(pids);
> +     cgm_dbus_disconnect();
>       return pids_len;
>  }
>  
> +/*
> + * cgm_get is called to get container cgroup settings.  cgmanager is not
> + * connected.
> + */
>  static int cgm_get(const char *filename, char *value, size_t len, const char 
> *name, const char *lxcpath)
>  {
>       char *result, *controller, *key, *cgroup;
> @@ -610,8 +617,11 @@ static int cgm_get(const char *filename, char *value, 
> size_t len, const char *na
>       cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
>       if (!cgroup)
>               return -1;
> -     cgm_dbus_connect();
> -     lock_mutex(&thread_mutex);
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             free(cgroup);
> +             return -1;
> +     }
>       if (cgmanager_get_value_sync(NULL, cgroup_manager, controller, cgroup, 
> filename, &result) != 0) {
>               /*
>                * must consume the nih error
> @@ -622,11 +632,9 @@ static int cgm_get(const char *filename, char *value, 
> size_t len, const char *na
>               nerr = nih_error_get();
>               nih_free(nerr);
>               free(cgroup);
> -             unlock_mutex(&thread_mutex);
>               cgm_dbus_disconnect();
>               return -1;
>       }
> -     unlock_mutex(&thread_mutex);
>       cgm_dbus_disconnect();
>       free(cgroup);
>       newlen = strlen(result);
> @@ -653,7 +661,6 @@ static int cgm_do_set(const char *controller, const char 
> *file,
>                        const char *cgroup, const char *value)
>  {
>       int ret;
> -     lock_mutex(&thread_mutex);
>       ret = cgmanager_set_value_sync(NULL, cgroup_manager, controller,
>                                cgroup, file, value);
>       if (ret != 0) {
> @@ -663,10 +670,12 @@ static int cgm_do_set(const char *controller, const 
> char *file,
>               nih_free(nerr);
>               ERROR("Error setting cgroup %s limit %s", file, cgroup);
>       }
> -     unlock_mutex(&thread_mutex);
>       return ret;
>  }
>  
> +/*
> + * cgm_set is called to change cgroup settings.  cgmanager is not connected
> + */
>  static int cgm_set(const char *filename, const char *value, const char 
> *name, const char *lxcpath)
>  {
>       char *controller, *key, *cgroup;
> @@ -686,7 +695,11 @@ static int cgm_set(const char *filename, const char 
> *value, const char *name, co
>                       controller, lxcpath, name);
>               return -1;
>       }
> -     cgm_dbus_connect();
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             free(cgroup);
> +             return -1;
> +     }
>       ret = cgm_do_set(controller, filename, cgroup, value);
>       cgm_dbus_disconnect();
>       free(cgroup);
> @@ -759,6 +772,11 @@ out_free:
>       return false;
>  }
>  
> +/*
> + * called during cgroup.c:cgroup_ops_init(), at startup.  No threads.
> + * We check whether we can talk to cgmanager, escape to root cgroup if
> + * we are root, then close the connection.
> + */
>  struct cgroup_ops *cgm_ops_init(void)
>  {
>       if (!collect_subsytems())
> @@ -767,8 +785,11 @@ struct cgroup_ops *cgm_ops_init(void)
>               goto err1;
>  
>       // root;  try to escape to root cgroup
> -     if (geteuid() == 0 && !lxc_cgmanager_escape())
> +     if (geteuid() == 0 && !lxc_cgmanager_escape()) {
> +             cgm_dbus_disconnect();
>               goto err2;
> +     }
> +     cgm_dbus_disconnect();
>  
>       return &cgmanager_ops;
>  
> @@ -779,6 +800,10 @@ err1:
>       return NULL;
>  }
>  
> +/*
> + * unfreeze is called by the command api after killing a container.
> + * cgmanager is not connected.
> + */
>  static bool cgm_unfreeze(void *hdata)
>  {
>       struct cgm_data *d = hdata;
> @@ -786,7 +811,10 @@ static bool cgm_unfreeze(void *hdata)
>       if (!d || !d->cgroup_path)
>               return false;
>  
> -     lock_mutex(&thread_mutex);
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             return false;
> +     }
>       if (cgmanager_set_value_sync(NULL, cgroup_manager, "freezer", 
> d->cgroup_path,
>                       "freezer.state", "THAWED") != 0) {
>               NihError *nerr;
> @@ -794,13 +822,17 @@ static bool cgm_unfreeze(void *hdata)
>               ERROR("call to cgmanager_set_value_sync failed: %s", 
> nerr->message);
>               nih_free(nerr);
>               ERROR("Error unfreezing %s", d->cgroup_path);
> -             unlock_mutex(&thread_mutex);
> +             cgm_dbus_disconnect();
>               return false;
>       }
> -     unlock_mutex(&thread_mutex);
> +     cgm_dbus_disconnect();
>       return true;
>  }
>  
> +/*
> + * setup_limits is called during startup.  cgmanager is connected, and mutex
> + * is held
> + */
>  static bool cgm_setup_limits(void *hdata, struct lxc_list *cgroup_settings, 
> bool do_devices)
>  {
>       struct cgm_data *d = hdata;
> @@ -888,7 +920,10 @@ static bool cgm_attach(const char *name, const char 
> *lxcpath, pid_t pid)
>               goto out;
>       }
>  
> -     cgm_dbus_connect();
> +     if (!cgm_dbus_connect()) {
> +             ERROR("Error connecting to cgroup manager");
> +             goto out;
> +     }
>       if (!(pass = do_cgm_enter(pid, cgroup)))
>               ERROR("Failed to enter group %s", cgroup);
>       cgm_dbus_disconnect();
> diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c
> index 23c8c96..63a2ad4 100644
> --- a/src/lxc/cgroup.c
> +++ b/src/lxc/cgroup.c
> @@ -168,14 +168,6 @@ int lxc_cgroup_get(const char *filename, char *value, 
> size_t len, const char *na
>       return -1;
>  }
>  
> -void restart_cgroups(void)
> -{
> -     if (ops && ops->disconnect)
> -             ops->disconnect();
> -     ops = NULL;
> -     cgroup_ops_init();
> -}
> -
>  void cgroup_disconnect(void)
>  {
>       if (ops && ops->disconnect)
> diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h
> index 8cba030..3e222a8 100644
> --- a/src/lxc/cgroup.h
> +++ b/src/lxc/cgroup.h
> @@ -65,7 +65,6 @@ extern bool cgroup_create_legacy(struct lxc_handler 
> *handler);
>  extern int cgroup_nrtasks(struct lxc_handler *handler);
>  extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char 
> *subsystem);
>  extern bool cgroup_unfreeze(struct lxc_handler *handler);
> -extern void restart_cgroups(void);
>  extern void cgroup_disconnect(void);
>  
>  #endif
> diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
> index b0ae44b..b7adebe 100644
> --- a/src/lxc/lxccontainer.c
> +++ b/src/lxc/lxccontainer.c
> @@ -637,7 +637,6 @@ static bool lxcapi_start(struct lxc_container *c, int 
> useinit, char * const argv
>               open("/dev/null", O_RDWR);
>               open("/dev/null", O_RDWR);
>               setsid();
> -             restart_cgroups();
>       } else {
>               if (!am_single_threaded()) {
>                       ERROR("Cannot start non-daemonized container when 
> threaded");
> diff --git a/src/lxc/start.c b/src/lxc/start.c
> index 97c8207..eb1c659 100644
> --- a/src/lxc/start.c
> +++ b/src/lxc/start.c
> @@ -773,6 +773,7 @@ static int lxc_spawn(struct lxc_handler *handler)
>  {
>       int failed_before_rename = 0;
>       const char *name = handler->name;
> +     bool cgroups_connected = false;
>       int saved_ns_fd[LXC_NS_MAX];
>       int preserve_mask = 0, i;
>       int netpipepair[2], nveths;
> @@ -842,6 +843,8 @@ static int lxc_spawn(struct lxc_handler *handler)
>               goto out_delete_net;
>       }
>  
> +     cgroups_connected = true;
> +
>       if (!cgroup_create(handler)) {
>               ERROR("failed creating cgroups");
>               goto out_delete_net;
> @@ -953,6 +956,7 @@ static int lxc_spawn(struct lxc_handler *handler)
>       }
>  
>       cgroup_disconnect();
> +     cgroups_connected = false;
>  
>       /* Tell the child to complete its initialization and wait for
>        * it to exec or return an error.  (the child will never
> @@ -981,6 +985,8 @@ static int lxc_spawn(struct lxc_handler *handler)
>       return 0;
>  
>  out_delete_net:
> +     if (cgroups_connected)
> +             cgroup_disconnect();
>       if (handler->clone_flags & CLONE_NEWNET)
>               lxc_delete_network(handler);
>  out_abort:
> -- 
> 1.9.0
> 
> _______________________________________________
> lxc-devel mailing list
> [email protected]
> http://lists.linuxcontainers.org/listinfo/lxc-devel

-- 
Stéphane Graber
Ubuntu developer
http://www.ubuntu.com

Attachment: signature.asc
Description: Digital signature

_______________________________________________
lxc-devel mailing list
[email protected]
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to