25.08.2011, 02:07, "Greg Kurz" <gk...@fr.ibm.com>: > On Wed, 2011-08-24 at 14:17 +0400, Vladimir Smirnov wrote: > >> Clone allows more flexible control. Currently, if there is any inherited fd, >> lxc-start exits with error. With clone it's possible not to pass open fd's >> to childs. > > Hmm... when it comes to file descriptors, you have two flavours: > - CLONE_FILES: the child will share the file descriptor table with the > parent. If any of the two tasks open or close a fd, the other will also > see the change. > - without CLONE_FILES: the child will get a copy of the parent's file > descriptor table. Then, each task may open or close a fd without effect > for the other. > > As you see, the child shares or at least inherits a copy of all the fd's > with its parent. Honestly, I don't know any clone flag that would > prevent the child to be passed a fd...
Yes. I've missunderstood it. Maybe change back to fork, but 1-st action of child should be closing all fd's except of whitelist? As it was before rev 80090207defda9adfc922555b359b11acdad1401? > >> Signed-off-by: Vladimir Smirnov <ci...@yandex-team.ru> >> --- >> src/lxc/lxc_attach.c | 79 >> +++++++++++++++++++++++++++++--------------------- >> src/lxc/lxc_init.c | 46 ++++++++++++++++++----------- >> 2 files changed, 75 insertions(+), 50 deletions(-) >> >> diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c > > Why do you need to patch lxc-attach ? AFAIK it doesn't check for > inherited file descriptors... No need, yes. > >> index ed3d5a4..930865e 100644 >> --- a/src/lxc/lxc_attach.c >> +++ b/src/lxc/lxc_attach.c >> @@ -20,6 +20,7 @@ >> * License along with this library; if not, write to the Free Software >> * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA >> */ >> +#define STACKSIZE 16384 >> >> #define _GNU_SOURCE >> #include <unistd.h> >> @@ -29,6 +30,7 @@ >> #include <sys/param.h> >> #include <sys/types.h> >> #include <sys/wait.h> >> +#include <sched.h> >> >> #include "commands.h" >> #include "arguments.h" >> @@ -56,14 +58,48 @@ Options :\n\ >> .checker = NULL, >> }; >> >> +static char *envp_global; >> + >> +int child_main() >> +{ >> + struct passwd *passwd; >> + uid_t uid; >> + >> + if (my_args.argc) { >> + execve(my_args.argv[0], my_args.argv, envp_global); >> + SYSERROR("failed to exec '%s'", my_args.argv[0]); >> + return -1; >> + } >> + >> + uid = getuid(); >> + >> + passwd = getpwuid(uid); >> + if (!passwd) { >> + SYSERROR("failed to get passwd " \ >> + "entry for uid '%d'", uid); >> + return -1; >> + } >> + >> + { >> + char *const args[] = { >> + passwd->pw_shell, >> + NULL, >> + }; >> + >> + execve(args[0], args, envp_global); >> + SYSERROR("failed to exec '%s'", args[0]); >> + return -1; >> + } >> +} >> + >> int main(int argc, char *argv[], char *envp[]) >> { >> int ret; >> pid_t pid; >> - struct passwd *passwd; >> - uid_t uid; >> char *curdir; >> >> + envp_global = envp; >> + >> ret = lxc_caps_init(); >> if (ret) >> return ret; >> @@ -96,7 +132,14 @@ int main(int argc, char *argv[], char *envp[]) >> >> free(curdir); >> >> - pid = fork(); >> + void **newstack; >> + newstack = (void **) malloc(STACKSIZE); >> + if (!newstack) >> + exit(newstack); >> + >> + newstack = (void **)(STACKSIZE + (char *)newstack); >> + *--newstack = 0; >> + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0); >> >> if (pid < 0) { >> SYSERROR("failed to fork"); >> @@ -120,35 +163,5 @@ int main(int argc, char *argv[], char *envp[]) >> return -1; >> } >> >> - if (!pid) { >> - >> - if (my_args.argc) { >> - execve(my_args.argv[0], my_args.argv, envp); >> - SYSERROR("failed to exec '%s'", my_args.argv[0]); >> - return -1; >> - } >> - >> - uid = getuid(); >> - >> - passwd = getpwuid(uid); >> - if (!passwd) { >> - SYSERROR("failed to get passwd " \ >> - "entry for uid '%d'", uid); >> - return -1; >> - } >> - >> - { >> - char *const args[] = { >> - passwd->pw_shell, >> - NULL, >> - }; >> - >> - execve(args[0], args, envp); >> - SYSERROR("failed to exec '%s'", args[0]); >> - return -1; >> - } >> - >> - } >> - >> return 0; >> } >> diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c >> index a534b51..7debd6f 100644 >> --- a/src/lxc/lxc_init.c >> +++ b/src/lxc/lxc_init.c >> @@ -20,6 +20,7 @@ >> * License along with this library; if not, write to the Free Software >> * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA >> */ >> +#define STACKSIZE 16384 >> >> #include <stdio.h> >> #include <unistd.h> >> @@ -32,6 +33,7 @@ >> #include <sys/wait.h> >> #define _GNU_SOURCE >> #include <getopt.h> >> +#include <sched.h> >> >> #include "log.h" >> #include "caps.h" >> @@ -40,6 +42,9 @@ >> >> lxc_log_define(lxc_init, lxc); >> >> +static sigset_t mask, omask; >> +static char **aargv; >> + >> static int quiet; >> >> static struct option options[] = { >> @@ -49,6 +54,23 @@ static struct option options[] = { >> >> static int was_interrupted = 0; >> >> +int child_main() >> +{ >> + int i; >> + int err = -1; >> + /* restore default signal handlers */ >> + for (i = 1; i < NSIG; i++) >> + signal(i, SIG_DFL); >> + >> + sigprocmask(SIG_SETMASK, &omask, NULL); >> + >> + NOTICE("about to exec '%s'", aargv[0]); >> + >> + execvp(aargv[0], aargv); >> + ERROR("failed to exec: '%s' : %m", aargv[0]); >> + exit(err); >> +} >> + >> int main(int argc, char *argv[]) >> { >> >> @@ -61,8 +83,6 @@ int main(int argc, char *argv[]) >> pid_t pid; >> int nbargs = 0; >> int err = -1; >> - char **aargv; >> - sigset_t mask, omask; >> int i, shutdown = 0; >> >> while (1) { >> @@ -115,25 +135,17 @@ int main(int argc, char *argv[]) >> if (lxc_caps_reset()) >> exit(err); >> >> - pid = fork(); >> - >> - if (pid < 0) >> + void **newstack; >> + newstack = (void **) malloc(STACKSIZE); >> + if (!newstack) >> exit(err); >> >> - if (!pid) { >> - >> - /* restore default signal handlers */ >> - for (i = 1; i < NSIG; i++) >> - signal(i, SIG_DFL); >> + newstack = (void **)(STACKSIZE + (char *)newstack); >> + *--newstack = 0; >> + pid = clone(child_main, newstack, CLONE_VM | CLONE_FS | CLONE_SIGHAND, 0); > > Passing CLONE_VM here means execvp() will flush the memory for both > parent and child... This means the lxc-init code doesn't run anymore and > this isn't acceptable, as it honors the child ripping work in an > application container. Yes... I understand it now. > >> - sigprocmask(SIG_SETMASK, &omask, NULL); >> - >> - NOTICE("about to exec '%s'", aargv[0]); >> - >> - execvp(aargv[0], aargv); >> - ERROR("failed to exec: '%s' : %m", aargv[0]); >> + if (pid < 0) >> exit(err); >> - } >> >> /* let's process the signals now */ >> sigdelset(&omask, SIGALRM); > -- > Gregory Kurz gk...@fr.ibm.com > Software Engineer @ IBM/Meiosys http://www.ibm.com > Tel +33 (0)534 638 479 Fax +33 (0)561 400 420 > > "Anarchy is about taking complete responsibility for yourself." > Alan Moore. -- С уважением, Смирнов Владимир. http://staff.yandex-team.ru/civil ------------------------------------------------------------------------------ EMC VNX: the world's simplest storage, starting under $10K The only unified storage solution that offers unified management Up to 160% more powerful than alternatives and 25% more efficient. Guaranteed. http://p.sf.net/sfu/emc-vnx-dev2dev _______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel