On Fri, Feb 20, 2015 at 4:43 PM, Andy Lutomirski <[email protected]> wrote:
> On Fri, Feb 20, 2015 at 4:31 PM, Eric W. Biederman
> <[email protected]> wrote:
>> Andy Lutomirski <[email protected]> writes:
>>
>>> On Thu, Feb 19, 2015 at 8:38 AM, Alexander Larsson <[email protected]> wrote:
>>>> On Tue, 2015-02-17 at 13:23 -0800, Andy Lutomirski wrote:
>>>>
>>>>>  - setuid / privileged helper.  Why do you need a privileged helper?
>>>>> You should be able to do all of this using user namespaces.  The
>>>>> Sandstorm code linked above does exactly this.
>>>>
>>>> I tried this a bit, but I ran into two snags I don't understand.
>>>>
>>>> First of all, as uid/gid 1000 I can put "1000 1000 1"
>>>> in /proc/self/uid_map from the child. However, I cannot put "1000 1000
>>>> 1" into gid_map, as I get EPERM.
>>>> I don't understand this; is this not supposed to work?
>>>
>>> You need newer manpages :-/  Try the attached variant.
>>
>> Yeah.  You need to disable setgroups for that to work.
>>
>>>> Secondly, I'm failing to mount another instance of devpts. It fails with
>>>> EINVAL.
>>>
>>> Hmm.  Off the top of my head, there's no good reason that devpts with
>>> the newinstance option couldn't be allowed in a userns.  Eric, any
>>> thoughts here?  The patch would be straightforward.
>>
>> Looking at the code you have to have uid 0 and gid 0 mapped and you have
>> to specify newinstance.  But devpts is mountable without being the
>> global root user.
>
> Wow, my grepping skills are nonexistent today.
>
>>
>> The restriction of having uid 0 and gid 0 mapped is just that /dev/ptmx is
>> and has always been owned by root and so mknod_ptmx just won't let you
>> create a device inode with a uid or gid you can't map.
>
> All we'd have to do is to add ptmx_uid and ptmx_gid options, right?
> I'll send a patch.

With the patch I just sent out, the attached version of the test code works.

--Andy
#define _GNU_SOURCE /* Required for CLONE_NEWNS */
#include <assert.h>
#include <arpa/inet.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/loop.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <poll.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/signalfd.h>
#include <sys/capability.h>
#include <sys/prctl.h>
#include <unistd.h>

/* Report a fatal error and terminate the process.
 *
 * Writes the printf-style message built from FORMAT and the following
 * arguments to stderr, appends ": " plus the strerror() text for the
 * errno value that was current on entry, ends the line, and calls
 * exit (1).  Never returns.
 */
static void
die_with_error (const char *format, ...)
{
  /* Snapshot errno before any stdio call gets a chance to change it. */
  const int saved_errno = errno;
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  fputs (": ", stderr);
  fputs (strerror (saved_errno), stderr);
  fputc ('\n', stderr);

  exit (1);
}

/* Report a fatal error that has no associated errno and terminate.
 *
 * Writes the printf-style message built from FORMAT and the following
 * arguments to stderr, followed by a newline, then calls exit (1).
 * Never returns.
 */
static void
die (const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  vfprintf (stderr, format, ap);
  va_end (ap);

  fputc ('\n', stderr);

  exit (1);
}

/* Format a string into freshly allocated memory.
 *
 * FORMAT and the following arguments are interpreted as by printf().
 * Returns a NUL-terminated heap string that the caller must free().
 * Never returns NULL: if formatting or allocation fails the process is
 * terminated through die ("oom").
 */
static char*
strdup_printf (const char *format,
               ...)
{
  char *buffer = NULL;
  va_list args;
  int len;

  va_start (args, format);
  len = vasprintf (&buffer, format, args);
  va_end (args);

  /* vasprintf() signals failure through its return value; the pointer
   * contents are unspecified in that case, so checking only the pointer
   * is not enough. */
  if (len < 0 || buffer == NULL)
    die ("oom");

  return buffer;
}

/* Invoke the clone system call directly, bypassing the libc wrapper.
 *
 * FLAGS and CHILD_STACK are passed straight to the kernel.  Returns the
 * value of syscall() truncated to int: per the usual syscall()
 * convention, -1 with errno set on failure.
 */
static inline int raw_clone(unsigned long flags, void *child_stack) {
        long ret;

#if defined(__s390__) || defined(__CRIS__)
        /* s390 and cris take the stack pointer first and the flags
         * second, the reverse of the other architectures. */
        ret = syscall(__NR_clone, child_stack, flags);
#else
        ret = syscall(__NR_clone, flags, child_stack);
#endif

        return (int) ret;
}

/* Write the whole NUL-terminated string CONTENT to the descriptor FD.
 *
 * Keeps calling write() until every byte has been accepted, retrying
 * when a call fails with EINTR.  Returns 0 on success (including for an
 * empty string, in which case write() is never called) and -1 if
 * write() fails with any other error or reports zero bytes written.
 */
static int
write_to_file (int fd, const char *content)
{
  const char *cursor = content;
  ssize_t remaining = strlen (content);

  while (remaining > 0)
    {
      ssize_t written = write (fd, cursor, remaining);

      if (written > 0)
        {
          /* Partial or complete write: advance past what was accepted. */
          cursor += written;
          remaining -= written;
        }
      else if (written == 0 || errno != EINTR)
        {
          return -1;
        }
      /* Otherwise the call was interrupted by a signal: try again. */
    }

  return 0;
}

/* Open the existing file PATH and write the string CONTENT to it.
 *
 * The file is opened O_RDWR | O_CLOEXEC and is never created.  If
 * CONTENT is NULL the file is only opened and closed again.
 *
 * Returns 0 on success and -1 if the open or the write fails.  On
 * failure errno still describes the failing open()/write(): it is
 * preserved across the close() so that callers can report it.
 */
static int
write_file (const char *path, const char *content)
{
  int fd;
  int res = 0;
  int errsv;

  fd = open (path, O_RDWR | O_CLOEXEC, 0);
  if (fd == -1)
    return -1;

  if (content)
    res = write_to_file (fd, content);

  /* close() may itself set errno; keep the value from the write so the
   * caller's error message names the real cause. */
  errsv = errno;
  close (fd);
  errno = errsv;

  return res;
}

/* Test program: start /bin/sh inside a new user + mount namespace.
 *
 * The process clones itself into new user and mount namespaces.  The
 * parent waits for the child and exits with the child's status.  The
 * child:
 *   1. writes "deny" to /proc/self/setgroups (needed before gid_map can
 *      be written),
 *   2. maps its own uid and gid one-to-one via uid_map / gid_map,
 *   3. mounts a tmpfs on /tmp/foo and a new devpts instance on
 *      /tmp/foo/devpts,
 *   4. execs /bin/sh.
 *
 * Failures writing gid_map or mounting devpts are reported on stderr but
 * are not fatal, so the shell can still be used to inspect the result.
 * Every other failure terminates the process with exit status 1.
 *
 * ARGC and ARGV are unused.
 */
int
main (int argc,
      char **argv)
{
  char *args[] = { "/bin/sh", NULL };
  pid_t pid;
  char *uid_map, *gid_map, *devpts_opts;
  uid_t uid;
  gid_t gid;

  (void) argc;
  (void) argv;

  /* Must be read before the clone: the ids to map are those of the
   * invoking user in the parent namespace. */
  uid = getuid ();
  gid = getgid ();

  /* SIGCHLD in the low bits makes the child report termination like a
   * fork()ed process, so the parent can wait for it. */
  pid = raw_clone (SIGCHLD | CLONE_NEWUSER | CLONE_NEWNS,
		   NULL);
  if (pid == -1)
    die_with_error ("Creating new namespace failed");

  if (pid != 0)
    {
      int status;

      /* Parent: wait for the child, retrying if interrupted by a
       * signal, and pass its outcome on to our own caller. */
      while (waitpid (pid, &status, 0) == -1)
	{
	  if (errno != EINTR)
	    die_with_error ("waiting for child");
	}

      if (WIFEXITED (status))
	return WEXITSTATUS (status);
      if (WIFSIGNALED (status))
	return 128 + WTERMSIG (status);
      return 1;
    }

  /* Child from here on. */

  if (write_file("/proc/self/setgroups", "deny\n") < 0)
    die_with_error ("error writing to setgroups");

  uid_map = strdup_printf ("%u %u 1\n", (unsigned) uid, (unsigned) uid);
  if (write_file ("/proc/self/uid_map", uid_map) < 0)
    die_with_error ("setting up uid map");
  free (uid_map);

  gid_map = strdup_printf ("%u %u 1\n", (unsigned) gid, (unsigned) gid);
  if (write_file ("/proc/self/gid_map", gid_map) < 0)
    {
      /* Deliberately non-fatal: report and carry on. */
      int errsv = errno;
      fprintf (stderr, "error writing to gid_map: %s, content: %s", strerror (errsv), gid_map);
    }
  free (gid_map);

  if (mkdir ("/tmp/foo", 0755) && errno != EEXIST)
    die_with_error ("unable to create tmp");

  if (mount ("", "/tmp/foo", "tmpfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) != 0)
    die_with_error ("Failed to mount tmpfs");

  if (mkdir ("/tmp/foo/devpts", 0755))
    die_with_error ("unable to create devpts directory");

  /* Ask for ptmx to be owned by the ids that were just mapped, rather
   * than a fixed 1000/1000 which is only mapped for that one user.
   * NOTE(review): the ptmxuid/ptmxgid option names are kept as the
   * original test code spelled them; they presumably depend on the
   * kernel patch this test accompanies -- confirm against that patch. */
  devpts_opts = strdup_printf ("newinstance,ptmxuid=%u,ptmxgid=%u",
			       (unsigned) uid, (unsigned) gid);
  if (mount ("", "/tmp/foo/devpts", "devpts", MS_NOEXEC|MS_NOSUID, devpts_opts) != 0)
    {
      /* Deliberately non-fatal: report and still start the shell. */
      int errsv = errno;
      fprintf (stderr, "error mounting devpts: %s\n", strerror (errsv));
    }
  free (devpts_opts);

  /* execv() only returns on failure. */
  execv (args[0], args);
  die_with_error ("execv %s", args[0]);

  return 1;
}
_______________________________________________
gnome-os-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/gnome-os-list

Reply via email to