> Please check Documentation/CodingStyle. Every second line above 
> violates it. scripts/checkpatch.pl can help out with the more 
> obvious ones.
Ingo, 

Thank you for your comment.
You are right, I beg your pardon.
I have updated the patch, now it should be (more) consistent
with the Coding Style specifications.

This patch adds the new PTRACE_VM_SKIPCALL and PTRACE_VM_SKIPEXIT
tags for ptrace's addr parameter.
In this way it is possible to (eventually) get rid of PTRACE_SYSEMU
PTRACE_SYSEMU_SINGLESTEP, while providing not only the same features
but a more general support for Virtual Machines.
Part#2: user-mode Linux support.
User-mode Linux by this patch uses PTRACE_VM of the hosting operating system
and provides PTRACE_VM to its processes.
UML tests at startup which features are provided and uses PTRACE_VM or
PTRACE_SYSEMU (or nothing). PTRACE_VM and/or PTRACE_SYSEMU support can be
disabled by command line flags.

renzo

Signed-off-by: Renzo Davoli <re...@cs.unibo.it>
----
diff -Naur linux-2.6.29-rc7-git4/arch/um/include/shared/kern_util.h 
linux-2.6.29-rc7-git4.vm2/arch/um/include/shared/kern_util.h
--- linux-2.6.29-rc7-git4/arch/um/include/shared/kern_util.h    2009-03-11 
09:25:27.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/include/shared/kern_util.h        
2009-03-11 09:35:23.000000000 +0100
@@ -57,7 +57,7 @@
 extern unsigned long to_irq_stack(unsigned long *mask_out);
 extern unsigned long from_irq_stack(int nested);
 
-extern void syscall_trace(struct uml_pt_regs *regs, int entryexit);
+extern int syscall_trace(struct uml_pt_regs *regs, int entryexit);
 extern int singlestepping(void *t);
 
 extern void segv_handler(int sig, struct uml_pt_regs *regs);
diff -Naur linux-2.6.29-rc7-git4/arch/um/include/shared/ptrace_user.h 
linux-2.6.29-rc7-git4.vm2/arch/um/include/shared/ptrace_user.h
--- linux-2.6.29-rc7-git4/arch/um/include/shared/ptrace_user.h  2009-03-11 
09:25:27.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/include/shared/ptrace_user.h      
2009-03-11 09:35:23.000000000 +0100
@@ -40,9 +40,20 @@
 #define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS
 #endif
 
+/* these constant should eventually enter in sys/ptrace.h */
+#ifndef PTRACE_SYSCALL_SKIPCALL
+#define PTRACE_SYSCALL_SKIPCALL      0x6
+#endif
+#ifndef PTRACE_SYSCALL_SKIPEXIT
+#define PTRACE_SYSCALL_SKIPEXIT      0x2
+#endif
+
 void set_using_sysemu(int value);
 int get_using_sysemu(void);
 extern int sysemu_supported;
+void set_using_sysptvm(int value);
+int get_using_sysptvm(void);
+extern int sysptvm_supported;
 
 #define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \
        (((int[3][3] ) { \
diff -Naur linux-2.6.29-rc7-git4/arch/um/kernel/process.c 
linux-2.6.29-rc7-git4.vm2/arch/um/kernel/process.c
--- linux-2.6.29-rc7-git4/arch/um/kernel/process.c      2009-03-11 
09:25:27.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/kernel/process.c  2009-03-11 
10:03:05.000000000 +0100
@@ -322,7 +322,9 @@
 }
 
 static atomic_t using_sysemu = ATOMIC_INIT(0);
+static atomic_t using_sysptvm = ATOMIC_INIT(0);
 int sysemu_supported;
+int sysptvm_supported;
 
 void set_using_sysemu(int value)
 {
@@ -336,7 +338,18 @@
        return atomic_read(&using_sysemu);
 }
 
-static int proc_read_sysemu(char *buf, char **start, off_t offset, int 
size,int *eof, void *data)
+void set_using_sysptvm(int value)
+{
+       atomic_set(&using_sysptvm, value);
+}
+
+int get_using_sysptvm(void)
+{
+       return atomic_read(&using_sysptvm);
+}
+
+static int proc_read_sysemu(char *buf, char **start, off_t offset,
+               int size, int *eof, void *data)
 {
        if (snprintf(buf, size, "%d\n", get_using_sysemu()) < size)
                /* No overflow */
@@ -345,7 +358,8 @@
        return strlen(buf);
 }
 
-static int proc_write_sysemu(struct file *file,const char __user *buf, 
unsigned long count,void *data)
+static int proc_write_sysemu(struct file *file, const char __user *buf,
+               unsigned long count, void *data)
 {
        char tmp[2];
 
@@ -358,27 +372,63 @@
        return count;
 }
 
-int __init make_proc_sysemu(void)
+
+static int proc_read_sysptvm(char *buf, char **start, off_t offset,
+               int size, int *eof, void *data)
 {
-       struct proc_dir_entry *ent;
-       if (!sysemu_supported)
-               return 0;
+       int sysptvm = (get_using_sysptvm() != 0);
+       if (snprintf(buf, size, "%d\n", sysptvm) < size)
+               /* No overflow */
+               *eof = 1;
 
-       ent = create_proc_entry("sysemu", 0600, NULL);
+       return strlen(buf);
+}
 
-       if (ent == NULL)
-       {
-               printk(KERN_WARNING "Failed to register /proc/sysemu\n");
-               return 0;
-       }
+static int proc_write_sysptvm(struct file *file, const char __user *buf,
+               unsigned long count, void *data)
+{
+       char tmp[2];
+
+       if (copy_from_user(tmp, buf, 1))
+               return -EFAULT;
+
+       if (tmp[0] == '0')
+               set_using_sysptvm(0);
+       if (tmp[0] == '1')
+               set_using_sysemu(/* XXX */ 6);
+       /* We use the first char, but pretend to write everything */
+       return count;
+}
 
-       ent->read_proc  = proc_read_sysemu;
-       ent->write_proc = proc_write_sysemu;
+int __init make_proc_sysemu_or_sysptvm(void)
+{
+       struct proc_dir_entry *ent;
 
+       if (sysptvm_supported) {
+               ent = create_proc_entry("sysptvm", 0600, NULL);
+
+               if (ent == NULL) {
+                       printk(KERN_WARNING "Failed to register 
/proc/sysptvm\n");
+                       return 0;
+               }
+
+               ent->read_proc  = proc_read_sysptvm;
+               ent->write_proc = proc_write_sysptvm;
+       } else if (sysemu_supported) {
+               ent = create_proc_entry("sysemu", 0600, NULL);
+
+               if (ent == NULL) {
+                       printk(KERN_WARNING "Failed to register 
/proc/sysemu\n");
+                       return 0;
+               }
+
+               ent->read_proc  = proc_read_sysemu;
+               ent->write_proc = proc_write_sysemu;
+       }
        return 0;
 }
 
-late_initcall(make_proc_sysemu);
+late_initcall(make_proc_sysemu_or_sysptvm);
 
 int singlestepping(void * t)
 {
diff -Naur linux-2.6.29-rc7-git4/arch/um/kernel/ptrace.c 
linux-2.6.29-rc7-git4.vm2/arch/um/kernel/ptrace.c
--- linux-2.6.29-rc7-git4/arch/um/kernel/ptrace.c       2009-03-11 
09:30:19.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/kernel/ptrace.c   2009-03-11 
09:35:23.000000000 +0100
@@ -81,6 +81,8 @@
                if (request == PTRACE_SYSCALL)
                        set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
                else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+               child->ptrace &= ~PT_SYSCALL_MASK;
+               child->ptrace |= (addr & PTRACE_SYSCALL_MASK) << 28;
                child->exit_code = data;
                wake_up_process(child);
                ret = 0;
@@ -107,7 +109,9 @@
                ret = -EIO;
                if (!valid_signal(data))
                        break;
+               child->ptrace &= ~PT_SYSCALL_MASK;
                clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+               child->ptrace |= (addr & PTRACE_SYSCALL_MASK) << 28;
                set_singlestepping(child, 1);
                child->exit_code = data;
                /* give it a chance to run. */
@@ -250,7 +254,7 @@
  * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
  * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
  */
-void syscall_trace(struct uml_pt_regs *regs, int entryexit)
+int syscall_trace(struct uml_pt_regs *regs, int entryexit)
 {
        int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
        int tracesysgood;
@@ -272,10 +276,13 @@
                send_sigtrap(current, regs, 0);
 
        if (!test_thread_flag(TIF_SYSCALL_TRACE))
-               return;
+               return 0;
 
        if (!(current->ptrace & PT_PTRACED))
-               return;
+               return 0;
+
+       if (entryexit && (current->ptrace & PT_SYSCALL_SKIPEXIT))
+               return 0;
 
        /*
         * the 0x80 provides a way for the tracing parent to distinguish
@@ -296,4 +303,8 @@
                send_sig(current->exit_code, current, 1);
                current->exit_code = 0;
        }
+       if (!entryexit && (current->ptrace & PT_SYSCALL_SKIPCALL))
+               return 1;
+       else
+               return 0;
 }
diff -Naur linux-2.6.29-rc7-git4/arch/um/kernel/skas/syscall.c 
linux-2.6.29-rc7-git4.vm2/arch/um/kernel/skas/syscall.c
--- linux-2.6.29-rc7-git4/arch/um/kernel/skas/syscall.c 2009-03-11 
09:25:27.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/kernel/skas/syscall.c     2009-03-11 
09:41:29.000000000 +0100
@@ -17,8 +17,9 @@
        struct pt_regs *regs = container_of(r, struct pt_regs, regs);
        long result;
        int syscall;
+       int skip_call;
 
-       syscall_trace(r, 0);
+       skip_call = syscall_trace(r, 0);
 
        /*
         * This should go in the declaration of syscall, but when I do that,
@@ -29,12 +30,15 @@
         *     gcc version 4.0.1 20050727 (Red Hat 4.0.1-5)
         * in case it's a compiler bug.
         */
-       syscall = UPT_SYSCALL_NR(r);
-       if ((syscall >= NR_syscalls) || (syscall < 0))
-               result = -ENOSYS;
-       else result = EXECUTE_SYSCALL(syscall, regs);
+       if (skip_call == 0) {
+               syscall = UPT_SYSCALL_NR(r);
+               if ((syscall >= NR_syscalls) || (syscall < 0))
+                       result = -ENOSYS;
+               else
+                       result = EXECUTE_SYSCALL(syscall, regs);
 
-       REGS_SET_SYSCALL_RETURN(r->gp, result);
+               REGS_SET_SYSCALL_RETURN(r->gp, result);
+       }
 
        syscall_trace(r, 1);
 }
diff -Naur linux-2.6.29-rc7-git4/arch/um/os-Linux/skas/process.c 
linux-2.6.29-rc7-git4.vm2/arch/um/os-Linux/skas/process.c
--- linux-2.6.29-rc7-git4/arch/um/os-Linux/skas/process.c       2009-03-11 
09:25:27.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/os-Linux/skas/process.c   2009-03-11 
09:35:23.000000000 +0100
@@ -157,7 +157,7 @@
  * (in local_using_sysemu
  */
 static void handle_trap(int pid, struct uml_pt_regs *regs,
-                       int local_using_sysemu)
+                       int local_using_sysptvm_or_sysemu)
 {
        int err, status;
 
@@ -167,7 +167,7 @@
        /* Mark this as a syscall */
        UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp);
 
-       if (!local_using_sysemu)
+       if (!local_using_sysptvm_or_sysemu)
        {
                err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
                             __NR_getpid);
@@ -354,6 +354,7 @@
        int err, status, op, pid = userspace_pid[0];
        /* To prevent races if using_sysemu changes under us.*/
        int local_using_sysemu;
+       int local_using_sysptvm;
 
        if (getitimer(ITIMER_VIRTUAL, &timer))
                printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno);
@@ -375,11 +376,12 @@
 
                /* Now we set local_using_sysemu to be used for one loop */
                local_using_sysemu = get_using_sysemu();
+               local_using_sysptvm = get_using_sysptvm();
 
                op = SELECT_PTRACE_OPERATION(local_using_sysemu,
                                             singlestepping(NULL));
 
-               if (ptrace(op, pid, 0, 0)) {
+               if (ptrace(op, pid, local_using_sysptvm, 0)) {
                        printk(UM_KERN_ERR "userspace - ptrace continue "
                               "failed, op = %d, errno = %d\n", op, errno);
                        fatal_sigsegv();
diff -Naur linux-2.6.29-rc7-git4/arch/um/os-Linux/start_up.c 
linux-2.6.29-rc7-git4.vm2/arch/um/os-Linux/start_up.c
--- linux-2.6.29-rc7-git4/arch/um/os-Linux/start_up.c   2009-03-11 
09:25:27.000000000 +0100
+++ linux-2.6.29-rc7-git4.vm2/arch/um/os-Linux/start_up.c       2009-03-11 
09:58:40.000000000 +0100
@@ -198,6 +198,34 @@
 "    See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
 "    information.\n\n");
 
+/* Changed only during early boot */
+static int force_sysptvm_disabled;
+
+static int __init nosysptvm_cmd_param(char *str, int* add)
+{
+       force_sysptvm_disabled = 1;
+       return 0;
+}
+
+__uml_setup("nosysptvm", nosysptvm_cmd_param,
+"nosysptvm\n"
+"    Turns off syscall emulation tags for ptrace (ptrace_vm) on.\n"
+"    Ptrace_vm is a feature introduced by Renzo Davoli. It changes\n"
+"    behaviour of ptrace() and helps reducing host context switch rate.\n\n");
+
+static int use_sysemu;
+
+static int __init usesysemu_cmd_param(char *str, int* add)
+{
+       use_sysemu = 1;
+       return 0;
+}
+
+__uml_setup("usesysemu", usesysemu_cmd_param,
+"usesysemu\n"
+"    Use sysemu instead of sysptvm even when the kernel supports it.\n\n"
+);
+
 static void __init check_sysemu(void)
 {
        unsigned long regs[MAX_REG_NR];
@@ -293,6 +321,114 @@
        non_fatal("missing\n");
 }
 
+/*
+ * test thread code. This thread is started only to test
+ * which features are provided by the linux kernel
+ */
+static int sysptvm_child(void *arg)
+{
+       int *featurep = arg;
+       int p[2] = {-1, -1};
+       pid_t pid = os_getpid();
+       if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
+               perror("ptrace test_ptracemulti");
+               kill(pid, SIGKILL);
+       }
+       kill(pid, SIGSTOP);
+       *featurep = 0;
+       os_getpid();
+       /*
+        * if it reaches this point in 1 stop it means that
+        * PTRACE_SYSCALL_SKIPEXIT works
+        */
+       *featurep = PTRACE_SYSCALL_SKIPEXIT;
+       pipe(p);
+       /*
+        * if after a PTRACE_SYSCALL_SKIPCALL p[0] is already <0
+        * pipe has been really skipped
+        */
+       if (p[0] < 0)
+               *featurep = PTRACE_SYSCALL_SKIPCALL;
+       else { /* clean up everything */
+               close(p[0]);
+               close(p[1]);
+       }
+       return 0;
+}
+
+/*
+ * kernel feature test:
+ * it returns:
+ *   -1 error
+ *   0 old PTRACE_SYSCALL (addr is ignored)
+ *   PTRACE_SYSCALL_SKIPEXIT: just skip_exit is provided
+ *   PTRACE_SYSCALL_SKIPCALL: the entire syntax is implemented
+ *   by the running kernel
+ */
+static int __init test_ptrace_sysptvm(void)
+{
+       int pid, status, rv, feature;
+       static char stack[1024];
+       feature = 0;
+
+       pid = clone(sysptvm_child, &stack[1020], SIGCHLD | CLONE_VM, &feature);
+       if (pid < 0)
+               return 0;
+       if (waitpid(pid, &status, WUNTRACED) < 0) {
+               kill(pid, SIGKILL);
+               return 0;
+       }
+       /* restart and wait for the next syscall (getpid)*/
+       rv = ptrace(PTRACE_SYSCALL, pid, 0, 0);
+       if (waitpid(pid, &status, WUNTRACED) < 0)
+               goto out;
+       /* try to skip the exit call */
+       rv = ptrace(PTRACE_SYSCALL, pid, PTRACE_SYSCALL_SKIPEXIT, 0);
+       if (rv < 0)
+               goto out;
+       /* wait for the next stop */
+       if (waitpid(pid, &status, WUNTRACED) < 0)
+               goto out;
+       /*
+        * if feature is already 0 it means that this is the exit call,
+        * and it has not been skipped, otherwise this is the
+        * entry call for the system call "time"
+        */
+       if (feature < PTRACE_SYSCALL_SKIPEXIT)
+               goto out;
+       /* restart (time) and and try to skip the entire call */
+       rv = ptrace(PTRACE_SYSCALL, pid, PTRACE_SYSCALL_SKIPCALL, 0);
+       if (waitpid(pid, &status, WUNTRACED) < 0)
+               return 0;
+out:
+       ptrace(PTRACE_KILL, pid, 0, 0);
+       /* eliminate zombie */
+       if (waitpid(pid, &status, WUNTRACED) < 0)
+               return 0;
+       return feature;
+}
+
+static int  __init check_sysptvm(void)
+{
+       int feature = test_ptrace_sysptvm();
+
+       non_fatal("Checking ptrace new tags for syscall emulation...");
+       if (feature == PTRACE_SYSCALL_SKIPCALL) {
+               sysptvm_supported = 1;
+               non_fatal("OK");
+               if (!force_sysptvm_disabled) {
+                       set_using_sysptvm(PTRACE_SYSCALL_SKIPCALL);
+                       non_fatal("\n");
+                       return 1;
+               } else {
+                       non_fatal(" (disabled)\n");
+                       return 0;
+               }
+       } else
+               non_fatal("unsupported\n");
+       return 0;
+}
+
 static void __init check_ptrace(void)
 {
        int pid, syscall, n, status;
@@ -330,7 +466,8 @@
        }
        stop_ptraced_child(pid, 0, 1);
        non_fatal("OK\n");
-       check_sysemu();
+       if (use_sysemu || !check_sysptvm())
+               check_sysemu();
 }
 
 extern void check_tmpexec(void);

------------------------------------------------------------------------------
Apps built with the Adobe(R) Flex(R) framework and Flex Builder(TM) are
powering Web 2.0 with engaging, cross-platform capabilities. Quickly and
easily build your RIAs with Flex Builder, the Eclipse(TM)based development
software that enables intelligent coding and step-through debugging.
Download the free 60 day trial. http://p.sf.net/sfu/www-adobe-com
_______________________________________________
User-mode-linux-devel mailing list
User-mode-linux-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel

Reply via email to