Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread Robin Getz
On Wed 2 Jan 2008 22:43, David Brownell pondered:
> This patch might be improved slightly -- in ways that, as I
> understand things, could save some RAM on Blackfin! -- by
> having the BLACKLIST_HUB option get rid of the transaction
> translator support (changing C code not just Kconfig).
> It's pretty minimal, but won't be used...
>
> Also, as you point out, it's no longer OTG specific, so
> renaming the option would improve clarity.   Maybe to
> something along the lines of USB_HOST_NO_EXTERNAL_HUBS.
> No big deal, at least now.

Are these two things something that you want me to have a stab at, or, since
you are more familiar with it, would you rather do them yourself?

Thanks
-Robin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/7] convert semaphore to mutex in struct class

2008-01-02 Thread Jarek Poplawski
On Thu, Jan 03, 2008 at 03:21:36PM +0800, Dave Young wrote:
...
> I don't know if there's other possible warning places with this mutex
> or not,  if you have any ideas about this, please tell me.

I think lockdep exists precisely to report such things. So, the question
is how much this has been tested already, because if many warnings are
reported, e.g. after merging to -mm, then it could be better to re-do
it the other way... But I hope this will not be necessary.

Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread David Brownell
On Wednesday 02 January 2008, Bryan Wu wrote:
> B.T.W, 2 questions about the MUSB driver:
> 1. What's the plan for mainline merge of the whole MUSB driver? maybe
> I can cleanup current Blackfin ports to you guys.

It might as well merge in 2.6.25-early.  It'll be easier to integrate
patches that way.  Including your Blackfin port.  When I asked before,
nobody hugely objected to "one big patch".


> 2. Do you remember the PING issue I reported in OMAP list? How do you
> think of that?

Yes, something needs to be done.  EHCI might benefit from the same
kind of patch, to cope with ill-behaved usb thumb drives.


> > I have an idea: the USB_NAKLIMIT0 should be set to some value, when we
> > got NAK timeout interrupt, we can disable PING. When enumerating normal
> > USB stick, there is no NAK timeout, because PING is efficient for
> > handshaking. While problem USB stick will generate NAK timeout
> > interrupt, we disable PING for this kind of USB device enumeration.
 
That sounds much better than your original patch, which disabled it
always.  It might even be worth allocating a flag somewhere in the
usb_device structure to record when ep0 pings seem to misbehave.

- Dave


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 11/11] Add a symbol only trace output

2008-01-02 Thread Steven Rostedt
The trace output is very verbose, outputting both the
IP address (Instruction Pointer not Internet Protocol!)
and the kallsyms symbol. So if kallsyms is configured into
the kernel, another file is created in the debugfs system.
This is the trace_symonly file that leaves out the IP address.

Here's an example:

CPU 1: swapper:0 smp_apic_timer_interrupt+0xc/0x58 <-- 
apic_timer_interrupt+0x66/0x70
CPU 1: swapper:0 exit_idle+0x9/0x22 <-- smp_apic_timer_interrupt+0x35/0x58
CPU 0: sshd:2611 _spin_unlock+0x9/0x38 <-- __qdisc_run+0xb2/0x1a1
CPU 1: swapper:0 __exit_idle+0x9/0x2e <-- exit_idle+0x20/0x22
CPU 0: sshd:2611 _spin_lock+0xe/0x7a <-- __qdisc_run+0xba/0x1a1
CPU 1: swapper:0 atomic_notifier_call_chain+0x9/0x16 <-- __exit_idle+0x2c/0x2e
CPU 1: swapper:0 __atomic_notifier_call_chain+0xe/0x56 <-- 
atomic_notifier_call_chain+0x14/0x16


Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 lib/mcount/tracer.c |  161 ++--
 1 file changed, 106 insertions(+), 55 deletions(-)

Index: linux-compile.git/lib/mcount/tracer.c
===
--- linux-compile.git.orig/lib/mcount/tracer.c  2008-01-03 00:29:31.0 
-0500
+++ linux-compile.git/lib/mcount/tracer.c   2008-01-03 00:37:40.0 
-0500
@@ -85,6 +85,7 @@ struct mctracer_iterator {
unsigned long next_idx[NR_CPUS];
int cpu;
int idx;
+   int sym_only;
 };
 
 static struct mctracer_entry *mctracer_entry_idx(struct mctracer_trace *tr,
@@ -156,7 +157,7 @@ static void *s_next(struct seq_file *m, 
return ent;
 }
 
-static void *s_start(struct seq_file *m, loff_t *pos)
+static void *__s_start(struct seq_file *m, loff_t *pos, int sym_only)
 {
struct mctracer_iterator *iter = m->private;
void *p = NULL;
@@ -166,6 +167,8 @@ static void *s_start(struct seq_file *m,
iter->ent = NULL;
iter->cpu = 0;
iter->idx = -1;
+   iter->sym_only = sym_only;
+
for (i = 0; i < NR_CPUS; i++)
iter->next_idx[i] = 0;
 
@@ -179,6 +182,11 @@ static void *s_start(struct seq_file *m,
return p;
 }
 
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+   return __s_start(m, pos, 0);
+}
+
 static void s_stop(struct seq_file *m, void *p)
 {
struct mctracer_iterator *iter = m->private;
@@ -186,58 +194,7 @@ static void s_stop(struct seq_file *m, v
register_mcount_function(trace_function);
 }
 
-#ifdef CONFIG_KALLSYMS
-static void seq_print_symbol(struct seq_file *m,
-const char *fmt, unsigned long address)
-{
-   char buffer[KSYM_SYMBOL_LEN];
-
-   sprint_symbol(buffer, address);
-   seq_printf(m, fmt, buffer);
-}
-#else
-# define seq_print_symbol(m, fmt, address) do { } while (0)
-#endif
-
-#ifndef CONFIG_64BIT
-#define seq_print_ip_sym(m, ip)\
-do {   \
-   seq_printf(m, "[<%08lx>]", ip); \
-   seq_print_symbol(m, " %s", ip); \
-} while (0)
-#else
-#define seq_print_ip_sym(m, ip)\
-do {   \
-   seq_printf(m, "[<%016lx>]", ip);\
-   seq_print_symbol(m, " %s", ip); \
-} while (0)
-#endif
-
-static int s_show(struct seq_file *m, void *v)
-{
-   int i = (long)(v);
-   struct mctracer_iterator *iter = v;
-
-   if (i == 1) {
-   seq_printf(m, "mctracer:\n");
-   } else {
-   if (!iter->ent) {
-   seq_printf(m, " ERROR ent is NULL!\n");
-   return -1;
-   }
-
-   seq_printf(m, "CPU %d: ", iter->cpu);
-   seq_printf(m, "%s:%d ", iter->ent->comm, iter->ent->pid);
-   seq_print_ip_sym(m, iter->ent->ip);
-   if (iter->ent->parent_ip) {
-   seq_printf(m, " <-- ");
-   seq_print_ip_sym(m, iter->ent->parent_ip);
-   }
-   seq_printf(m, "\n");
-   }
-
-   return 0;
-}
+static int s_show(struct seq_file *m, void *v);
 
 static struct seq_operations mctrace_seq_ops = {
.start = s_start,
@@ -246,7 +203,8 @@ static struct seq_operations mctrace_seq
.show = s_show,
 };
 
-static int mctrace_open (struct inode *inode, struct file *file)
+static int __mctrace_open(struct inode *inode, struct file *file,
+ struct seq_operations *seq_ops)
 {
struct mctracer_iterator *iter;
int ret;
@@ -258,7 +216,7 @@ static int mctrace_open (struct inode *i
iter->tr = &mctracer_trace;
 
/* TODO stop tracer */
-   ret = seq_open(file, &mctrace_seq_ops);
+   ret = seq_open(file, seq_ops);
if (!ret) {
struct seq_file *m = file->private_data;
m->private = iter;
@@ -268,6 +226,11 @@ static int mctrace_open (struct inode *i
return ret;
 }
 
+static int m

[RFC PATCH 05/11] x86_64: notrace annotations

2008-01-02 Thread Steven Rostedt
Add "notrace" annotation to x86_64 specific files.

Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 arch/x86/kernel/head64.c  |2 +-
 arch/x86/kernel/nmi_64.c  |2 +-
 arch/x86/kernel/setup64.c |4 ++--
 arch/x86/kernel/smpboot_64.c  |2 +-
 arch/x86/kernel/tsc_64.c  |4 ++--
 arch/x86/kernel/vsyscall_64.c |3 ++-
 6 files changed, 9 insertions(+), 8 deletions(-)

Index: linux-compile.git/arch/x86/kernel/head64.c
===
--- linux-compile.git.orig/arch/x86/kernel/head64.c 2007-12-19 
21:44:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/head64.c  2007-12-20 00:52:47.0 
-0500
@@ -46,7 +46,7 @@ static void __init copy_bootdata(char *r
}
 }
 
-void __init x86_64_start_kernel(char * real_mode_data)
+notrace void __init x86_64_start_kernel(char *real_mode_data)
 {
int i;
 
Index: linux-compile.git/arch/x86/kernel/nmi_64.c
===
--- linux-compile.git.orig/arch/x86/kernel/nmi_64.c 2007-12-19 
21:44:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/nmi_64.c  2007-12-20 00:51:50.0 
-0500
@@ -314,7 +314,7 @@ void touch_nmi_watchdog(void)
touch_softlockup_watchdog();
 }
 
-int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {
int sum;
int touched = 0;
Index: linux-compile.git/arch/x86/kernel/setup64.c
===
--- linux-compile.git.orig/arch/x86/kernel/setup64.c2007-12-19 
21:44:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/setup64.c 2007-12-20 00:52:32.0 
-0500
@@ -114,7 +114,7 @@ void __init setup_per_cpu_areas(void)
}
 } 
 
-void pda_init(int cpu)
+notrace void pda_init(int cpu)
 { 
struct x8664_pda *pda = cpu_pda(cpu);
 
@@ -197,7 +197,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist
  * 'CPU state barrier', nothing should get across.
  * A lot of state is already set up in PDA init.
  */
-void __cpuinit cpu_init (void)
+notrace void __cpuinit cpu_init(void)
 {
int cpu = stack_smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
Index: linux-compile.git/arch/x86/kernel/smpboot_64.c
===
--- linux-compile.git.orig/arch/x86/kernel/smpboot_64.c 2007-12-19 
21:44:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/smpboot_64.c  2007-12-20 
00:49:57.0 -0500
@@ -317,7 +317,7 @@ static inline void set_cpu_sibling_map(i
 /*
  * Setup code on secondary processor (after comming out of the trampoline)
  */
-void __cpuinit start_secondary(void)
+notrace __cpuinit void start_secondary(void)
 {
/*
 * Dont put anything before smp_callin(), SMP
Index: linux-compile.git/arch/x86/kernel/tsc_64.c
===
--- linux-compile.git.orig/arch/x86/kernel/tsc_64.c 2007-12-19 
21:44:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/tsc_64.c  2007-12-20 00:49:57.0 
-0500
@@ -248,13 +248,13 @@ __setup("notsc", notsc_setup);
 
 
 /* clock source code: */
-static cycle_t read_tsc(void)
+static notrace cycle_t read_tsc(void)
 {
cycle_t ret = (cycle_t)get_cycles_sync();
return ret;
 }
 
-static cycle_t __vsyscall_fn vread_tsc(void)
+static notrace cycle_t __vsyscall_fn vread_tsc(void)
 {
cycle_t ret = (cycle_t)get_cycles_sync();
return ret;
Index: linux-compile.git/arch/x86/kernel/vsyscall_64.c
===
--- linux-compile.git.orig/arch/x86/kernel/vsyscall_64.c2007-12-19 
21:44:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/vsyscall_64.c 2007-12-20 
00:54:53.0 -0500
@@ -42,7 +42,8 @@
 #include 
 #include 
 
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+   __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","rcx","memory"
 #define __pa_vsymbol(x)\
({unsigned long v;  \

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 02/11] Add fastcall to do_IRQ for i386

2008-01-02 Thread Steven Rostedt
MCOUNT will disable the regparm parameters of the i386 compile
options. Doing so breaks the prototype of do_IRQ, where
fastcall must be specified explicitly.

Also fixed some whitespace damage in the call to do_IRQ.

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 arch/x86/kernel/irq_32.c |2 +-
 include/asm-x86/irq_32.h |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

Index: linux-compile.git/arch/x86/kernel/irq_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/irq_32.c 2007-12-20 
00:20:29.0 -0500
+++ linux-compile.git/arch/x86/kernel/irq_32.c  2007-12-20 00:21:55.0 
-0500
@@ -67,7 +67,7 @@ static union irq_ctx *softirq_ctx[NR_CPU
  * handlers).
  */
 fastcall unsigned int do_IRQ(struct pt_regs *regs)
-{  
+{
struct pt_regs *old_regs;
/* high bit used in ret_from_ code */
int irq = ~regs->orig_eax;
Index: linux-compile.git/include/asm-x86/irq_32.h
===
--- linux-compile.git.orig/include/asm-x86/irq_32.h 2007-12-20 
00:20:29.0 -0500
+++ linux-compile.git/include/asm-x86/irq_32.h  2007-12-20 00:21:55.0 
-0500
@@ -41,7 +41,7 @@ extern int irqbalance_disable(char *str)
 extern void fixup_irqs(cpumask_t map);
 #endif
 
-unsigned int do_IRQ(struct pt_regs *regs);
+fastcall unsigned int do_IRQ(struct pt_regs *regs);
 void init_IRQ(void);
 void __init native_init_IRQ(void);
 

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 08/11] tracer add debugfs interface

2008-01-02 Thread Steven Rostedt
This patch adds an interface into debugfs.

  /debugfs/mctracer/ctrl

echoing 1 into the ctrl file turns on the tracer,
and echoing 0 turns it off.

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 lib/mcount/tracer.c |   87 +++-
 lib/mcount/tracer.h |1 
 2 files changed, 87 insertions(+), 1 deletion(-)

Index: linux-compile.git/lib/mcount/tracer.c
===
--- linux-compile.git.orig/lib/mcount/tracer.c  2008-01-02 23:07:23.0 
-0500
+++ linux-compile.git/lib/mcount/tracer.c   2008-01-02 23:12:50.0 
-0500
@@ -15,6 +15,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #include "tracer.h"
@@ -71,6 +73,89 @@ static inline notrace void trace_functio
raw_local_irq_restore(flags);
 }
 
+#ifdef CONFIG_DEBUG_FS
+static int mctracer_open_generic(struct inode *inode, struct file *filp)
+{
+   filp->private_data = inode->i_private;
+   return 0;
+}
+
+
+static ssize_t mctracer_ctrl_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+   struct mctracer_trace *tr = filp->private_data;
+   char buf[16];
+   int r;
+
+   r = sprintf(buf, "%ld\n", tr->ctrl);
+   return simple_read_from_buffer(ubuf, cnt, ppos,
+  buf, r);
+}
+
+static ssize_t mctracer_ctrl_write(struct file *filp,
+  const char __user *ubuf,
+  size_t cnt, loff_t *ppos)
+{
+   struct mctracer_trace *tr = filp->private_data;
+   int val;
+   char buf[16];
+
+   if (cnt > 15)
+   cnt = 15;
+
+   if (copy_from_user(&buf, ubuf, cnt))
+   return -EFAULT;
+
+   buf[cnt] = 0;
+
+   val = !!simple_strtoul(buf, NULL, 10);
+
+   if (tr->ctrl ^ val) {
+   if (val)
+   register_mcount_function(trace_function);
+   else
+   clear_mcount_function();
+   tr->ctrl = val;
+   }
+
+   filp->f_pos += cnt;
+
+   return cnt;
+}
+
+static struct file_operations mctracer_ctrl_fops = {
+   .open = mctracer_open_generic,
+   .read = mctracer_ctrl_read,
+   .write = mctracer_ctrl_write,
+};
+
+static void mctrace_init_debugfs(void)
+{
+   struct dentry *d_mctracer;
+   struct dentry *entry;
+
+   d_mctracer = debugfs_create_dir("mctracer", NULL);
+   if (!d_mctracer) {
+   pr_warning("Could not create debugfs directory mctracer\n");
+   return;
+   }
+
+   entry = debugfs_create_file("ctrl", 0644, d_mctracer,
+   &mctracer_trace, &mctracer_ctrl_fops);
+   if (!entry)
+   pr_warning("Could not create debugfs 'ctrl' entry\n");
+}
+#else /* CONFIG_DEBUG_FS */
+static void mctrace_init_debugfs(void)
+{
+   /*
+* No way to turn on or off the trace function
+* without debugfs, so we just turn it on.
+*/
+   register_mcount_function(trace_function);
+}
+#endif /* CONFIG_DEBUG_FS */
 
 static inline notrace int page_order(const unsigned long size)
 {
@@ -107,7 +192,7 @@ static inline notrace int mctracer_alloc
size, MCTRACER_NR_ENTRIES, MCTRACER_ENTRY_SIZE);
pr_info("   actual entries %ld\n", mctracer_trace.entries);
 
-   register_mcount_function(trace_function);
+   mctrace_init_debugfs();
 
return 0;
 
Index: linux-compile.git/lib/mcount/tracer.h
===
--- linux-compile.git.orig/lib/mcount/tracer.h  2008-01-02 23:04:34.0 
-0500
+++ linux-compile.git/lib/mcount/tracer.h   2008-01-02 23:11:39.0 
-0500
@@ -13,6 +13,7 @@ struct mctracer_trace {
void  *trace[NR_CPUS];
unsigned long trace_idx[NR_CPUS];
unsigned long entries;
+   long  ctrl;
atomic_t  cnt;
atomic_t  disabled[NR_CPUS];
atomic_t  underrun[NR_CPUS];

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 10/11] mcount tracer show task comm and pid

2008-01-02 Thread Steven Rostedt
This adds the task comm and pid to the trace output. This gives
output like:

CPU 0: sshd:2605 [] remove_wait_queue+0xc/0x4a <-- 
[] free_poll_entry+0x1e/0x2a
CPU 2: bash:2610 [] tty_check_change+0x9/0xb6 <-- 
[] tty_ioctl+0x59f/0xcdd
CPU 0: sshd:2605 [] _spin_lock_irqsave+0xe/0x81 <-- 
[] remove_wait_queue+0x17/0x4a
CPU 2: bash:2610 [] find_vpid+0x9/0x24 <-- 
[] tty_ioctl+0x62f/0xcdd
CPU 0: sshd:2605 [] _spin_unlock_irqrestore+0x9/0x3a <-- 
[] remove_wait_queue+0x45/0x4a
CPU 0: sshd:2605 [] fput+0x9/0x1b <-- [] 
free_poll_entry+0x26/0x2a


Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 lib/mcount/tracer.c |6 +-
 lib/mcount/tracer.h |3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

Index: linux-compile.git/lib/mcount/tracer.c
===
--- linux-compile.git.orig/lib/mcount/tracer.c  2008-01-02 23:17:21.0 
-0500
+++ linux-compile.git/lib/mcount/tracer.c   2008-01-02 23:17:44.0 
-0500
@@ -34,6 +34,7 @@ mctracer_add_trace_entry(struct mctracer
 {
unsigned long idx, idx_next;
struct mctracer_entry *entry;
+   struct task_struct *tsk = current;
 
idx = tr->trace_idx[cpu];
idx_next = idx + 1;
@@ -52,6 +53,8 @@ mctracer_add_trace_entry(struct mctracer
entry->idx   = atomic_inc_return(&tr->cnt);
entry->ip= ip;
entry->parent_ip = parent_ip;
+   entry->pid   = tsk->pid;
+   memcpy(entry->comm, tsk->comm, TASK_COMM_LEN);
 }
 
 static inline notrace void trace_function(const unsigned long ip,
@@ -223,7 +226,8 @@ static int s_show(struct seq_file *m, vo
return -1;
}
 
-   seq_printf(m, "  CPU %d:  ", iter->cpu);
+   seq_printf(m, "CPU %d: ", iter->cpu);
+   seq_printf(m, "%s:%d ", iter->ent->comm, iter->ent->pid);
seq_print_ip_sym(m, iter->ent->ip);
if (iter->ent->parent_ip) {
seq_printf(m, " <-- ");
Index: linux-compile.git/lib/mcount/tracer.h
===
--- linux-compile.git.orig/lib/mcount/tracer.h  2008-01-02 23:16:15.0 
-0500
+++ linux-compile.git/lib/mcount/tracer.h   2008-01-02 23:17:44.0 
-0500
@@ -2,11 +2,14 @@
 #define _LINUX_MCOUNT_TRACER_H
 
 #include 
+#include 
 
 struct mctracer_entry {
unsigned long idx;
unsigned long ip;
unsigned long parent_ip;
+   char comm[TASK_COMM_LEN];
+   pid_t pid;
 };
 
 struct mctracer_trace {

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 06/11] add notrace annotations to vsyscall.

2008-01-02 Thread Steven Rostedt
Add the notrace annotations to some of the vsyscall functions.

Note: checkpatch errors on the define of vsyscall_fn because it thinks
   that it is a complex macro that needs parentheses. Unfortunately
   we can't put parentheses on this macro.

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 arch/x86/vdso/vclock_gettime.c |   15 ---
 arch/x86/vdso/vgetcpu.c|3 ++-
 include/asm-x86/vsyscall.h |3 ++-
 3 files changed, 12 insertions(+), 9 deletions(-)

Index: linux-compile.git/arch/x86/vdso/vclock_gettime.c
===
--- linux-compile.git.orig/arch/x86/vdso/vclock_gettime.c   2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/vdso/vclock_gettime.c2008-01-02 
22:59:09.0 -0500
@@ -24,7 +24,7 @@
 
 #define gtod vdso_vsyscall_gtod_data
 
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+static long notrace vdso_fallback_gettime(long clock, struct timespec *ts)
 {
long ret;
asm("syscall" : "=a" (ret) :
@@ -32,7 +32,7 @@ static long vdso_fallback_gettime(long c
return ret;
 }
 
-static inline long vgetns(void)
+static inline long notrace vgetns(void)
 {
long v;
cycles_t (*vread)(void);
@@ -41,7 +41,7 @@ static inline long vgetns(void)
return (v * gtod->clock.mult) >> gtod->clock.shift;
 }
 
-static noinline int do_realtime(struct timespec *ts)
+static noinline int notrace do_realtime(struct timespec *ts)
 {
unsigned long seq, ns;
do {
@@ -55,7 +55,8 @@ static noinline int do_realtime(struct t
 }
 
 /* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+static void notrace
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
 {
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
@@ -69,7 +70,7 @@ static void vset_normalized_timespec(str
ts->tv_nsec = nsec;
 }
 
-static noinline int do_monotonic(struct timespec *ts)
+static noinline int notrace do_monotonic(struct timespec *ts)
 {
unsigned long seq, ns, secs;
do {
@@ -83,7 +84,7 @@ static noinline int do_monotonic(struct 
return 0;
 }
 
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+int notrace __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
if (likely(gtod->sysctl_enabled && gtod->clock.vread))
switch (clock) {
@@ -97,7 +98,7 @@ int __vdso_clock_gettime(clockid_t clock
 int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
 
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+int notrace __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
Index: linux-compile.git/arch/x86/vdso/vgetcpu.c
===
--- linux-compile.git.orig/arch/x86/vdso/vgetcpu.c  2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/vdso/vgetcpu.c   2008-01-02 22:59:35.0 
-0500
@@ -13,7 +13,8 @@
 #include 
 #include "vextern.h"
 
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+long notrace
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
unsigned int dummy, p;
 
Index: linux-compile.git/include/asm-x86/vsyscall.h
===
--- linux-compile.git.orig/include/asm-x86/vsyscall.h   2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/include/asm-x86/vsyscall.h2008-01-02 
23:00:34.0 -0500
@@ -24,7 +24,8 @@ enum vsyscall_num {
((unused, __section__ (".vsyscall_gtod_data"),aligned(16)))
 #define __section_vsyscall_clock __attribute__ \
((unused, __section__ (".vsyscall_clock"),aligned(16)))
-#define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn")))
+#define __vsyscall_fn __attribute__ \
+   ((unused, __section__(".vsyscall_fn"))) notrace
 
 #define VGETCPU_RDTSCP 1
 #define VGETCPU_LSL2

-- 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 01/11] Add basic support for gcc profiler instrumentation

2008-01-02 Thread Steven Rostedt
If CONFIG_MCOUNT is selected and /proc/sys/kernel/mcount_enabled is set to a
non-zero value the mcount routine will be called every time we enter a kernel
function that is not marked with the "notrace" attribute.

The mcount routine will then call a registered function if a function
happens to be registered.

[This code has been highly hacked by Steven Rostedt, so don't
 blame Arnaldo for all of this ;-) ]

Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 Documentation/stable_api_nonsense.txt |3 +
 Makefile  |4 +
 arch/x86/Kconfig  |6 ++
 arch/x86/Makefile_32  |4 +
 arch/x86/kernel/Makefile_32   |1 
 arch/x86/kernel/entry_64.S|   46 
 arch/x86/kernel/mcount-wrapper.S  |   25 ++
 include/linux/linkage.h   |2 
 include/linux/mcount.h|   21 +
 kernel/sysctl.c   |   11 
 lib/Kconfig.debug |2 
 lib/Makefile  |2 
 lib/mcount/Kconfig|6 ++
 lib/mcount/Makefile   |3 +
 lib/mcount/mcount.c   |   78 ++
 15 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 arch/i386/kernel/mcount-wrapper.S
 create mode 100644 lib/mcount/Kconfig
 create mode 100644 lib/mcount/Makefile
 create mode 100644 lib/mcount/mcount.c
 create mode 100644 lib/mcount/mcount.h

Index: linux-compile.git/Documentation/stable_api_nonsense.txt
===
--- linux-compile.git.orig/Documentation/stable_api_nonsense.txt
2008-01-03 01:02:28.0 -0500
+++ linux-compile.git/Documentation/stable_api_nonsense.txt 2008-01-03 
01:02:33.0 -0500
@@ -62,6 +62,9 @@ consider the following facts about the L
   - different structures can contain different fields
   - Some functions may not be implemented at all, (i.e. some locks
compile away to nothing for non-SMP builds.)
+  - Parameter passing of variables from function to function can be
+   done in different ways (the CONFIG_REGPARM option controls
+   this.)
   - Memory within the kernel can be aligned in different ways,
depending on the build options.
   - Linux runs on a wide range of different processor architectures.
Index: linux-compile.git/Makefile
===
--- linux-compile.git.orig/Makefile 2008-01-03 01:02:28.0 -0500
+++ linux-compile.git/Makefile  2008-01-03 01:02:39.0 -0500
@@ -509,11 +509,15 @@ endif
 
 include $(srctree)/arch/$(SRCARCH)/Makefile
 
+ifdef CONFIG_MCOUNT
+KBUILD_CFLAGS  += -pg -fno-omit-frame-pointer -fno-optimize-sibling-calls
+else
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
 KBUILD_CFLAGS  += -fomit-frame-pointer
 endif
+endif
 
 ifdef CONFIG_DEBUG_INFO
 KBUILD_CFLAGS  += -g
Index: linux-compile.git/arch/x86/Kconfig
===
--- linux-compile.git.orig/arch/x86/Kconfig 2008-01-03 01:02:28.0 
-0500
+++ linux-compile.git/arch/x86/Kconfig  2008-01-03 01:02:33.0 -0500
@@ -28,6 +28,12 @@ config GENERIC_CMOS_UPDATE
bool
default y
 
+# function tracing might turn this off:
+config REGPARM
+   bool
+   depends on !MCOUNT
+   default y
+
 config CLOCKSOURCE_WATCHDOG
bool
default y
Index: linux-compile.git/arch/x86/Makefile_32
===
--- linux-compile.git.orig/arch/x86/Makefile_32 2008-01-03 01:02:28.0 
-0500
+++ linux-compile.git/arch/x86/Makefile_32  2008-01-03 01:02:33.0 
-0500
@@ -37,7 +37,7 @@ LDFLAGS_vmlinux := --emit-relocs
 endif
 CHECKFLAGS += -D__i386__
 
-KBUILD_CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return
+KBUILD_CFLAGS += -pipe -msoft-float
 
 # prevent gcc from keeping the stack 16 byte aligned
 KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
@@ -45,6 +45,8 @@ KBUILD_CFLAGS += $(call cc-option,-mpref
 # CPU-specific tuning. Anything which can be shared with UML should go here.
 include $(srctree)/arch/x86/Makefile_32.cpu
 
+cflags-$(CONFIG_REGPARM) += -mregparm=3 -freg-struct-return
+
 # temporary until string.h is fixed
 cflags-y += -ffreestanding
 
Index: linux-compile.git/arch/x86/kernel/Makefile_32
===
--- linux-compile.git.orig/arch/x86/kernel/Makefile_32  2008-01-03 
01:02:28.0 -0500
+++ linux-compile.git/arch/x86/kernel/Makefile_32   2008-01-03 
01:02:33.0 -0500
@@ -23,6 +23,7 @@ obj-$(CONFIG_APM) += apm_32.o
 obj-$(CONFIG_X86_SMP)  += smp_32.o smpboot_32.o tsc

[RFC PATCH 04/11] i386: notrace annotations

2008-01-02 Thread Steven Rostedt
From patch-2.6.21.5-rt20. Annotates functions that should not be profiler
instrumented, i.e. where mcount should not be called at function entry.

Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 arch/x86/kernel/apic_32.c  |2 +-
 arch/x86/kernel/hpet.c |2 +-
 arch/x86/kernel/irq_32.c   |2 +-
 arch/x86/kernel/nmi_32.c   |2 +-
 arch/x86/kernel/smp_32.c   |2 +-
 arch/x86/kernel/time_32.c  |2 +-
 arch/x86/kernel/traps_32.c |4 ++--
 arch/x86/kernel/tsc_32.c   |2 +-
 arch/x86/lib/delay_32.c|6 +++---
 arch/x86/mm/fault_32.c |4 ++--
 arch/x86/mm/init_32.c  |2 +-
 11 files changed, 15 insertions(+), 15 deletions(-)
---

Index: linux-compile.git/arch/x86/kernel/apic_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/apic_32.c2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/apic_32.c 2008-01-02 22:56:41.0 
-0500
@@ -577,7 +577,7 @@ static void local_apic_timer_interrupt(v
  *   interrupt as well. Thus we cannot inline the local irq ... ]
  */
 
-void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
+notrace fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
 {
struct pt_regs *old_regs = set_irq_regs(regs);
 
Index: linux-compile.git/arch/x86/kernel/hpet.c
===
--- linux-compile.git.orig/arch/x86/kernel/hpet.c   2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/hpet.c2008-01-02 22:56:41.0 
-0500
@@ -295,7 +295,7 @@ static int hpet_legacy_next_event(unsign
 /*
  * Clock source related code
  */
-static cycle_t read_hpet(void)
+static notrace cycle_t read_hpet(void)
 {
return (cycle_t)hpet_readl(HPET_COUNTER);
 }
Index: linux-compile.git/arch/x86/kernel/irq_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/irq_32.c 2008-01-02 
22:56:34.0 -0500
+++ linux-compile.git/arch/x86/kernel/irq_32.c  2008-01-02 22:56:41.0 
-0500
@@ -66,7 +66,7 @@ static union irq_ctx *softirq_ctx[NR_CPU
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+notrace fastcall unsigned int do_IRQ(struct pt_regs *regs)
 {
struct pt_regs *old_regs;
/* high bit used in ret_from_ code */
Index: linux-compile.git/arch/x86/kernel/nmi_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/nmi_32.c 2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/nmi_32.c  2008-01-02 22:57:52.0 
-0500
@@ -323,7 +323,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 
-__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 {
 
/*
Index: linux-compile.git/arch/x86/kernel/smp_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/smp_32.c 2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/smp_32.c  2008-01-02 22:56:41.0 
-0500
@@ -638,7 +638,7 @@ static void native_smp_send_stop(void)
  * all the work is done automatically when
  * we return from the interrupt.
  */
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+notrace fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
 {
ack_APIC_irq();
__get_cpu_var(irq_stat).irq_resched_count++;
Index: linux-compile.git/arch/x86/kernel/time_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/time_32.c2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/time_32.c 2008-01-02 22:56:41.0 
-0500
@@ -122,7 +122,7 @@ static int set_rtc_mmss(unsigned long no
 
 int timer_ack;
 
-unsigned long profile_pc(struct pt_regs *regs)
+notrace unsigned long profile_pc(struct pt_regs *regs)
 {
unsigned long pc = instruction_pointer(regs);
 
Index: linux-compile.git/arch/x86/kernel/traps_32.c
===
--- linux-compile.git.orig/arch/x86/kernel/traps_32.c   2008-01-02 
22:53:52.0 -0500
+++ linux-compile.git/arch/x86/kernel/traps_32.c2008-01-02 
22:58:19.0 -0500
@@ -723,7 +723,7 @@ void __kprobes die_nmi(struct pt_regs *r
do_exit(SIGSEGV);
 }
 
-static __kprobes void default_do_nmi(struct pt_regs * regs)
+static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
unsigned char reason = 0;
 
@@ -763,7 +763,7 @@ static __kprobes void default_do_nmi(str
 
 static int ignore_nmis;
 
-fastcall __kprobes void do_nmi(struct pt_regs * regs, l

[RFC PATCH 03/11] Annotate core code that should not be traced

2008-01-02 Thread Steven Rostedt
Mark with "notrace" functions in core code that should not be
traced.  The "notrace" attribute will prevent gcc from adding
a call to mcount on the annotated functions.

Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>

---
 drivers/clocksource/acpi_pm.c |8 
 include/linux/preempt.h   |4 ++--
 kernel/irq/handle.c   |2 +-
 kernel/lockdep.c  |   27 ++-
 kernel/rcupdate.c |2 +-
 kernel/spinlock.c |2 +-
 lib/smp_processor_id.c|2 +-
 7 files changed, 24 insertions(+), 23 deletions(-)

Index: linux-compile.git/drivers/clocksource/acpi_pm.c
===
--- linux-compile.git.orig/drivers/clocksource/acpi_pm.c2007-12-20 
01:00:29.0 -0500
+++ linux-compile.git/drivers/clocksource/acpi_pm.c 2007-12-20 
01:00:48.0 -0500
@@ -30,13 +30,13 @@
  */
 u32 pmtmr_ioport __read_mostly;
 
-static inline u32 read_pmtmr(void)
+static inline notrace u32 read_pmtmr(void)
 {
/* mask the output to 24 bits */
return inl(pmtmr_ioport) & ACPI_PM_MASK;
 }
 
-u32 acpi_pm_read_verified(void)
+notrace u32 acpi_pm_read_verified(void)
 {
u32 v1 = 0, v2 = 0, v3 = 0;
 
@@ -56,12 +56,12 @@ u32 acpi_pm_read_verified(void)
return v2;
 }
 
-static cycle_t acpi_pm_read_slow(void)
+static notrace cycle_t acpi_pm_read_slow(void)
 {
return (cycle_t)acpi_pm_read_verified();
 }
 
-static cycle_t acpi_pm_read(void)
+static notrace cycle_t acpi_pm_read(void)
 {
return (cycle_t)read_pmtmr();
 }
Index: linux-compile.git/include/linux/preempt.h
===
--- linux-compile.git.orig/include/linux/preempt.h  2007-12-20 
01:00:29.0 -0500
+++ linux-compile.git/include/linux/preempt.h   2007-12-20 01:00:48.0 
-0500
@@ -11,8 +11,8 @@
 #include 
 
 #ifdef CONFIG_DEBUG_PREEMPT
-  extern void fastcall add_preempt_count(int val);
-  extern void fastcall sub_preempt_count(int val);
+  extern notrace void fastcall add_preempt_count(int val);
+  extern notrace void fastcall sub_preempt_count(int val);
 #else
 # define add_preempt_count(val)do { preempt_count() += (val); } while 
(0)
 # define sub_preempt_count(val)do { preempt_count() -= (val); } while 
(0)
Index: linux-compile.git/kernel/irq/handle.c
===
--- linux-compile.git.orig/kernel/irq/handle.c  2007-12-20 01:00:29.0 
-0500
+++ linux-compile.git/kernel/irq/handle.c   2007-12-20 01:00:48.0 
-0500
@@ -163,7 +163,7 @@ irqreturn_t handle_IRQ_event(unsigned in
  * This is the original x86 implementation which is used for every
  * interrupt type.
  */
-fastcall unsigned int __do_IRQ(unsigned int irq)
+notrace fastcall unsigned int __do_IRQ(unsigned int irq)
 {
struct irq_desc *desc = irq_desc + irq;
struct irqaction *action;
Index: linux-compile.git/kernel/lockdep.c
===
--- linux-compile.git.orig/kernel/lockdep.c 2007-12-20 01:00:29.0 
-0500
+++ linux-compile.git/kernel/lockdep.c  2007-12-20 01:00:48.0 -0500
@@ -270,14 +270,14 @@ static struct list_head chainhash_table[
((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \
(key2))
 
-void lockdep_off(void)
+notrace void lockdep_off(void)
 {
current->lockdep_recursion++;
 }
 
 EXPORT_SYMBOL(lockdep_off);
 
-void lockdep_on(void)
+notrace void lockdep_on(void)
 {
current->lockdep_recursion--;
 }
@@ -1036,7 +1036,7 @@ find_usage_forwards(struct lock_class *s
  * Return 1 otherwise and keep  unchanged.
  * Return 0 on error.
  */
-static noinline int
+static noinline notrace int
 find_usage_backwards(struct lock_class *source, unsigned int depth)
 {
struct lock_list *entry;
@@ -1586,7 +1586,7 @@ static inline int validate_chain(struct 
  * We are building curr_chain_key incrementally, so double-check
  * it from scratch, to make sure that it's done correctly:
  */
-static void check_chain_key(struct task_struct *curr)
+static notrace void check_chain_key(struct task_struct *curr)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
struct held_lock *hlock, *prev_hlock = NULL;
@@ -1962,7 +1962,7 @@ static int mark_lock_irq(struct task_str
 /*
  * Mark all held locks with a usage bit:
  */
-static int
+static notrace int
 mark_held_locks(struct task_struct *curr, int hardirq)
 {
enum lock_usage_bit usage_bit;
@@ -2009,7 +2009,7 @@ void early_boot_irqs_on(void)
 /*
  * Hardirqs will be enabled:
  */
-void trace_hardirqs_on(void)
+notrace void trace_hardirqs_on(void)
 {
struct task_struct *curr = current;
unsigned long ip;
@@ -2057,7 +2057,7 @@ EXPORT_SYMBOL(trace_hardirqs_on);
 /*
  * Hardirqs were disabled:
  */
-void trace_hardirqs_off(void)
+notrace void trace_hard

[RFC PATCH 09/11] mcount tracer output file

2008-01-02 Thread Steven Rostedt
Add /debugfs/mctracer/trace to output trace output.

Here's an example of the content.

  CPU 0:  [] notifier_call_chain+0x16/0x60 <-- 
[] __atomic_notifier_call_chain+0x26/0x56
  CPU 0:  [] mce_idle_callback+0x9/0x2f <-- 
[] notifier_call_chain+0x38/0x60
  CPU 0:  [] acpi_processor_idle+0x16/0x518 <-- 
[] cpu_idle+0xa1/0xe7
  CPU 0:  [] acpi_safe_halt+0x9/0x43 <-- [] 
acpi_processor_idle+0x1d6/0x518
  CPU 1:  [] smp_apic_timer_interrupt+0xc/0x58 <-- 
[] apic_timer_interrupt+0x66/0x70
  CPU 1:  [] exit_idle+0x9/0x22 <-- [] 
smp_apic_timer_interrupt+0x35/0x58
  CPU 1:  [] __exit_idle+0x9/0x2e <-- [] 
exit_idle+0x20/0x22
  CPU 1:  [] atomic_notifier_call_chain+0x9/0x16 <-- 
[] __exit_idle+0x2c/0x2e
  CPU 1:  [] __atomic_notifier_call_chain+0xe/0x56 <-- 
[] atomic_notifier_call_chain+0x14/0x16
  CPU 1:  [] notifier_call_chain+0x16/0x60 <-- 
[] __atomic_notifier_call_chain+0x26/0x56
  CPU 1:  [] mce_idle_callback+0x9/0x2f <-- 
[] notifier_call_chain+0x38/0x60

This is in the format of the output when KALLSYMS is defined.

  CPU : []  <-- [] 

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 lib/mcount/tracer.c |  215 +++-
 1 file changed, 213 insertions(+), 2 deletions(-)

Index: linux-compile.git/lib/mcount/tracer.c
===
--- linux-compile.git.orig/lib/mcount/tracer.c  2008-01-02 23:12:50.0 
-0500
+++ linux-compile.git/lib/mcount/tracer.c   2008-01-02 23:17:21.0 
-0500
@@ -13,9 +13,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -74,6 +76,211 @@ static inline notrace void trace_functio
 }
 
 #ifdef CONFIG_DEBUG_FS
+struct mctracer_iterator {
+   struct mctracer_trace *tr;
+   struct mctracer_entry *ent;
+   unsigned long next_idx[NR_CPUS];
+   int cpu;
+   int idx;
+};
+
+static struct mctracer_entry *mctracer_entry_idx(struct mctracer_trace *tr,
+unsigned long idx,
+int cpu)
+{
+   struct mctracer_entry *array = tr->trace[cpu];
+   unsigned long underrun;
+
+   if (idx >= tr->entries)
+   return NULL;
+
+   underrun = atomic_read(&tr->underrun[cpu]);
+   if (underrun)
+   idx = (underrun + idx) % tr->entries;
+   else if (idx >= tr->trace_idx[cpu])
+   return NULL;
+
+   return &array[idx];
+}
+
+static void *find_next_entry(struct mctracer_iterator *iter)
+{
+   struct mctracer_trace *tr = iter->tr;
+   struct mctracer_entry *ent;
+   struct mctracer_entry *next = NULL;
+   int next_i = -1;
+   int i;
+
+   for_each_possible_cpu(i) {
+   if (!tr->trace[i])
+   continue;
+   ent = mctracer_entry_idx(tr, iter->next_idx[i], i);
+   if (ent && (!next || next->idx > ent->idx)) {
+   next = ent;
+   next_i = i;
+   }
+   }
+   if (next) {
+   iter->next_idx[next_i]++;
+   iter->idx++;
+   }
+   iter->ent = next;
+   iter->cpu = next_i;
+
+   return next ? iter : NULL;
+}
+
+static void *s_next(struct seq_file *m, void *v, loff_t *pos)
+{
+   struct mctracer_iterator *iter = m->private;
+   void *ent;
+   int i = (int)*pos;
+
+   (*pos)++;
+
+   /* can't go backwards */
+   if (iter->idx > i)
+   return NULL;
+
+   if (iter->idx < 0)
+   ent = find_next_entry(iter);
+   else
+   ent = iter->ent;
+
+   while (ent && iter->idx < i)
+   ent = find_next_entry(iter);
+
+   return ent;
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+   struct mctracer_iterator *iter = m->private;
+   void *p = NULL;
+   loff_t l = 0;
+   int i;
+
+   iter->ent = NULL;
+   iter->cpu = 0;
+   iter->idx = -1;
+   for (i = 0; i < NR_CPUS; i++)
+   iter->next_idx[i] = 0;
+
+   /* stop the trace while dumping */
+   if (iter->tr->ctrl)
+   clear_mcount_function();
+
+   for (p = (void *)1; p && l < *pos; p = s_next(m, p, &l))
+   ;
+
+   return p;
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+   struct mctracer_iterator *iter = m->private;
+   if (iter->tr->ctrl)
+   register_mcount_function(trace_function);
+}
+
+#ifdef CONFIG_KALLSYMS
+static void seq_print_symbol(struct seq_file *m,
+const char *fmt, unsigned long address)
+{
+   char buffer[KSYM_SYMBOL_LEN];
+
+   sprint_symbol(buffer, address);
+   seq_printf(m, fmt, buffer);
+}
+#else
+# define seq_print_symbol(m, fmt, address) do { } while (0)
+#endif
+
+#ifndef CONFIG_64BIT
+#define seq_print_ip_sym(m, ip)\
+do {

[RFC PATCH 07/11] mcount based trace in the form of a header file library

2008-01-02 Thread Steven Rostedt
The design is for mcount based tracers to be added thru the
lib/mcount/tracer_interface.h file, just like mcount users should add
themselves to lib/mcount/mcount.h. A Kconfig rule chooses the right MCOUNT and
MCOUNT_TRACER user.

This is to avoid function call costs for something that is supposed to be used
only in a debug kernel and that has to reduce to the bare minimum the per
function call overhead of mcount based tracing.

Signed-off-by: Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>
---
 lib/mcount/Kconfig|   11 +++
 lib/mcount/Makefile   |2 
 lib/mcount/tracer.c   |  125 ++
 lib/mcount/tracer.h   |   21 +++
 lib/mcount/tracer_interface.h |   14 
 5 files changed, 173 insertions(+)
 create mode 100644 lib/mcount/tracer.c
 create mode 100644 lib/mcount/tracer.h
 create mode 100644 lib/mcount/tracer_interface.h

Index: linux-compile.git/lib/mcount/Kconfig
===
--- linux-compile.git.orig/lib/mcount/Kconfig   2008-01-02 23:24:53.0 
-0500
+++ linux-compile.git/lib/mcount/Kconfig2008-01-02 23:28:06.0 
-0500
@@ -4,3 +4,14 @@
 config MCOUNT
bool
depends on DEBUG_KERNEL
+
+config MCOUNT_TRACER
+   bool "Profiler instrumentation based tracer"
+   depends on DEBUG_KERNEL
+   default n
+   select MCOUNT
+   help
+ Use profiler instrumentation, adding -pg to CFLAGS. This will
+ insert a call to an architecture specific __mcount routine,
+ that the debugging mechanism using this facility will hook by
+ providing a set of inline routines.
Index: linux-compile.git/lib/mcount/tracer.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-compile.git/lib/mcount/tracer.c   2008-01-02 23:28:06.0 
-0500
@@ -0,0 +1,125 @@
+/*
+ * ring buffer based mcount tracer
+ *
+ * Copyright (C) 2007 Arnaldo Carvalho de Melo <[EMAIL PROTECTED]>
+ *   Steven Rostedt <[EMAIL PROTECTED]>
+ *
+ * From code in the latency_tracer, that is:
+ *
+ *  Copyright (C) 2004-2006 Ingo Molnar
+ *  Copyright (C) 2004 William Lee Irwin III
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "tracer.h"
+#include "tracer_interface.h"
+
+static struct mctracer_trace mctracer_trace;
+
+static inline notrace void
+mctracer_add_trace_entry(struct mctracer_trace *tr,
+int cpu,
+const unsigned long ip,
+const unsigned long parent_ip)
+{
+   unsigned long idx, idx_next;
+   struct mctracer_entry *entry;
+
+   idx = tr->trace_idx[cpu];
+   idx_next = idx + 1;
+
+   if (unlikely(idx_next >= tr->entries)) {
+   atomic_inc(&tr->underrun[cpu]);
+   idx_next = 0;
+   }
+
+   tr->trace_idx[cpu] = idx_next;
+
+   if (unlikely(idx_next != 0 && atomic_read(&tr->underrun[cpu])))
+   atomic_inc(&tr->underrun[cpu]);
+
+   entry = tr->trace[cpu] + idx * MCTRACER_ENTRY_SIZE;
+   entry->idx   = atomic_inc_return(&tr->cnt);
+   entry->ip= ip;
+   entry->parent_ip = parent_ip;
+}
+
+static inline notrace void trace_function(const unsigned long ip,
+ const unsigned long parent_ip)
+{
+   unsigned long flags;
+   struct mctracer_trace *tr;
+   int cpu;
+
+   raw_local_irq_save(flags);
+   cpu = raw_smp_processor_id();
+
+   tr = &mctracer_trace;
+
+   atomic_inc(&tr->disabled[cpu]);
+   if (likely(atomic_read(&tr->disabled[cpu]) == 1))
+   mctracer_add_trace_entry(tr, cpu, ip, parent_ip);
+
+   atomic_dec(&tr->disabled[cpu]);
+
+   raw_local_irq_restore(flags);
+}
+
+
+static inline notrace int page_order(const unsigned long size)
+{
+   const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+   return ilog2(roundup_pow_of_two(nr_pages));
+}
+
+static inline notrace int mctracer_alloc_buffers(void)
+{
+   const int order = page_order(MCTRACER_NR_ENTRIES * MCTRACER_ENTRY_SIZE);
+   const unsigned long size = (1UL << order) << PAGE_SHIFT;
+   struct mctracer_entry *array;
+   int i;
+
+   for_each_possible_cpu(i) {
+   array = (struct mctracer_entry *)
+ __get_free_pages(GFP_KERNEL, order);
+   if (array == NULL) {
+   printk(KERN_ERR "mctracer: failed to allocate"
+  " %ld bytes for trace buffer!\n", size);
+   goto free_buffers;
+   }
+   mctracer_trace.trace[i] = array;
+   }
+
+   /*
+* Since we allocate by orders of pages, we may be able to
+* round up a bit.
+*/
+   mctracer_trace.entries = siz

[RFC PATCH 00/11] mcount tracing utility

2008-01-02 Thread Steven Rostedt

The following patch series brings to vanilla Linux a bit of the RT kernel
trace facility. This incorporates the "-pg" profiling option of gcc
that will call the "mcount" function for all functions called in
the kernel.

This patch series implements the code for x86 (32 and 64 bit), but
other archs can easily be implemented as well.

Some Background:


A while back, Ingo Molnar and William Lee Irwin III created a latency tracer
to find problem latency areas in the kernel for the RT patch.  This tracer
became a very integral part of the RT kernel in solving where latency hot
spots were.  One of the features that the latency tracer added was a
function trace.  This function tracer would record all functions that
were called (implemented by the gcc "-pg" option) and would show what was
called when interrupts or preemption was turned off.

This feature is also very helpful in normal debugging. So it's been talked
about taking bits and pieces from the RT latency tracer and bring them
to LKML. But no one had the time to do it.

Arnaldo Carvalho de Melo took a crack at it. He pulled out the mcount
as well as part of the tracing code and made it generic from the point
of the tracing code.  I'm not sure why this stopped. Probably because
Arnaldo is a very busy man, and his efforts had to be utilized elsewhere.

While I still maintain my own Logdev utility:

  http://rostedt.homelinux.com/logdev

I came across a need to do the mcount with logdev too. I was successful
but found that it became very dependent on a lot of code. One thing that
I liked about my logdev utility was that it was very non-intrusive, and has
been easy to port from the Linux 2.0 days. I did not want to burden the
logdev patch with the intrusiveness of mcount (not really that intrusive,
it just needs to add a "notrace" annotation to functions in the kernel
that will cause more conflicts in applying patches for me).

Being close to the holidays, I grabbed Arnaldo's old patches and started
massaging them into something that could be useful for logdev, and what
I found out (and talking this over with Arnaldo too) that this can
be much more useful for others as well.

The main thing I changed, was that I made the mcount function itself
generic, and not the dependency on the tracing code.  That is I added

register_mcount_function()
 and
clear_mcount_function()

So whenever mcount is enabled and a function is registered, that function
is called for all functions in the kernel that are not labeled with the
"notrace" annotation.

The key thing here is that *any* utility can now hook its own function into
mcount!

The Simple Tracer:
--

To show the power of this I also massaged the tracer code that Arnaldo pulled
from the RT patch and made it be a nice example of what can be done
with this.

The function that is registered to mcount has the prototype:

 void func(unsigned long ip, unsigned long parent_ip);

The ip is the address of the function and parent_ip is the address of
the parent function that called it.

The x86_64 version has the assembly call the registered function directly
to save having to do a double function call.

To enable mcount, a sysctl is added:

   /proc/sys/kernel/mcount_enabled

Once mcount is enabled, when a function is registered, it will be called by
all functions. The tracer in this patch series shows how this is done.
It adds a directory in the debugfs, called mctracer, with a ctrl file that
will allow the user to have the tracer register its function.  Note, the order
of enabling mcount and registering a function is not important, but both
must be done to initiate the tracing. That is, you can disable tracing
by either disabling mcount or by clearing the registered function.

Only one function may be registered at a time. If another function is
registered, it will simply override whatever was there previously.

Here's a simple example of the tracer output:

CPU 2: hackbench:11867 preempt_schedule+0xc/0x84 <-- 
avc_has_perm_noaudit+0x45d/0x52c
CPU 1: hackbench:12052 selinux_file_permission+0x10/0x11c <-- 
security_file_permission+0x16/0x18
CPU 3: hackbench:12017 update_curr+0xe/0x8b <-- put_prev_task_fair+0x24/0x4c
CPU 2: hackbench:11867 avc_audit+0x16/0x9e3 <-- avc_has_perm+0x51/0x63
CPU 0: hackbench:12019 socket_has_perm+0x16/0x7c <-- 
selinux_socket_sendmsg+0x27/0x3e
CPU 1: hackbench:12052 file_has_perm+0x16/0xbb <-- 
selinux_file_permission+0x104/0x11c

This is formatted like:

 CPU : :  <-- 


Overhead:
-

Note that having mcount compiled in seems to show a little overhead.

Here's 3 runs of hackbench 50 without the patches:
Time: 2.137
Time: 2.283
Time: 2.245

 Avg: 2.221

and here's 3 runs with the patches (without tracing on):
Time: 2.738
Time: 2.469
Time: 2.388

  Avg: 2.531

So it is a 13% overhead when enabled (according to hackbench).

But full tracing can cause a bit more problems:

# hackbench 50
Time: 113.350

  113.350!

But this is tracing *every* function call!


Future:

Re: [PATCH -mm] compat_binfmt_elf Kconfig

2008-01-02 Thread Sam Ravnborg
On Wed, Jan 02, 2008 at 02:12:28PM -0800, Roland McGrath wrote:
> I have no opinions about the config symbol names.  Among the existing
> precedents for internal/descriptionless symbols I find more not using the
> HAVE_ prefix than those using it.
The convention is newly established - so no surprise.

> The patch versions I've sent now work
> fine, fix the parallel build problem people were seeing, and AFAICT follow
> the style of what's already in common use.  At this point, I think it would
> be easiest just to keep them and have you send symbol-renaming patches
> for any and all symbols of this sort that concern you as separate cleanups.
Would love to - but the pile of other kbuild stuff only gets bigger..

Sam
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/7] convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
On Jan 3, 2008 3:24 PM, Jarek Poplawski <[EMAIL PROTECTED]> wrote:
> On Thu, Jan 03, 2008 at 08:06:09AM +0100, Jarek Poplawski wrote:
> > On Thu, Jan 03, 2008 at 01:50:20PM +0800, Dave Young wrote:
> > > Convert semaphore to mutex in struct class.
> > ...
> > > One lockdep warning detected as following, thus use mutex_lock_nested 
> > > with SINGLE_DEPTH_NESTING in class_device_add
> > >
> > > Jan  3 10:45:15 darkstar kernel: 
> > > =
> > > Jan  3 10:45:15 darkstar kernel: [ INFO: possible recursive locking 
> > > detected ]
> > > Jan  3 10:45:15 darkstar kernel: 2.6.24-rc6-mm1-mutex #1
> > > Jan  3 10:45:15 darkstar kernel: 
> > > -
> > ...
> > > If there's anything missed please help to point out, thanks.
> >
> > Dave, IMHO it's not 'the right' way to do it: [...]
>
> OOPS! (I was sleeping...) Unless it has turned out it's not so hard
> here, and you are quite sure there should be no more warnings after
> this one nesting annotation - then of course, this is the right way!

Thanks ;)
I don't know if there's other possible warning places with this mutex
or not,  if you have any ideas about this, please tell me.

>
> Sorry (?)
> Jarek P.
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/7] convert semaphore to mutex in struct class

2008-01-02 Thread Jarek Poplawski
On Thu, Jan 03, 2008 at 08:06:09AM +0100, Jarek Poplawski wrote:
> On Thu, Jan 03, 2008 at 01:50:20PM +0800, Dave Young wrote:
> > Convert semaphore to mutex in struct class.
> ...
> > One lockdep warning detected as following, thus use mutex_lock_nested with 
> > SINGLE_DEPTH_NESTING in class_device_add
> > 
> > Jan  3 10:45:15 darkstar kernel: 
> > =
> > Jan  3 10:45:15 darkstar kernel: [ INFO: possible recursive locking 
> > detected ]
> > Jan  3 10:45:15 darkstar kernel: 2.6.24-rc6-mm1-mutex #1
> > Jan  3 10:45:15 darkstar kernel: 
> > -
> ...
> > If there's anything missed please help to point out, thanks.
> 
> Dave, IMHO it's not 'the right' way to do it: [...]

OOPS! (I was sleeping...) Unless it has turned out it's not so hard
here, and you are quite sure there should be no more warnings after
this one nesting annotation - then of course, this is the right way!

Sorry (?)
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kprobes: Introduce is_kprobe_fault()

2008-01-02 Thread Ananth N Mavinakayanahalli
On Wed, Jan 02, 2008 at 08:05:14PM -0800, Harvey Harrison wrote:

Thanks for the cleanup...

...

> diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
> index a2273d4..f2e909b 100644
> --- a/arch/x86/mm/fault_32.c
> +++ b/arch/x86/mm/fault_32.c
> @@ -33,28 +33,6 @@
> 
>  extern void die(const char *,struct pt_regs *,long);
> 
> -#ifdef CONFIG_KPROBES
> -static inline int notify_page_fault(struct pt_regs *regs)
> -{
> - int ret = 0;
> -
> - /* kprobe_running() needs smp_processor_id() */
> - if (!user_mode_vm(regs)) {
^^^
For x86_32, this check is important. See commit
6444541671bd821b950dbaafee70d65188198aa6 (Never allow int3 traps
from V8086 mode to enter the kprobes handler) for the precise reason why it's
user_mode_vm() and not user_mode() for x86_32.

You'll need to make room for this check in the generic macro below...

> - preempt_disable();
> - if (kprobe_running() && kprobe_fault_handler(regs, 14))
> - ret = 1;
> - preempt_enable();
> - }
> -
> - return ret;
> -}
> -#else
> -static inline int notify_page_fault(struct pt_regs *regs)
> -{
> - return 0;
> -}
> -#endif
> -
>  /*
>   * Return EIP plus the CS segment base.  The segment limit is also
>   * adjusted, clamped to the kernel/user address space (whichever is
> @@ -331,7 +309,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs 
> *regs,
>   if (unlikely(address >= TASK_SIZE)) {
>   if (!(error_code & 0x000d) && vmalloc_fault(address) >= 0)
>   return;
> - if (notify_page_fault(regs))
> + if (is_kprobe_fault(regs, 14))
>   return;
>   /*
>* Don't take the mm semaphore here. If we fixup a prefetch
> @@ -340,7 +318,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs 
> *regs,
>   goto bad_area_nosemaphore;
>   }
> 
> - if (notify_page_fault(regs))
> + if (is_kprobe_fault(regs, 14))
>   return;
> 
>   /* It's safe to allow irq's after cr2 has been saved and the vmalloc

...

> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 8189158..65c1ffb 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -36,6 +36,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #ifdef CONFIG_KPROBES
>  #include 
> @@ -203,6 +204,20 @@ static inline struct kprobe *kprobe_running(void)
>   return (__get_cpu_var(current_kprobe));
>  }
> 
> +/*
> + * If it is a kprobe pagefault we can not be premptible so return before
> + * calling kprobe_running() as it will assert on smp_processor_id if
> + * preemption is enabled.
> + */
> +static inline int is_kprobe_fault(struct pt_regs *regs, int trapnr)
> +{
> + if (!user_mode(regs) && !preemptible() && kprobe_running() &&
> + kprobe_fault_handler(regs, trapnr))
> + return 1;
> + else
> + return 0;
> +}
> +
>  static inline void reset_current_kprobe(void)
>  {
>   __get_cpu_var(current_kprobe) = NULL;
> @@ -237,6 +252,10 @@ static inline struct kprobe *kprobe_running(void)
>  {
>   return NULL;
>  }
> +static inline int is_kprobe_fault(struct pt_regs *regs, int trapnr)
> +{
> + return 0;
> +}
>  static inline int register_kprobe(struct kprobe *p)
>  {
>   return -ENOSYS;

Ananth
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/7] convert semaphore to mutex in struct class

2008-01-02 Thread Jarek Poplawski
On Thu, Jan 03, 2008 at 01:50:20PM +0800, Dave Young wrote:
> Convert semaphore to mutex in struct class.
...
> One lockdep warning detected as following, thus use mutex_lock_nested with 
> SINGLE_DEPTH_NESTING in class_device_add
> 
> Jan  3 10:45:15 darkstar kernel: =
> Jan  3 10:45:15 darkstar kernel: [ INFO: possible recursive locking detected ]
> Jan  3 10:45:15 darkstar kernel: 2.6.24-rc6-mm1-mutex #1
> Jan  3 10:45:15 darkstar kernel: -
...
> If there's anything missed please help to point out, thanks.

Dave, IMHO it's not 'the right' way to do it: from this and earlier
discussions it seems there could be many more warnings like this one;
lockdep simply always turns itself off after first one. So, merging
your patches like this would effectively turn off lockdep for all
other places as well, maybe for a long time.

I'd suggest to try first to do it with some wrappers around mutexes,
which simply omit lockdep verification, and later try to replace them
one by one, after checking and testing there are no such warnings
anymore (which would often need some additional annotations about
nesting and probably some changes in lockdep too).

Thanks,
Jarek P.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] Kprobes: Move kprobes examples to samples/

2008-01-02 Thread Ananth N Mavinakayanahalli
From: Ananth N Mavinakayanahalli <[EMAIL PROTECTED]>

Move kprobes examples from Documentation/kprobes.txt to under samples/.
Patch originally by Randy Dunlap.

o Updated the patch to apply on 2.6.24-rc6-mm1
o Modified examples code to build on multiple architectures. Currently,
  the examples code works for x86 and powerpc
o Cleaned up unneeded #includes
o Cleaned up Kconfig per Sam Ravnborg's suggestions to fix build break
  on archs that don't have kretprobes
o Implemented suggestions by Mathieu Desnoyers on CONFIG_KRETPROBES
o Included Andrew Morton's cleanup based on x86-git

Signed-off-by: Randy Dunlap <[EMAIL PROTECTED]>
Signed-off-by: Ananth N Mavinakayanahalli <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Acked-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
---
 Documentation/kprobes.txt   |  206 
 samples/Kconfig |   11 +
 samples/Makefile|2 
 samples/kprobes/Makefile|5 
 samples/kprobes/jprobe_example.c|   65 +++
 samples/kprobes/kprobe_example.c|   88 +++
 samples/kprobes/kretprobe_example.c |   61 ++
 7 files changed, 236 insertions(+), 202 deletions(-)

Index: linux-2.6.24-rc6/Documentation/kprobes.txt
===
--- linux-2.6.24-rc6.orig/Documentation/kprobes.txt
+++ linux-2.6.24-rc6/Documentation/kprobes.txt
@@ -166,7 +166,8 @@ code mapping.
 The Kprobes API includes a "register" function and an "unregister"
 function for each type of probe.  Here are terse, mini-man-page
 specifications for these functions and the associated probe handlers
-that you'll write.  See the latter half of this document for examples.
+that you'll write.  See the files in the samples/kprobes/ sub-directory
+for examples.
 
 4.1 register_kprobe
 
@@ -392,220 +393,15 @@ e. Watchpoint probes (which fire on data
 
 8. Kprobes Example
 
-Here's a sample kernel module showing the use of kprobes to dump a
-stack trace and selected i386 registers when do_fork() is called.
-- cut here -
-/*kprobe_example.c*/
-#include 
-#include 
-#include 
-#include 
-
-/*For each probe you need to allocate a kprobe structure*/
-static struct kprobe kp;
-
-/*kprobe pre_handler: called just before the probed instruction is executed*/
-int handler_pre(struct kprobe *p, struct pt_regs *regs)
-{
-   printk("pre_handler: p->addr=0x%p, eip=%lx, eflags=0x%lx\n",
-   p->addr, regs->eip, regs->eflags);
-   dump_stack();
-   return 0;
-}
-
-/*kprobe post_handler: called after the probed instruction is executed*/
-void handler_post(struct kprobe *p, struct pt_regs *regs, unsigned long flags)
-{
-   printk("post_handler: p->addr=0x%p, eflags=0x%lx\n",
-   p->addr, regs->eflags);
-}
-
-/* fault_handler: this is called if an exception is generated for any
- * instruction within the pre- or post-handler, or when Kprobes
- * single-steps the probed instruction.
- */
-int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
-{
-   printk("fault_handler: p->addr=0x%p, trap #%dn",
-   p->addr, trapnr);
-   /* Return 0 because we don't handle the fault. */
-   return 0;
-}
-
-static int __init kprobe_init(void)
-{
-   int ret;
-   kp.pre_handler = handler_pre;
-   kp.post_handler = handler_post;
-   kp.fault_handler = handler_fault;
-   kp.symbol_name = "do_fork";
-
-   ret = register_kprobe(&kp);
-   if (ret < 0) {
-   printk("register_kprobe failed, returned %d\n", ret);
-   return ret;
-   }
-   printk("kprobe registered\n");
-   return 0;
-}
-
-static void __exit kprobe_exit(void)
-{
-   unregister_kprobe(&kp);
-   printk("kprobe unregistered\n");
-}
-
-module_init(kprobe_init)
-module_exit(kprobe_exit)
-MODULE_LICENSE("GPL");
-- cut here -
-
-You can build the kernel module, kprobe-example.ko, using the following
-Makefile:
-- cut here -
-obj-m := kprobe-example.o
-KDIR := /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-default:
-   $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules
-clean:
-   rm -f *.mod.c *.ko *.o
-- cut here -
-
-$ make
-$ su -
-...
-# insmod kprobe-example.ko
-
-You will see the trace data in /var/log/messages and on the console
-whenever do_fork() is invoked to create a new process.
+See samples/kprobes/kprobe_example.c.
 
 9. Jprobes Example
 
-Here's a sample kernel module showing the use of jprobes to dump
-the arguments of do_fork().
-- cut here -
-/*jprobe-example.c */
-#include 
-#include 
-#include 
-#include 
-#include 
-
-/*
- * Jumper probe for do_fork.
- * Mirror principle enables access to arguments of the probed routine
- * from the probe handler.
- */
-
-/* Proxy routine having the same arguments as actual do_fork() routine */
-long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
- st

Re: [PATCH] [20/20] x86: Print which shared library/executable faulted in segfault etc. messages

2008-01-02 Thread Eric Dumazet

Andi Kleen a écrit :

They now look like

hal-resmgr[13791]: segfault at 3c rip 2b9c8caec182 rsp 7fff1e825d30 error 4 in 
libacl.so.1.1.0[2b9c8caea000+6000]

This makes it easier to pinpoint bugs to specific libraries. 

And printing the offset into a mapping also always allows to find the 
correct fault point in a library even with randomized mappings. Previously

there was no way to actually find the correct code address inside
the randomized mapping.

Relies on earlier patch to shorten the printk formats.

They are often now longer than 80 characters, but I think that's worth 
it.


Patch for i386 and x86-64.

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/kernel/signal_32.c |7 +--
 arch/x86/kernel/signal_64.c |7 +--
 arch/x86/kernel/traps_32.c  |7 +--
 arch/x86/mm/fault_32.c  |4 +++-
 include/linux/mm.h  |1 +
 mm/memory.c |   27 +++
 6 files changed, 46 insertions(+), 7 deletions(-)

Index: linux/include/linux/mm.h
===
--- linux.orig/include/linux/mm.h
+++ linux/include/linux/mm.h
@@ -1145,6 +1145,7 @@ extern int randomize_va_space;
 #endif
 
 const char * arch_vma_name(struct vm_area_struct *vma);

+void print_vma_addr(char *prefix, unsigned long rip);
 
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);

 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
Index: linux/mm/memory.c
===
--- linux.orig/mm/memory.c
+++ linux/mm/memory.c
@@ -2746,3 +2746,30 @@ int access_process_vm(struct task_struct
 
 	return buf - old_buf;

 }
+
+/*
+ * Print the name of a VMA.
+ */
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+   struct mm_struct *mm = current->mm;
+   struct vm_area_struct *vma;
+   down_read(&mm->mmap_sem);
+   vma = find_vma(mm, ip);
+   if (vma && vma->vm_file) {
+   struct file *f = vma->vm_file;
+   char *buf = (char *)__get_free_page(GFP_KERNEL);
+   if (buf) {
+   char *p, *s;
+   p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);


d_path() can return an error. You should add:

if (IS_ERR(p))
p = "?";


+   s = strrchr(p, '/');
+   if (s)
+   p = s+1;
+   printk("%s%s[%lx+%lx]", prefix, p,
+   vma->vm_start,
+   vma->vm_end - vma->vm_start);
+   free_page((unsigned long)buf);
+   }
+   }
+   up_read(&current->mm->mmap_sem);
+}


Thank you

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] Unionfs: use printk KERN_CONT for debugging messages

2008-01-02 Thread Joe Perches
On Thu, 2008-01-03 at 00:57 -0500, Erez Zadok wrote:
> diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
> index c2b8b58..5f1d887 100644
> --- a/fs/unionfs/debug.c
> +++ b/fs/unionfs/debug.c
>  void __show_inode_times(const struct inode *inode,
> @@ -472,15 +473,15 @@ void __show_inode_times(const struct inode *inode,
>   if (unlikely(!lower_inode))
>   continue;
>   pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
> - pr_debug("%s:%s:%d ", file, fxn, line);
> - pr_debug("um=%lu/%lu lm=%lu/%lu ",
> -  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> -  lower_inode->i_mtime.tv_sec,
> -  lower_inode->i_mtime.tv_nsec);
> - pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
> -  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> -  lower_inode->i_ctime.tv_sec,
> -  lower_inode->i_ctime.tv_nsec);
> + printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
> + printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
> +inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> +lower_inode->i_mtime.tv_sec,
> +lower_inode->i_mtime.tv_nsec);
> + printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
> +inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> +lower_inode->i_ctime.tv_sec,
> +lower_inode->i_ctime.tv_nsec);
>   }
>  }
>  

I think printks should be single statements and
KERN_CONT should be used as sparingly as possible.

Perhaps:
pr_debug("IT(%lu:%d): %s:%s:%d "
 "um=%lu/%lu lm=%lu/%lu "
 "uc=%lu/%lu lc=%lu/%lu\n",
 inode->i_ino, bindex, file, fnx, line,
 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
 lower_inode->i_mtime.tv_sec,
 lower_inode->i_mtime.tv_nsec
 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
 lower_inode->i_ctime.tv_sec,
 lower_inode->i_ctime.tv_nsec);

> @@ -497,15 +498,15 @@ void __show_dinode_times(const struct dentry *dentry,
>   continue;
>   pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
>bindex);
> - pr_debug("%s:%s:%d ", file, fxn, line);
> - pr_debug("um=%lu/%lu lm=%lu/%lu ",
> -  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> -  lower_inode->i_mtime.tv_sec,
> -  lower_inode->i_mtime.tv_nsec);
> - pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
> -  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> -  lower_inode->i_ctime.tv_sec,
> -  lower_inode->i_ctime.tv_nsec);
> + printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
> + printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
> +inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> +lower_inode->i_mtime.tv_sec,
> +lower_inode->i_mtime.tv_nsec);
> + printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
> +inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> +lower_inode->i_ctime.tv_sec,
> +lower_inode->i_ctime.tv_nsec);
>   }
>  }
>  

and
pr_debug("DT(%s:%lu:%d): %s:%s:%d "
 "um=%lu/%lu lm=%lu/%lu "
 "uc=%lu/%lu lc=%lu/%lu\n",
 dentry->d_name.name, inode->i_ino, bindex,
 file, fnx, line,
 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
 lower_inode->i_mtime.tv_sec,
 lower_inode->i_mtime.tv_nsec
 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
 lower_inode->i_ctime.tv_sec,
 lower_inode->i_ctime.tv_nsec);

> 
> @@ -524,9 +525,10 @@ void __show_inode_counts(const struct inode *inode,
>   lower_inode = unionfs_lower_inode_idx(inode, bindex);
>   if (unlikely(!lower_inode))
>   continue;
> - pr_debug("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
> -  atomic_read(&(inode)->i_count));
> - pr_debug("lc=%d ", atomic_read(&(lower_inode)->i_count));
> - pr_debug("%s:%s:%d\n", file, fxn, line);
> + printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
> +atomic_read(&(inode)->i_count));
> + printk(KERN_CONT "lc=%d ",
> +atomic_read(&(lower_inode)->i_count));
> + printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
>   }
>  }

and
pr_debug("SIC(%l

[PATCH 1/2] Kprobes: Indicate kretprobe support in Kconfig

2008-01-02 Thread Ananth N Mavinakayanahalli
From: Ananth N Mavinakayanahalli <[EMAIL PROTECTED]>

This patch adds CONFIG_HAVE_KRETPROBES to the arch/<arch>/Kconfig file
for relevant architectures with kprobes support. This facilitates easy
handling of in-kernel modules (like samples/kprobes/kretprobe_example.c)
that depend on kretprobes being present in the kernel.

Updated to apply on 2.6.24-rc6-mm1. Thanks to Sam Ravnborg for helping
make the patch more lean.

Per Mathieu's suggestion, added CONFIG_KRETPROBES and fixed up
dependencies.

Signed-off-by: Ananth N Mavinakayanahalli <[EMAIL PROTECTED]>
Acked-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
---
 arch/Kconfig  |7 +++
 arch/ia64/Kconfig |1 +
 arch/powerpc/Kconfig  |1 +
 arch/s390/Kconfig |1 +
 arch/x86/Kconfig  |1 +
 include/asm-ia64/kprobes.h|1 -
 include/asm-powerpc/kprobes.h |1 -
 include/asm-x86/kprobes.h |1 -
 include/linux/kprobes.h   |6 +++---
 kernel/kprobes.c  |9 +++--
 10 files changed, 17 insertions(+), 12 deletions(-)

Index: linux-2.6.24-rc6/arch/Kconfig
===
--- linux-2.6.24-rc6.orig/arch/Kconfig
+++ linux-2.6.24-rc6/arch/Kconfig
@@ -27,5 +27,12 @@ config KPROBES
  for kernel debugging, non-intrusive instrumentation and testing.
  If in doubt, say "N".
 
+config KRETPROBES
+   def_bool y
+   depends on KPROBES && HAVE_KRETPROBES
+
 config HAVE_KPROBES
def_bool n
+
+config HAVE_KRETPROBES
+   def_bool n
Index: linux-2.6.24-rc6/arch/ia64/Kconfig
===
--- linux-2.6.24-rc6.orig/arch/ia64/Kconfig
+++ linux-2.6.24-rc6/arch/ia64/Kconfig
@@ -17,6 +17,7 @@ config IA64
select ARCH_SUPPORTS_MSI
select HAVE_OPROFILE
select HAVE_KPROBES
+   select HAVE_KRETPROBES
default y
help
  The Itanium Processor Family is Intel's 64-bit successor to
Index: linux-2.6.24-rc6/arch/powerpc/Kconfig
===
--- linux-2.6.24-rc6.orig/arch/powerpc/Kconfig
+++ linux-2.6.24-rc6/arch/powerpc/Kconfig
@@ -81,6 +81,7 @@ config PPC
default y
select HAVE_OPROFILE
select HAVE_KPROBES
+   select HAVE_KRETPROBES
 
 config EARLY_PRINTK
bool
Index: linux-2.6.24-rc6/arch/x86/Kconfig
===
--- linux-2.6.24-rc6.orig/arch/x86/Kconfig
+++ linux-2.6.24-rc6/arch/x86/Kconfig
@@ -20,6 +20,7 @@ config X86
def_bool y
select HAVE_OPROFILE
select HAVE_KPROBES
+   select HAVE_KRETPROBES
 
 config GENERIC_LOCKBREAK
def_bool n
Index: linux-2.6.24-rc6/include/asm-ia64/kprobes.h
===
--- linux-2.6.24-rc6.orig/include/asm-ia64/kprobes.h
+++ linux-2.6.24-rc6/include/asm-ia64/kprobes.h
@@ -82,7 +82,6 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ];
 };
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define kretprobe_blacklist_size 0
 
 #define SLOT0_OPCODE_SHIFT (37)
Index: linux-2.6.24-rc6/include/asm-powerpc/kprobes.h
===
--- linux-2.6.24-rc6.orig/include/asm-powerpc/kprobes.h
+++ linux-2.6.24-rc6/include/asm-powerpc/kprobes.h
@@ -80,7 +80,6 @@ typedef unsigned int kprobe_opcode_t;
 #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr))
 #endif
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define flush_insn_slot(p) do { } while (0)
 #define kretprobe_blacklist_size 0
 
Index: linux-2.6.24-rc6/include/asm-x86/kprobes.h
===
--- linux-2.6.24-rc6.orig/include/asm-x86/kprobes.h
+++ linux-2.6.24-rc6/include/asm-x86/kprobes.h
@@ -42,7 +42,6 @@ typedef u8 kprobe_opcode_t;
: (((unsigned long)current_thread_info()) + THREAD_SIZE \
   - (unsigned long)(ADDR)))
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define flush_insn_slot(p) do { } while (0)
 
 extern const int kretprobe_blacklist_size;
Index: linux-2.6.24-rc6/include/linux/kprobes.h
===
--- linux-2.6.24-rc6.orig/include/linux/kprobes.h
+++ linux-2.6.24-rc6/include/linux/kprobes.h
@@ -125,11 +125,11 @@ struct jprobe {
 DECLARE_PER_CPU(struct kprobe *, current_kprobe);
 DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-#ifdef ARCH_SUPPORTS_KRETPROBES
+#ifdef CONFIG_KRETPROBES
 extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
-#else /* ARCH_SUPPORTS_KRETPROBES */
+#else /* CONFIG_KRETPROBES */
 static inline void arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
 {
@@ -138,7 +138,7 @@ static inline 

[RFC] PCIE ASPM support

2008-01-02 Thread Shaohua Li
PCI Express ASPM defines a protocol for PCI Express components in the D0
state to reduce Link power by placing their Links into a low power state
and instructing the other end of the Link to do likewise. This
capability allows hardware-autonomous, dynamic Link power reduction
beyond what is achievable by software-only controlled power management.
However, the device should be configured appropriately by software.
Enabling ASPM will save power, but will introduce device latency.

This patch adds ASPM support in Linux. It introduces a global policy for
ASPM, a sysfs file /sys/module/pcie_aspm/parameters/policy can control
it. The interface can be used as a boot option too. Currently we have
the following settings:
-default, BIOS default setting
-powersave, highest power saving mode, enable all available ASPM state
and clock power management
-performance, highest performance, disable ASPM and clock power
management
By default, the 'default' policy is used currently.

In my test, power difference between powersave mode and performance mode
is about 1.3w in a system with 3 PCIE links.

please review, any comments will be appreciated.

Signed-off-by: Shaohua Li <[EMAIL PROTECTED]>
---
 drivers/pci/pci-acpi.c|9 
 drivers/pci/pci.c |4 
 drivers/pci/pcie/Kconfig  |   13 
 drivers/pci/pcie/Makefile |3 
 drivers/pci/pcie/aspm.c   |  673 ++
 drivers/pci/probe.c   |5 
 drivers/pci/remove.c  |4 
 include/linux/aspm.h  |   33 ++
 include/linux/pci.h   |4 
 include/linux/pci_regs.h  |8 
 10 files changed, 756 insertions(+)

Index: linux/drivers/pci/pcie/Makefile
===
--- linux.orig/drivers/pci/pcie/Makefile2008-01-03 13:16:52.0 
+0800
+++ linux/drivers/pci/pcie/Makefile 2008-01-03 13:47:34.0 +0800
@@ -2,6 +2,9 @@
 # Makefile for PCI-Express PORT Driver
 #
 
+# Build PCI Express ASPM if needed
+obj-$(CONFIG_PCIEASPM) += aspm.o
+
 pcieportdrv-y  := portdrv_core.o portdrv_pci.o portdrv_bus.o
 
 obj-$(CONFIG_PCIEPORTBUS)  += pcieportdrv.o
Index: linux/drivers/pci/pcie/aspm.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux/drivers/pci/pcie/aspm.c   2008-01-03 13:47:34.0 +0800
@@ -0,0 +1,673 @@
+/*
+ * File:   drivers/pci/pcie/aspm.c
+ * Enabling PCIE link L0s/L1 state and Clock Power Management
+ *
+ * Copyright (C) 2007 Intel
+ * Copyright (C) Zhang Yanmin ([EMAIL PROTECTED])
+ * Copyright (C) Shaohua Li ([EMAIL PROTECTED])
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "../pci.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "pcie_aspm."
+
+#definePCIE_ASPM_L0S   (1U)
+#definePCIE_ASPM_L1(2U)
+
+/* only for downstream port */
+struct link_state {
+   struct list_head sibiling;
+   struct pci_dev *pdev;
+
+   /* ASPM state */
+   unsigned int support_state;
+   unsigned int enabled_state;
+   unsigned int bios_aspm_state;
+   /* upstream component */
+   unsigned int l0s_upper_latency;
+   unsigned int l1_upper_latency;
+   /* downstream component */
+   unsigned int l0s_down_latency;
+   unsigned int l1_down_latency;
+   /* Clock PM state*/
+   unsigned int clk_pm_capable:1;
+   unsigned int clk_pm_enabled:1;
+   unsigned int bios_clk_state:1;
+
+};
+
+/* Only for endpoint */
+struct endpoint_state {
+   unsigned int l0s_acceptable_latency;
+   unsigned int l1_acceptable_latency;
+};
+
+static int aspm_disabled;
+static DEFINE_MUTEX(aspm_lock);
+static LIST_HEAD(link_list);
+
+#define POLICY_DEFAULT 0   /* BIOS default setting */
+#define POLICY_PERFORMANCE 1   /* high performance */
+#define POLICY_POWERSAVE 2 /* high power saving */
+static int aspm_policy;
+static const char* policy_str[] = {
+   [POLICY_DEFAULT] = "default",
+   [POLICY_PERFORMANCE] = "performance",
+   [POLICY_POWERSAVE] = "powersave"
+};
+
+static int policy_to_aspm_state(struct pci_dev *pdev)
+{
+   switch (aspm_policy) {
+   case POLICY_PERFORMANCE:
+   /* Disable ASPM and Clock PM */
+   return 0;
+   case POLICY_POWERSAVE:
+   /* Enable ASPM L0s/L1 */
+   return 3;
+   case POLICY_DEFAULT:
+   return ((struct link_state *)pdev->link_state)->bios_aspm_state;
+   }
+   return 0;
+}
+
+static int policy_to_clkpm_state(struct pci_dev *pdev)
+{
+   switch (aspm_policy) {
+   case POLICY_PERFORMANCE:
+   /* Disable ASPM and Clock PM */
+   return 0;
+   case POLICY_POWERSAVE:
+   /* Disable Clock PM */
+   return 1;
+   case 

Re: The perfect patch - Posting a patch series (was Re: [PATCH 06/12] pci : Use mutex instead of semaphore in driver core)

2008-01-02 Thread Dave Young
On Jan 2, 2008 7:14 PM, Stefan Richter <[EMAIL PROTECTED]> wrote:
> Dave Young wrote:
> > On Dec 29, 2007 7:42 PM, Stefan Richter <[EMAIL PROTECTED]> wrote:
> >> However, Dave's postings lack a References: header which refer to his
> >> 00/12 posting.
> [To let mail readers show it as a thread.]
> >> (Also, a bonus in the 00/12 posting would be a listing of all patch
> >> titles in the series and the total diffstat of the series,
> [similar to the "git pull" requests from maintainers]
> >> but nearly nobody does this.)
> ...
> > andrew recommends not to use 00/xx introduction email in series
> > in his "The perfect patch":
> > http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt
>
> "Please don't post [PATCH 0/n] messages" is a simplified short-hand for
> "Please don't move information which we want to include into the SCM
> changelog into a separate [PATCH 0/n] message".
>
> There is nothing wrong with a 0/n posting per se.  But whenever you
> write a 0/n posting, ask yourself:
>   - Isn't the information I provide here necessary to keep around by
> somebody who takes my patch series into his quilt series or into his
> source repository?
>   - Couldn't the information here be useful at a later point in time
> when people look into the mainline Linux history?
> If "yes" or "maybe yes", then add this information to the changelogs in
> the patches.  You can then leave the 0/n posting as is, or make it
> briefer, or omit it entirely.
>
> It is never necessary to post a 0/n message, because _everything_ which
> could be said in this message can also be said in the i/n messages.
> (Things which are not meant for the SCM changelog can be written after a
> "---" delimiter line or other patch delimiters.)  However, it is
> sometimes convenient to repeat or summarize some of the information from
> the i/n messages in a 0/n message.  Think about convenience of the
> _recipients_ though, not about the sender's convenience.
>
> Generally, the 0/n message fulfills purposes very similar to "git pull"
> messages:  They give a brief overview of what is coming up in the series
> and how to handle it, and it adds redundant information about the
> contents of the series (titles, authors, overall diffstat, whether it
> supersedes an earlier series) as a verification for the recipient
> whether he really got what the sender intended to get to him.  This is
> to help detect mix-ups at the sender's or receiver's side.
>
> PS:
> Writing a changelog is almost never trivial.  Even if it seems trivial
> to the patch author, the change may not be trivial from other
> developers' and maintainers' perspective, or from the author's
> perspective when he looks at his patch a few months later.  This also
> means that there may very well be information in the 0/n message which
> should also appear in the i/n messages, even if this information seems
> obvious to the author.

Thanks for the explanation, I strongly agree with you.
I think that 0/n message should be a summary of the series. At the
same time the i/n changelog should not be stripped, any info of
changes should be added to the relevant patches.

Regards
dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 02/20] make the inode i_mmap_lock a reader/writer lock

2008-01-02 Thread Nick Piggin
On Thursday 03 January 2008 10:35, Mike Travis wrote:
> Hi Nick,
>
> Have you done anything more with allowing > 256 CPUS in this spinlock
> patch?  We've been testing with 1k cpus and to verify with -mm kernel,
> we need to "unpatch" these spinlock changes.
>
> Thanks,
> Mike

Hi Mike,

Actually I had it in my mind that 64 bit used single-byte locking like
i386, so I didn't think I'd caused a regression there.

I'll take a look at fixing that up now.

Thanks,
Nick
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 7/7] spi : convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/spi/spi.c |4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff -upr linux/drivers/spi/spi.c linux.new/drivers/spi/spi.c
--- linux/drivers/spi/spi.c 2007-12-28 10:47:38.0 +0800
+++ linux.new/drivers/spi/spi.c 2007-12-28 10:48:22.0 +0800
@@ -494,7 +494,7 @@ struct spi_master *spi_busnum_to_master(
struct spi_master   *master = NULL;
struct spi_master   *m;
 
-   down(&spi_master_class.sem);
+   mutex_lock(&spi_master_class.mutex);
list_for_each_entry(dev, &spi_master_class.children, node) {
m = container_of(dev, struct spi_master, dev);
if (m->bus_num == bus_num) {
@@ -502,7 +502,7 @@ struct spi_master *spi_busnum_to_master(
break;
}
}
-   up(&spi_master_class.sem);
+   mutex_unlock(&spi_master_class.mutex);
return master;
 }
 EXPORT_SYMBOL_GPL(spi_busnum_to_master);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] Unionfs: locking fixes

2008-01-02 Thread Erez Zadok
Lock parent dentries during revalidation.
Reduce total number of lockdep classes used.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/dentry.c |   13 -
 fs/unionfs/fanout.h |3 ++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 0369d93..7646828 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -42,6 +42,7 @@ static bool __unionfs_d_revalidate_one(struct dentry *dentry,
memset(&lowernd, 0, sizeof(struct nameidata));
 
verify_locked(dentry);
+   verify_locked(dentry->d_parent);
 
/* if the dentry is unhashed, do NOT revalidate */
if (d_deleted(dentry))
@@ -351,7 +352,10 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, 
struct nameidata *nd,
 * to child order.
 */
for (i = 0; i < chain_len; i++) {
-   unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL+i);
+   unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD);
+   if (chain[i] != chain[i]->d_parent)
+   unionfs_lock_dentry(chain[i]->d_parent,
+   UNIONFS_DMUTEX_REVAL_PARENT);
saved_bstart = dbstart(chain[i]);
saved_bend = dbend(chain[i]);
sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
@@ -366,6 +370,8 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, 
struct nameidata *nd,
 bindex++)
unionfs_mntput(chain[i], bindex);
}
+   if (chain[i] != chain[i]->d_parent)
+   unionfs_unlock_dentry(chain[i]->d_parent);
unionfs_unlock_dentry(chain[i]);
 
if (unlikely(!valid))
@@ -376,6 +382,9 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, 
struct nameidata *nd,
 out_this:
/* finally, lock this dentry and revalidate it */
verify_locked(dentry);
+   if (dentry != dentry->d_parent)
+   unionfs_lock_dentry(dentry->d_parent,
+   UNIONFS_DMUTEX_REVAL_PARENT);
dgen = atomic_read(&UNIONFS_D(dentry)->generation);
 
if (unlikely(is_newer_lower(dentry))) {
@@ -394,6 +403,8 @@ out_this:
purge_inode_data(dentry->d_inode);
}
valid = __unionfs_d_revalidate_one(dentry, nd);
+   if (dentry != dentry->d_parent)
+   unionfs_unlock_dentry(dentry->d_parent);
 
/*
 * If __unionfs_d_revalidate_one() succeeded above, then it will
diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
index 5f31015..4d9a45f 100644
--- a/fs/unionfs/fanout.h
+++ b/fs/unionfs/fanout.h
@@ -290,7 +290,8 @@ enum unionfs_dentry_lock_class {
UNIONFS_DMUTEX_PARENT,
UNIONFS_DMUTEX_CHILD,
UNIONFS_DMUTEX_WHITEOUT,
-   UNIONFS_DMUTEX_REVAL,   /* for file/dentry revalidate */
+   UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
+   UNIONFS_DMUTEX_REVAL_CHILD,   /* for file/dentry revalidate */
 };
 
 static inline void unionfs_lock_dentry(struct dentry *d,
-- 
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] Unionfs: use VFS helpers to manipulate i_nlink

2008-01-02 Thread Erez Zadok
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/unlink.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
index a1c82b6..1e370a1 100644
--- a/fs/unionfs/unlink.c
+++ b/fs/unionfs/unlink.c
@@ -79,7 +79,7 @@ static int unionfs_unlink_whiteout(struct inode *dir, struct 
dentry *dentry)
 
 out:
if (!err)
-   dentry->d_inode->i_nlink--;
+   inode_dec_link_count(dentry->d_inode);
 
/* We don't want to leave negative leftover dentries for revalidate. */
if (!err && (dbopaque(dentry) != -1))
-- 
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] Unionfs: use printk KERN_CONT for debugging messages

2008-01-02 Thread Erez Zadok
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/debug.c |   50 ++
 1 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
index c2b8b58..5f1d887 100644
--- a/fs/unionfs/debug.c
+++ b/fs/unionfs/debug.c
@@ -456,9 +456,10 @@ void __show_branch_counts(const struct super_block *sb,
mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
else
mnt = NULL;
-   pr_debug("%d:", (mnt ? atomic_read(&mnt->mnt_count) : -99));
+   printk(KERN_CONT "%d:",
+  (mnt ? atomic_read(&mnt->mnt_count) : -99));
}
-   pr_debug("%s:%s:%d\n", file, fxn, line);
+   printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
 }
 
 void __show_inode_times(const struct inode *inode,
@@ -472,15 +473,15 @@ void __show_inode_times(const struct inode *inode,
if (unlikely(!lower_inode))
continue;
pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
-   pr_debug("%s:%s:%d ", file, fxn, line);
-   pr_debug("um=%lu/%lu lm=%lu/%lu ",
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-lower_inode->i_mtime.tv_sec,
-lower_inode->i_mtime.tv_nsec);
-   pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
-inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-lower_inode->i_ctime.tv_sec,
-lower_inode->i_ctime.tv_nsec);
+   printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
+   printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
+  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+  lower_inode->i_mtime.tv_sec,
+  lower_inode->i_mtime.tv_nsec);
+   printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
+  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+  lower_inode->i_ctime.tv_sec,
+  lower_inode->i_ctime.tv_nsec);
}
 }
 
@@ -497,15 +498,15 @@ void __show_dinode_times(const struct dentry *dentry,
continue;
pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
 bindex);
-   pr_debug("%s:%s:%d ", file, fxn, line);
-   pr_debug("um=%lu/%lu lm=%lu/%lu ",
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-lower_inode->i_mtime.tv_sec,
-lower_inode->i_mtime.tv_nsec);
-   pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
-inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-lower_inode->i_ctime.tv_sec,
-lower_inode->i_ctime.tv_nsec);
+   printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
+   printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
+  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+  lower_inode->i_mtime.tv_sec,
+  lower_inode->i_mtime.tv_nsec);
+   printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
+  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+  lower_inode->i_ctime.tv_sec,
+  lower_inode->i_ctime.tv_nsec);
}
 }
 
@@ -524,9 +525,10 @@ void __show_inode_counts(const struct inode *inode,
lower_inode = unionfs_lower_inode_idx(inode, bindex);
if (unlikely(!lower_inode))
continue;
-   pr_debug("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
-atomic_read(&(inode)->i_count));
-   pr_debug("lc=%d ", atomic_read(&(lower_inode)->i_count));
-   pr_debug("%s:%s:%d\n", file, fxn, line);
+   printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
+  atomic_read(&(inode)->i_count));
+   printk(KERN_CONT "lc=%d ",
+  atomic_read(&(lower_inode)->i_count));
+   printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
}
 }
-- 
1.5.2.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL -mm] 0/3 Unionfs updates/fixes/cleanups

2008-01-02 Thread Erez Zadok

The following is a series of patchsets related to Unionfs.  This is the
third set of patchsets resulting from an lkml review of the entire unionfs
code base.  The most significant change here is a locking/race bugfix during
dentry revalidation.

These patches were tested (where appropriate) on Linus's 2.6.24 latest code
(as of v2.6.24-rc6-179-gb8c9a18), MM, as well as the backports to
2.6.{23,22,21,20,19,18,9} on ext2/3/4, xfs, reiserfs, nfs2/3/4, jffs2,
ramfs, tmpfs, cramfs, and squashfs (where available).  Also tested with
LTP-full.  See http://unionfs.filesystems.org/ to download back-ported
unionfs code.

Please pull from the 'master' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git

to receive the following:

Erez Zadok (3):
  Unionfs: use printk KERN_CONT for debugging messages
  Unionfs: locking fixes
  Unionfs: use VFS helpers to manipulate i_nlink

 debug.c  |   50 ++
 dentry.c |   13 -
 fanout.h |3 ++-
 unlink.c |2 +-
 4 files changed, 41 insertions(+), 27 deletions(-)

---
Erez Zadok
[EMAIL PROTECTED]
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/7] scsi : convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/scsi/hosts.c |4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff -upr linux/drivers/scsi/hosts.c linux.new/drivers/scsi/hosts.c
--- linux/drivers/scsi/hosts.c  2007-12-28 10:45:46.0 +0800
+++ linux.new/drivers/scsi/hosts.c  2007-12-28 10:46:19.0 +0800
@@ -441,7 +441,7 @@ struct Scsi_Host *scsi_host_lookup(unsig
struct class_device *cdev;
struct Scsi_Host *shost = ERR_PTR(-ENXIO), *p;
 
-   down(&class->sem);
+   mutex_lock(&class->mutex);
list_for_each_entry(cdev, &class->children, node) {
p = class_to_shost(cdev);
if (p->host_no == hostnum) {
@@ -449,7 +449,7 @@ struct Scsi_Host *scsi_host_lookup(unsig
break;
}
}
-   up(&class->sem);
+   mutex_unlock(&class->mutex);
 
return shost;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/7] rtc : convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/rtc/interface.c |4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff -upr linux/drivers/rtc/interface.c linux.new/drivers/rtc/interface.c
--- linux/drivers/rtc/interface.c   2007-12-28 10:41:42.0 +0800
+++ linux.new/drivers/rtc/interface.c   2007-12-28 10:43:51.0 +0800
@@ -256,7 +256,7 @@ struct rtc_device *rtc_class_open(char *
struct device *dev;
struct rtc_device *rtc = NULL;
 
-   down(&rtc_class->sem);
+   mutex_lock(&rtc_class->mutex);
list_for_each_entry(dev, &rtc_class->devices, node) {
if (strncmp(dev->bus_id, name, BUS_ID_SIZE) == 0) {
dev = get_device(dev);
@@ -272,7 +272,7 @@ struct rtc_device *rtc_class_open(char *
rtc = NULL;
}
}
-   up(&rtc_class->sem);
+   mutex_unlock(&rtc_class->mutex);
 
return rtc;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/7] power supply : convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/power/apm_power.c |6 +++---
drivers/power/power_supply_core.c |8 
2 files changed, 7 insertions(+), 7 deletions(-)

diff -upr linux/drivers/power/apm_power.c linux.new/drivers/power/apm_power.c
--- linux/drivers/power/apm_power.c 2007-12-28 10:36:26.0 +0800
+++ linux.new/drivers/power/apm_power.c 2007-12-28 10:38:03.0 +0800
@@ -207,10 +207,10 @@ static void apm_battery_apm_get_power_st
union power_supply_propval status;
union power_supply_propval capacity, time_to_full, time_to_empty;
 
-   down(&power_supply_class->sem);
+   mutex_lock(&power_supply_class->mutex);
find_main_battery();
if (!main_battery) {
-   up(&power_supply_class->sem);
+   mutex_unlock(&power_supply_class->mutex);
return;
}
 
@@ -278,7 +278,7 @@ static void apm_battery_apm_get_power_st
}
}
 
-   up(&power_supply_class->sem);
+   mutex_unlock(&power_supply_class->mutex);
 }
 
 static int __init apm_battery_init(void)
diff -upr linux/drivers/power/power_supply_core.c 
linux.new/drivers/power/power_supply_core.c
--- linux/drivers/power/power_supply_core.c 2007-12-28 10:36:49.0 
+0800
+++ linux.new/drivers/power/power_supply_core.c 2007-12-28 10:38:55.0 
+0800
@@ -31,7 +31,7 @@ static void power_supply_changed_work(st
for (i = 0; i < psy->num_supplicants; i++) {
struct device *dev;
 
-   down(&power_supply_class->sem);
+   mutex_lock(&power_supply_class->mutex);
list_for_each_entry(dev, &power_supply_class->devices, node) {
struct power_supply *pst = dev_get_drvdata(dev);
 
@@ -40,7 +40,7 @@ static void power_supply_changed_work(st
pst->external_power_changed(pst);
}
}
-   up(&power_supply_class->sem);
+   mutex_unlock(&power_supply_class->mutex);
}
 
power_supply_update_leds(psy);
@@ -60,7 +60,7 @@ int power_supply_am_i_supplied(struct po
union power_supply_propval ret = {0,};
struct device *dev;
 
-   down(&power_supply_class->sem);
+   mutex_lock(&power_supply_class->mutex);
list_for_each_entry(dev, &power_supply_class->devices, node) {
struct power_supply *epsy = dev_get_drvdata(dev);
int i;
@@ -76,7 +76,7 @@ int power_supply_am_i_supplied(struct po
}
}
 out:
-   up(&power_supply_class->sem);
+   mutex_unlock(&power_supply_class->mutex);
 
dev_dbg(psy->dev, "%s %d\n", __FUNCTION__, ret.intval);
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/7] ieee1394 :convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/ieee1394/nodemgr.c |   40 
1 file changed, 20 insertions(+), 20 deletions(-)

diff -upr linux/drivers/ieee1394/nodemgr.c linux.new/drivers/ieee1394/nodemgr.c
--- linux/drivers/ieee1394/nodemgr.c2007-12-28 10:11:14.0 +0800
+++ linux.new/drivers/ieee1394/nodemgr.c2008-01-03 09:37:23.0 
+0800
@@ -733,16 +733,16 @@ static void nodemgr_remove_uds(struct no
struct unit_directory *tmp, *ud;
 
/* Iteration over nodemgr_ud_class.devices has to be protected by
-* nodemgr_ud_class.sem, but device_unregister() will eventually
-* take nodemgr_ud_class.sem too. Therefore pick out one ud at a time,
-* release the semaphore, and then unregister the ud. Since this code
+* nodemgr_ud_class.mutex, but device_unregister() will eventually
+* take nodemgr_ud_class.mutex too. Therefore pick out one ud at a time,
+* unlock the mutex, and then unregister the ud. Since this code
 * may be called from other contexts besides the knodemgrds, protect the
-* gap after release of the semaphore by nodemgr_serialize_remove_uds.
+* gap after unlock of the mutex by nodemgr_serialize_remove_uds.
 */
mutex_lock(&nodemgr_serialize_remove_uds);
for (;;) {
ud = NULL;
-   down(&nodemgr_ud_class.sem);
+   mutex_lock(&nodemgr_ud_class.mutex);
list_for_each_entry(dev, &nodemgr_ud_class.devices, node) {
tmp = container_of(dev, struct unit_directory,
   unit_dev);
@@ -751,7 +751,7 @@ static void nodemgr_remove_uds(struct no
break;
}
}
-   up(&nodemgr_ud_class.sem);
+   mutex_unlock(&nodemgr_ud_class.mutex);
if (ud == NULL)
break;
device_unregister(&ud->unit_dev);
@@ -888,7 +888,7 @@ static struct node_entry *find_entry_by_
struct device *dev;
struct node_entry *ne, *ret_ne = NULL;
 
-   down(&nodemgr_ne_class.sem);
+   mutex_lock(&nodemgr_ne_class.mutex);
list_for_each_entry(dev, &nodemgr_ne_class.devices, node) {
ne = container_of(dev, struct node_entry, node_dev);
 
@@ -897,7 +897,7 @@ static struct node_entry *find_entry_by_
break;
}
}
-   up(&nodemgr_ne_class.sem);
+   mutex_unlock(&nodemgr_ne_class.mutex);
 
return ret_ne;
 }
@@ -909,7 +909,7 @@ static struct node_entry *find_entry_by_
struct device *dev;
struct node_entry *ne, *ret_ne = NULL;
 
-   down(&nodemgr_ne_class.sem);
+   mutex_lock(&nodemgr_ne_class.mutex);
list_for_each_entry(dev, &nodemgr_ne_class.devices, node) {
ne = container_of(dev, struct node_entry, node_dev);
 
@@ -918,7 +918,7 @@ static struct node_entry *find_entry_by_
break;
}
}
-   up(&nodemgr_ne_class.sem);
+   mutex_unlock(&nodemgr_ne_class.mutex);
 
return ret_ne;
 }
@@ -1384,7 +1384,7 @@ static void nodemgr_suspend_ne(struct no
ne->in_limbo = 1;
WARN_ON(device_create_file(&ne->device, &dev_attr_ne_in_limbo));
 
-   down(&nodemgr_ud_class.sem);
+   mutex_lock(&nodemgr_ud_class.mutex);
list_for_each_entry(dev, &nodemgr_ud_class.devices, node) {
ud = container_of(dev, struct unit_directory, unit_dev);
if (ud->ne != ne)
@@ -1404,7 +1404,7 @@ static void nodemgr_suspend_ne(struct no
device_release_driver(&ud->device);
put_driver(drv);
}
-   up(&nodemgr_ud_class.sem);
+   mutex_unlock(&nodemgr_ud_class.mutex);
 }
 
 
@@ -1417,7 +1417,7 @@ static void nodemgr_resume_ne(struct nod
ne->in_limbo = 0;
device_remove_file(&ne->device, &dev_attr_ne_in_limbo);
 
-   down(&nodemgr_ud_class.sem);
+   mutex_lock(&nodemgr_ud_class.mutex);
list_for_each_entry(dev, &nodemgr_ud_class.devices, node) {
ud = container_of(dev, struct unit_directory, unit_dev);
if (ud->ne != ne)
@@ -1434,7 +1434,7 @@ static void nodemgr_resume_ne(struct nod
}
put_driver(drv);
}
-   up(&nodemgr_ud_class.sem);
+   mutex_unlock(&nodemgr_ud_class.mutex);
 
HPSB_DEBUG("Node resumed: ID:BUS[" NODE_BUS_FMT "]  GUID[%016Lx]",
   NODE_BUS_ARGS(ne->host, ne->nodeid), (unsigned long 
long)ne->guid);
@@ -1449,7 +1449,7 @@ static void nodemgr_update_pdrv(struct n
struct hpsb_protocol_driver *pdrv;
int error;
 
-   down(&nodemgr_ud_class.sem);
+   mutex_lock(&nodemgr_ud_class.mutex);
list_for_each_entry(dev, &nodemgr_ud_class

[PATCH 2/7] i2c : convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/i2c/i2c-core.c |9 -
1 file changed, 4 insertions(+), 5 deletions(-)

diff -upr linux/drivers/i2c/i2c-core.c linux.new/drivers/i2c/i2c-core.c
--- linux/drivers/i2c/i2c-core.c2007-12-28 10:06:58.0 +0800
+++ linux.new/drivers/i2c/i2c-core.c2008-01-03 09:31:38.0 +0800
@@ -34,7 +34,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "i2c-core.h"
 
@@ -597,12 +596,12 @@ int i2c_register_driver(struct module *o
if (driver->attach_adapter) {
struct i2c_adapter *adapter;
 
-   down(&i2c_adapter_class.sem);
+   mutex_lock(&i2c_adapter_class.mutex);
list_for_each_entry(adapter, &i2c_adapter_class.devices,
dev.node) {
driver->attach_adapter(adapter);
}
-   up(&i2c_adapter_class.sem);
+   mutex_unlock(&i2c_adapter_class.mutex);
}
 
mutex_unlock(&core_lock);
@@ -631,7 +630,7 @@ void i2c_del_driver(struct i2c_driver *d
 * attached. If so, detach them to be able to kill the driver
 * afterwards.
 */
-   down(&i2c_adapter_class.sem);
+   mutex_lock(&i2c_adapter_class.mutex);
list_for_each_entry(adap, &i2c_adapter_class.devices, dev.node) {
if (driver->detach_adapter) {
if (driver->detach_adapter(adap)) {
@@ -656,7 +655,7 @@ void i2c_del_driver(struct i2c_driver *d
}
}
}
-   up(&i2c_adapter_class.sem);
+   mutex_unlock(&i2c_adapter_class.mutex);
 
  unregister:
driver_unregister(&driver->driver);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/7] driver core : convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Use mutex instead of semaphore in struct class.

Signed-off-by: Dave Young <[EMAIL PROTECTED]> 
---
drivers/base/class.c   |   22 +++---
drivers/base/core.c|   13 ++---
include/linux/device.h |3 ++-
3 files changed, 19 insertions(+), 19 deletions(-)

diff -upr linux/drivers/base/class.c linux.new/drivers/base/class.c
--- linux/drivers/base/class.c  2007-12-27 13:03:58.0 +0800
+++ linux.new/drivers/base/class.c  2008-01-03 12:51:51.0 +0800
@@ -145,7 +145,7 @@ int class_register(struct class * cls)
INIT_LIST_HEAD(&cls->devices);
INIT_LIST_HEAD(&cls->interfaces);
kset_init(&cls->class_dirs);
-   init_MUTEX(&cls->sem);
+   mutex_init(&cls->mutex);
error = kobject_set_name(&cls->subsys.kobj, "%s", cls->name);
if (error)
return error;
@@ -628,13 +628,13 @@ int class_device_add(struct class_device
kobject_uevent(&class_dev->kobj, KOBJ_ADD);
 
/* notify any interfaces this device is now here */
-   down(&parent_class->sem);
+   mutex_lock_nested(&parent_class->mutex, SINGLE_DEPTH_NESTING);
list_add_tail(&class_dev->node, &parent_class->children);
list_for_each_entry(class_intf, &parent_class->interfaces, node) {
if (class_intf->add)
class_intf->add(class_dev, class_intf);
}
-   up(&parent_class->sem);
+   mutex_unlock(&parent_class->mutex);
 
goto out1;
 
@@ -736,12 +736,12 @@ void class_device_del(struct class_devic
struct class_interface *class_intf;
 
if (parent_class) {
-   down(&parent_class->sem);
+   mutex_lock(&parent_class->mutex);
list_del_init(&class_dev->node);
list_for_each_entry(class_intf, &parent_class->interfaces, node)
if (class_intf->remove)
class_intf->remove(class_dev, class_intf);
-   up(&parent_class->sem);
+   mutex_unlock(&parent_class->mutex);
}
 
if (class_dev->dev) {
@@ -783,14 +783,14 @@ void class_device_destroy(struct class *
struct class_device *class_dev = NULL;
struct class_device *class_dev_tmp;
 
-   down(&cls->sem);
+   mutex_lock(&cls->mutex);
list_for_each_entry(class_dev_tmp, &cls->children, node) {
if (class_dev_tmp->devt == devt) {
class_dev = class_dev_tmp;
break;
}
}
-   up(&cls->sem);
+   mutex_unlock(&cls->mutex);
 
if (class_dev)
class_device_unregister(class_dev);
@@ -823,7 +823,7 @@ int class_interface_register(struct clas
if (!parent)
return -EINVAL;
 
-   down(&parent->sem);
+   mutex_lock(&parent->mutex);
list_add_tail(&class_intf->node, &parent->interfaces);
if (class_intf->add) {
list_for_each_entry(class_dev, &parent->children, node)
@@ -833,7 +833,7 @@ int class_interface_register(struct clas
list_for_each_entry(dev, &parent->devices, node)
class_intf->add_dev(dev, class_intf);
}
-   up(&parent->sem);
+   mutex_unlock(&parent->mutex);
 
return 0;
 }
@@ -847,7 +847,7 @@ void class_interface_unregister(struct c
if (!parent)
return;
 
-   down(&parent->sem);
+   mutex_lock(&parent->mutex);
list_del_init(&class_intf->node);
if (class_intf->remove) {
list_for_each_entry(class_dev, &parent->children, node)
@@ -857,7 +857,7 @@ void class_interface_unregister(struct c
list_for_each_entry(dev, &parent->devices, node)
class_intf->remove_dev(dev, class_intf);
}
-   up(&parent->sem);
+   mutex_unlock(&parent->mutex);
 
class_put(parent);
 }
diff -upr linux/drivers/base/core.c linux.new/drivers/base/core.c
--- linux/drivers/base/core.c   2007-12-27 13:03:58.0 +0800
+++ linux.new/drivers/base/core.c   2008-01-03 09:27:16.0 +0800
@@ -19,7 +19,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "base.h"
 #include "power/power.h"
@@ -823,7 +822,7 @@ int device_add(struct device *dev)
klist_add_tail(&dev->knode_parent, &parent->klist_children);
 
if (dev->class) {
-   down(&dev->class->sem);
+   mutex_lock(&dev->class->mutex);
/* tie the class to the device */
list_add_tail(&dev->node, &dev->class->devices);
 
@@ -831,7 +830,7 @@ int device_add(struct device *dev)
list_for_each_entry(class_intf, &dev->class->interfaces, node)
if (class_intf->add_dev)
class_intf->add_dev(dev, class_intf);
-   up(&dev->class->sem);
+   mutex_unlock(&dev->class->mutex);
}
  Done:
put_device(dev);
@@

[PATCH 0/7] convert semaphore to mutex in struct class

2008-01-02 Thread Dave Young
Convert semaphore to mutex in struct class.
All the patches in this series should be applied simultaneously.

toc:
---
1-driver-core-struct-class-convert-semaphore-to-mutex.patch
2-i2c-struct-class-convert-semaphore-to-mutex.patch
3-ieee1394-struct-class-convert-semaphore-to-mutex.patch
4-power-struct-class-convert-semaphore-to-mutex.patch
5-rtc-struct-class-convert-semaphore-to-mutex.patch
6-scsi-struct-class-convert-semaphore-to-mutex.patch
7-spi-struct-class-convert-semaphore-to-mutex.patch

Summary diffstat:
---
drivers/base/class.c  |   22 ++--
drivers/base/core.c   |   13 +---
drivers/i2c/i2c-core.c|9 +++-
drivers/ieee1394/nodemgr.c|   40 +++---
drivers/power/apm_power.c |6 ++---
drivers/power/power_supply_core.c |8 +++
drivers/rtc/interface.c   |4 +--
drivers/scsi/hosts.c  |4 +--
drivers/spi/spi.c |4 +--
include/linux/device.h|3 +-
10 files changed, 56 insertions(+), 57 deletions(-)

One lockdep warning was detected, as shown below; mutex_lock_nested() with
SINGLE_DEPTH_NESTING is therefore used in class_device_add():

Jan  3 10:45:15 darkstar kernel: =
Jan  3 10:45:15 darkstar kernel: [ INFO: possible recursive locking detected ]
Jan  3 10:45:15 darkstar kernel: 2.6.24-rc6-mm1-mutex #1
Jan  3 10:45:15 darkstar kernel: -
Jan  3 10:45:15 darkstar kernel: modprobe/2130 is trying to acquire lock:
Jan  3 10:45:15 darkstar kernel:  (&cls->mutex){--..}, at: [] 
class_device_add+0x140/0x240
Jan  3 10:45:15 darkstar kernel:
Jan  3 10:45:15 darkstar kernel: but task is already holding lock:
Jan  3 10:45:15 darkstar kernel:  (&cls->mutex){--..}, at: [] 
class_interface_register+0x43/0xf0
Jan  3 10:45:15 darkstar kernel:
Jan  3 10:45:15 darkstar kernel: other info that might help us debug this:
Jan  3 10:45:15 darkstar kernel: 1 lock held by modprobe/2130:
Jan  3 10:45:15 darkstar kernel:  #0:  (&cls->mutex){--..}, at: [] 
class_interface_register+0x43/0xf0
Jan  3 10:45:15 darkstar kernel:
Jan  3 10:45:15 darkstar kernel: stack backtrace:
Jan  3 10:45:15 darkstar kernel: Pid: 2130, comm: modprobe Not tainted 
2.6.24-rc6-mm1-mutex #1
Jan  3 10:45:15 darkstar kernel:  [] show_trace_log_lvl+0x1a/0x30
Jan  3 10:45:15 darkstar kernel:  [] show_trace+0x12/0x20
Jan  3 10:45:15 darkstar kernel:  [] dump_stack+0x6d/0x80
Jan  3 10:45:15 darkstar kernel:  [] print_deadlock_bug+0xc7/0xe0
Jan  3 10:45:15 darkstar kernel:  [] check_deadlock+0x6c/0x80
Jan  3 10:45:15 darkstar kernel:  [] validate_chain+0x14c/0x370
Jan  3 10:45:15 darkstar kernel:  [] __lock_acquire+0x1c0/0x7e0
Jan  3 10:45:15 darkstar kernel:  [] lock_acquire+0x79/0xb0
Jan  3 10:45:15 darkstar kernel:  [] mutex_lock_nested+0x8c/0x300
Jan  3 10:45:15 darkstar kernel:  [] class_device_add+0x140/0x240
Jan  3 10:45:15 darkstar kernel:  [] class_device_register+0x12/0x20
Jan  3 10:45:15 darkstar kernel:  [] class_device_create+0x9a/0xb0
Jan  3 10:45:15 darkstar kernel:  [] sg_add+0x12c/0x200 [sg]
Jan  3 10:45:15 darkstar kernel:  [] 
class_interface_register+0xd9/0xf0
Jan  3 10:45:15 darkstar kernel:  [] scsi_register_interface+0xf/0x20
Jan  3 10:45:15 darkstar kernel:  [] init_sg+0x82/0xbc [sg]
Jan  3 10:45:15 darkstar kernel:  [] sys_init_module+0xea/0x130
Jan  3 10:45:15 darkstar kernel:  [] syscall_call+0x7/0xb
Jan  3 10:45:15 darkstar kernel:  ===

If I have missed anything, please point it out. Thanks.

Regards
dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.24-rc6-mm1

2008-01-02 Thread Torsten Kaiser
On Jan 2, 2008 10:57 PM, J. Bruce Fields <[EMAIL PROTECTED]> wrote:
> On Thu, Jan 03, 2008 at 08:51:54AM +1100, Herbert Xu wrote:
> > On Wed, Jan 02, 2008 at 07:29:59PM +0100, Torsten Kaiser wrote:
> > >
> > > Vanilla 2.6.24-rc6 seems stable. I did not see any crash or warnings.
> >
> > OK that's great.  The next step would be to try excluding specific git
> > trees from mm to see if they make a difference.
> >
> > The two specific trees of interest would be git-nfsd and git-net.
>
> Also, if it's git-nfsd, it'd be useful to test with the current git-nfsd
> from the for-mm branch at:
>
> git://linux-nfs.org/~bfields/linus.git for-mm
>
> and then any bisection results (even partial) from that tree would help
> immensely

The problem with that is, that triggering the bug is not easy so
marking anything 'good' is questionable.
This time I needed to compile over 50 packages until it triggered.

I was using 2.6.24-rc6-mm1 again, but with a crude hack (see end of
mail) that I hope should catch any double-frees of skbs.
None of my warnings triggered, only a list corruption again in
svc_xprt_enqueue(), but this time with additional output about
what's wrong with the list:
[17023.029519] list_add corruption. prev->next should be next
(8100d20ec1c8), but was 81009c5a6
c28. (prev=81009c5a6c28).
[17023.029537] [ cut here ]
[17023.031445] kernel BUG at lib/list_debug.c:33!
[17023.033280] invalid opcode:  [1] SMP
[17023.034967] last sysfs file:
/sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
[17023.038209] CPU 3
[17023.039047] Modules linked in: radeon drm w83792d ipv6 tuner
tea5767 tda8290 tuner_xc2028 tda9887 tu
ner_simple mt20xx tea5761 tvaudio msp3400 bttv ir_common
compat_ioctl32 videobuf_dma_sg videobuf_core b
tcx_risc usbhid tveeprom videodev hid v4l2_common v4l1_compat sg
pata_amd i2c_nforce2
[17023.039519] Pid: 20564, comm: nfsv4-svc Not tainted 2.6.24-rc6-mm1 #14
[17023.039519] RIP: 0010:[]  []
__list_add+0x54/0x60
[17023.039519] RSP: 0018:8101002c9dc0  EFLAGS: 00010282
[17023.039519] RAX: 0088 RBX: 810110125c00 RCX: 
[17023.039519] RDX: 81010067c000 RSI: 0001 RDI: 80764140
[17023.039519] RBP: 8101002c9dc0 R08: 0001 R09: 
[17023.039519] R10: 81000100a088 R11: 0001 R12: 8100d20ec180
[17023.039519] R13: 8100d20ec1b8 R14: 8100d20ec1b8 R15: 8101188e4600
[17023.039519] FS:  7ff7a870c6f0() GS:81011ff0cd00()
knlGS:
[17023.039519] CS:  0010 DS:  ES:  CR0: 8005003b
[17023.039519] CR2: 024df510 CR3: 32539000 CR4: 06e0
[17023.039519] DR0:  DR1:  DR2: 
[17023.039519] DR3:  DR6: 0ff0 DR7: 0400
[17023.039519] Process nfsv4-svc (pid: 20564, threadinfo
8101002c8000, task 81010067c000)
[17023.039519] Stack:  8101002c9e00 805c18ab
8100d20ec188 8101188e4600
[17023.039519]  81009c5a6c00 81010d118000 810110125c00
8101188e4610
[17023.039519]  8101002c9e10 805c1997 8101002c9ee0
805c2ac4
[17023.039519] Call Trace:
[17023.039519]  [] svc_xprt_enqueue+0x1ab/0x240
[17023.039519]  [] svc_xprt_received+0x17/0x20
[17023.039519]  [] svc_recv+0x394/0x7c0
[17023.039519]  [] svc_send+0xae/0xd0
[17023.039519]  [] default_wake_function+0x0/0x10
[17023.039519]  [] nfs_callback_svc+0x79/0x130
[17023.039519]  [] finish_task_switch+0x67/0xe0
[17023.039519]  [] child_rip+0xa/0x12
[17023.039519]  [] restore_args+0x0/0x30
[17023.039519]  [] __svc_create_thread+0xdd/0x200
[17023.039519]  [] nfs_callback_svc+0x0/0x130
[17023.039519]  [] child_rip+0x0/0x12
[17023.039519]
[17023.039519]
[17023.039519] Code: 0f 0b eb fe 0f 1f 84 00 00 00 00 00 55 48 8b 16 48 89 e5 e8
[17023.039519] RIP  [] __list_add+0x54/0x60
[17023.039519]  RSP 
[17023.039524] ---[ end trace a9257b24a4b10968 ]---
[17023.041451] Kernel panic - not syncing: Aiee, killing interrupt handler!

I also wonder if this really is only one bug, or two. (One in skb
handling and one in svc_xprt_enqueue's list code)

Summary of what I think is related to this:

* 2.6.24-rc2-mm1 might not have it, but had a bug in the nfsv4 sillyrenames.
* 2.6.24-rc3-mm1 did not work for me at all (IO-APIC problem)
* 2.6.24-rc3-mm2 has the bug
  - I have seen a crash in ether1394_complete_cb
  - I have seen 3 crashes in svc_xprt_enqueue, two with slub_debug=FZP
  - I have seen a crash in tcp_send_ack -> __alloc_skb
  - patched with svc_xprt_received(&svsk->sk_xprt); removed from
svc_create_socket()
-> still crashed in svc_xprt_enqueue
  - patched "skb_release_all(dst);" back to "skb_release_data(dst);"
in skb_morph() (net/core/skbuff.c).
-> seemed to work (200+ packages compiled+installed)
* 2.6.24-rc4-mm1 and -rc5-mm1: not tried, was still testing -rc3-mm2
* 2.6.24-rc6 did not crash, but I did not te

Re: sata_nv + ADMA + Samsung disk problem

2008-01-02 Thread Robert Hancock

Mark Lord wrote:

Robert Hancock wrote:
..
 From some of the traces I took previously (posted on LKML as "sata_nv 
ADMA controller lockup investigation" way back in Feb 07), what seems 
to occur is that when the second command is issued very rapidly 
(within less than 20 microseconds, or potentially longer) after the 
previous command's completion, the ADMA status changes from 0x500 
(STOPPED and IDLE) to 0x400 (just IDLE) as it typically does, but then 
it sticks there, no interrupt is ever raised, and CPB response flags 
remain at 0.

..

Assuming that NVidia got their ADMA core logic from Pacific Digital
(the inventors), then it may have some of the same bugs as the original.

One of those bugs is that the aGO trigger is sampled in a "racey" way,
such that it sometimes may miss a recent addition to the ring.

The *only* way to guarantee things with the original Pacific Digital core
was to (1) always retrigger aGO for a full ring scan with each new 
addition,

and (2) poll periodically (every half second or so) rather than relying
exclusively on the IRQ actually working..

Dunno about the NVidia version.


Theirs works rather differently - the GO bit is there, but there's 
another append register which is used to tell the controller that a new 
tag has been added to the CPB list.


The only thing we currently use the GO bit for is to switch between ADMA 
and port register mode. Could be there's something we need to do there, 
though, who knows..


--
Robert Hancock  Saskatoon, SK, Canada
To email, remove "nospam" from [EMAIL PROTECTED]
Home Page: http://www.roberthancock.com/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kprobes: Introduce is_kprobe_fault()

2008-01-02 Thread Masami Hiramatsu
Hi Harvey,

Thank you for the great work!
This seems to include everything I expected.
I'll try to test.

Please send this to all of the kprobes maintainers, because it
involves all of the architectures that kprobes supports.

Maintainers;
could you help review this?

Thank you,

Best Regards,

Harvey Harrison wrote:
> Use a central is_kprobe_fault() inline in kprobes.h to remove all
> of the arch-dependant, practically identical implementations in
> avr32, ia64, powerpc, s390, sparc64, and x86.
> 
> avr32 was the only arch without the preempt_disable/enable pair
> in its notify_page_fault implementation.  This should be checked
> by avr32 people.
> 
> This uncovered a possible bug in the s390 version as that purely
> copied the x86 version unconditionally passing 14 as the trapnr
> rather than the error_code parameter.  This has been preserved
> in this patch, s390 people should check if error_code really was
> intended.
> 
> Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
> ---
> Andrew, this came up when discussing some x86 fault unification
> work, figured you were the right person to feed this through.
> 
> At least the diffstat says I did something right.  Patch against
> current Linus tree.
> 
>  arch/avr32/mm/fault.c   |   21 +
>  arch/ia64/mm/fault.c|   24 +---
>  arch/powerpc/mm/fault.c |   25 +
>  arch/s390/mm/fault.c|   25 +
>  arch/sparc64/mm/fault.c |   23 +--
>  arch/x86/mm/fault_32.c  |   26 ++
>  arch/x86/mm/fault_64.c  |   26 ++
>  include/linux/kprobes.h |   19 +++
>  8 files changed, 28 insertions(+), 161 deletions(-)
> 
> diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
> index 6560cb1..a95cce2 100644
> --- a/arch/avr32/mm/fault.c
> +++ b/arch/avr32/mm/fault.c
> @@ -20,25 +20,6 @@
>  #include 
>  #include 
>  
> -#ifdef CONFIG_KPROBES
> -static inline int notify_page_fault(struct pt_regs *regs, int trap)
> -{
> - int ret = 0;
> -
> - if (!user_mode(regs)) {
> - if (kprobe_running() && kprobe_fault_handler(regs, trap))
> - ret = 1;
> - }
> -
> - return ret;
> -}
> -#else
> -static inline int notify_page_fault(struct pt_regs *regs, int trap)
> -{
> - return 0;
> -}
> -#endif
> -
>  int exception_trace = 1;
>  
>  /*
> @@ -66,7 +47,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct 
> pt_regs *regs)
>   int code;
>   int fault;
>  
> - if (notify_page_fault(regs, ecr))
> + if (is_kprobe_fault(regs, ecr))
>   return;
>  
>   address = sysreg_read(TLBEAR);
> diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
> index 7571076..1696805 100644
> --- a/arch/ia64/mm/fault.c
> +++ b/arch/ia64/mm/fault.c
> @@ -18,28 +18,6 @@
>  
>  extern void die (char *, struct pt_regs *, long);
>  
> -#ifdef CONFIG_KPROBES
> -static inline int notify_page_fault(struct pt_regs *regs, int trap)
> -{
> - int ret = 0;
> -
> - if (!user_mode(regs)) {
> - /* kprobe_running() needs smp_processor_id() */
> - preempt_disable();
> - if (kprobe_running() && kprobes_fault_handler(regs, trap))
> - ret = 1;
> - preempt_enable();
> - }
> -
> - return ret;
> -}
> -#else
> -static inline int notify_page_fault(struct pt_regs *regs, int trap)
> -{
> - return 0;
> -}
> -#endif
> -
>  /*
>   * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
>   * (inside region 5, on ia64) and that page is present.
> @@ -106,7 +84,7 @@ ia64_do_page_fault (unsigned long address, unsigned long 
> isr, struct pt_regs *re
>   /*
>* This is to handle the kprobes on user space access instructions
>*/
> - if (notify_page_fault(regs, TRAP_BRKPT))
> + if (is_kprobe_fault(regs, TRAP_BRKPT))
>   return;
>  
>   down_read(&mm->mmap_sem);
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 8135da0..00df6f9 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -39,29 +39,6 @@
>  #include 
>  #include 
>  
> -
> -#ifdef CONFIG_KPROBES
> -static inline int notify_page_fault(struct pt_regs *regs)
> -{
> - int ret = 0;
> -
> - /* kprobe_running() needs smp_processor_id() */
> - if (!user_mode(regs)) {
> - preempt_disable();
> - if (kprobe_running() && kprobe_fault_handler(regs, 11))
> - ret = 1;
> - preempt_enable();
> - }
> -
> - return ret;
> -}
> -#else
> -static inline int notify_page_fault(struct pt_regs *regs)
> -{
> - return 0;
> -}
> -#endif
> -
>  /*
>   * Check whether the instruction at regs->nip is a store using
>   * an update addressing form which will update r1.
> @@ -164,7 +141,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, 
> unsigned long address,
>   is_write = error_code & E

Re: [patch 1/3] move WARN_ON() out of line

2008-01-02 Thread Olof Johansson
On Thu, Jan 03, 2008 at 01:56:58AM +0100, Arjan van de Ven wrote:
> Subject: move WARN_ON() out of line
> From: Arjan van de Ven <[EMAIL PROTECTED]>
> CC: Ingo Molnar <[EMAIL PROTECTED]>
> CC: Andrew Morton <[EMAIL PROTECTED]>
>
> A quick grep shows that there are currently 1145 instances of WARN_ON
> in the kernel. Currently, WARN_ON is pretty much entirely inlined,
> which makes it hard to enhance it without growing the size of the kernel
> (and getting Andrew unhappy).
>
> This patch moves WARN_ON() out of line entirely. I've considered keeping
> the test inline and moving only the slowpath out of line, but I decided
> against that: an out of line test reduces the pressure on the CPUs
> branch predictor logic and gives smaller code, while a function call
> to a fixed location is quite fast. Likewise I've considered doing something
> similar to BUG() (eg use a trapping instruction) but that's not really
> better (it needs the test inline again and recovering from an invalid
> instruction isn't quite fun).

Hi Arjan,

I've got a couple of patches in -mm at the moment that introduces __WARN()
and uses that (and lets architectures override __WARN, since for example
powerpc does use trapping instructions similarly to BUG()).

The two patches in question are:

bugh-remove-have_arch_bug--have_arch_warn.patch
powerpc-switch-to-generic-warn_on-bug_on.patch

Care to do this incrementally on top of that instead? I.e. call
do_warn_on() from the asm-generic/bug.h __WARN() instead.


-Olof

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread Bryan Wu
On Jan 3, 2008 11:43 AM, David Brownell <[EMAIL PROTECTED]> wrote:
> On Wednesday 02 January 2008, Robin Getz wrote:
> > On Wed 2 Jan 2008 13:47, David Brownell pondered:
> > > On Wednesday 02 January 2008, Robin Getz wrote:
> > > > From: Robin Getz <[EMAIL PROTECTED]>
> > > >
> > > > Allow embedded developers to turn support for USB Hubs off even if
> > > > they have a full root hub. This saves the overhead (RAM and Flash size).
> > >
> > > ISTR that it won't save very much code though ... the Linux USB
> > > stack structures all its enumeration logic around hubs.
> >
> > Today, there is an USB (Host), USB_GADGET and USB_OTG (which depends on USB 
> > &&
> > USB_GADGET).
> >
> > This just enables cutting more code out, with out having to have USB_GADGET 
> > &
> > USB_OTG enabled. When I checked - that is where most of the savings came
> > from.
>
> Right.  I'm not objecting to this at all.  Just pointing
> out that usbcore will still end up including quite a lot
> of hub functionality, because there's still going to be
> a root hub in the system and a khubd managing it.
>
> This patch might be improved slightly -- in ways that, as I
> understand things, could save some RAM on Blackfin! -- by
> having the BLACKLIST_HUB option get rid of the transaction
> translator support (changing C code not just Kconfig).
> It's pretty minimal, but won't be used...
>
> Also, as you point out, it's no longer OTG specific, so
> renaming the option would improve clarity.   Maybe to
> something along the lines of USB_HOST_NO_EXTERNAL_HUBS.
> No big deal, at least now.
>

Thanks, that's the point.
Maybe Robin can choose a good name for this option.
I am just very happy that plugging in an external hub or something
like that will not make our system hang, -:)))

B.T.W, 2 questions about the MUSB driver:
1. What's the plan for merging the whole MUSB driver into mainline? Maybe
I can clean up the current Blackfin ports for you guys.
2. Do you remember the PING issue I reported on the OMAP list? What do you
think about that?

Best Regards,
-Bryan Wu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch 0/8] Remove 'TOPDIR' from Makefiles

2008-01-02 Thread WANG Cong

>Hi Wang.
>
>You a heads up. I will most likely apply
>the remaining of the patches tonight, except the UM stuff
>which I hope Jeff to take and the final removal of TOPDIR
>may wait a bit.
>I want the s390, xfs and um changes to hit -mm at least
>and we have several external modules that uses TOPDIR.
>
>And some of these external modules I care about - not
>all of them but some of them.
>
>   Sam

Thanks, Sam!

 Cong

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Dhaval Giani
On Wed, Jan 02, 2008 at 01:54:12PM -0800, Christoph Lameter wrote:
> Just traced it again on my system: It is okay for the number of pages on 
> the quicklist to reach the high count that we see (although the 16 bit 
> limits are weird. You have around 4GB of memory in the system?). Up to 
> 1/16th of free memory of a node can be allocated for quicklists (this 
> allows the effective shutting down and restarting of large amounts of 
> processes)
> 
> The problem may be that this is run on a HIGHMEM system and the 
> calculation of allowable pages on the quicklists does not take into 
> account that highmem pages are not usable for quicklists (not sure about 
> ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)
> 
> Here is a patch that removes the HIGHMEM portion from the calculation. 
> Does this change anything:
> 

Yep. This one hits it. I don't see the obvious signs of the oom
happening in the 5 mins I have run the script. I will let it run for
some more time.

Thanks!
-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: SATA kernel-buffered read VERY slow (not raid, Promise TX300 card); 2.6.23.1(vanilla)

2008-01-02 Thread Linda Walsh

Robert Hancock wrote:

Linda Walsh wrote:

Alan Cox wrote:
rate began falling; at 128k block-reads-at-a-time or larger, it 
drops below

20MB/s (only on buffered SATA).

Try disabling NCQ - see if you've got a drive with the 'NCQ = no
readahead' flaw.

http://linux-ata.org/faq.html#ncq

---
   When drive initializes, dmesg says it has NCQ (depth 0/32)
   Reading the queue_depth under /sys, shows a queuedepth of "1".

But more importantly -- I notice a chronic error message associated
with this drive that may be causing some or all of the problem:
---
Jan  2 20:06:10 Ishtar kernel: ata1.00: exception Emask 0x0 SAct 0x0 
SErr 0x0 action 0x2

Jan  2 20:06:10 Ishtar kernel: ata1.00: port_status 0x2008
Jan  2 20:06:10 Ishtar kernel: ata1.00: cmd 
c8/00:10:30:06:03/00:00:00:00:00/e0 tag 0 cdb 0x0 data 8192 in
Jan  2 20:06:10 Ishtar kernel:  res 
50/00:00:3f:06:03/00:00:00:00:00/e0 Emask 0x2 (HSM violation)

Jan  2 20:06:13 Ishtar kernel: ata1: limiting SATA link speed to 1.5 Gbps
Jan  2 20:06:13 Ishtar kernel: ata1.00: exception Emask 0x0 SAct 0x0 
SErr 0x0 action 0x6

Jan  2 20:06:13 Ishtar kernel: ata1.00: port_status 0x2008
Jan  2 20:06:13 Ishtar kernel: ata1.00: cmd 
c8/00:10:00:8b:04/00:00:00:00:00/e0 tag 0 cdb 0x0 data 8192 in
Jan  2 20:06:13 Ishtar kernel:  res 
50/00:00:0f:8b:04/00:00:00:00:00/e0 Emask 0x2 (HSM violation)
Jan  2 20:06:14 Ishtar kernel: ata1: exception Emask 0x10 SAct 0x0 SErr 
0x0 action 0x3

Jan  2 20:06:14 Ishtar kernel: ata1: hotplug_status 0x80
Jan  2 20:06:15 Ishtar kernel: ata1: exception Emask 0x10 SAct 0x0 SErr 
0x0 action 0x3

Jan  2 20:06:15 Ishtar kernel: ata1: hotplug_status 0x80
---
What da heck?  Note, this is with NCQ-queuing set to "1".  Only 
reference I could find for this error referred to "older drives", but 
this is a

2007-model year drive with ATA-7 and udma-6.

I don't think you can get or set the multi count currently, it just 
uses the best supported value.

ok



2) Drive Advanced Power Management setting("-B") (write-only):
"HDIO_DRIVE_CMD failed: Input/output error"
3) Drive Acoustic ("-M"), read = " acoustic  = not supported",
write = " HDIO_DRIVE_CMD:ACOUSTIC failed: Input/output error"


Not sure about these ones.. Does anything show up in dmesg when you do 
this?

---
   Yes:
   (for "-B", power-management)
ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
ata1.00: port_status 0x2020
ata1.00: cmd ef/05:fe:00:00:00/00:00:00:00:00/40 tag 0 cdb 0x0 data 0
res 51/04:fe:00:00:00/00:00:00:00:00/40 Emask 0x1 (device error)
ata1.00: configured for UDMA/133
ata1: EH complete
sd 1:0:0:0: [sdb] 1465149168 512-byte hardware sectors (750156 MB)
sd 1:0:0:0: [sdb] Write Protect is off
sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't 
support DPO or FUA


  (for "-M" acoustic management):
ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
ata1.00: port_status 0x2020
ata1.00: cmd ef/42:fe:00:00:00/00:00:00:00:00/40 tag 0 cdb 0x0 data 0
res 51/04:fe:00:00:00/00:00:00:00:00/40 Emask 0x1 (device error)
ata1.00: configured for UDMA/133
ata1: EH complete
sd 1:0:0:0: [sdb] 1465149168 512-byte hardware sectors (750156 MB)
sd 1:0:0:0: [sdb] Write Protect is off
sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't 
support DPO or FUA



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: + restore-missing-sysfs-max_cstate-attr.patch added to -mm tree

2008-01-02 Thread Mark Lord

Pallipadi, Venkatesh wrote:
 


-Original Message-
From: Andrew Morton [mailto:[EMAIL PROTECTED] 
Sent: Wednesday, January 02, 2008 4:52 PM

To: Pallipadi, Venkatesh
Cc: Mark Lord; Arjan van de Ven; [EMAIL PROTECTED]; 
[EMAIL PROTECTED]; Ingo Molnar; linux-kernel@vger.kernel.org; 
[EMAIL PROTECTED]
Subject: Re: + restore-missing-sysfs-max_cstate-attr.patch 
added to -mm tree


On Wed, 2 Jan 2008 16:06:20 -0800 "Pallipadi, Venkatesh" 
<[EMAIL PROTECTED]> wrote:


 


-Original Message-
From: Mark Lord [mailto:[EMAIL PROTECTED] 
Sent: Wednesday, January 02, 2008 3:42 PM

To: Arjan van de Ven
Cc: Pallipadi, Venkatesh; Andrew Morton; [EMAIL PROTECTED]; 
[EMAIL PROTECTED]; Ingo Molnar; linux-kernel@vger.kernel.org; 
[EMAIL PROTECTED]
Subject: Re: + restore-missing-sysfs-max_cstate-attr.patch 
added to -mm tree


Arjan van de Ven wrote:

On Fri, 30 Nov 2007 22:31:17 -0500
Mark Lord <[EMAIL PROTECTED]> wrote:


Arjan van de Ven wrote:

On Fri, 30 Nov 2007 22:14:08 -0500
Mark Lord <[EMAIL PROTECTED]> wrote:


in -mm there is.. the QoS stuff allows you to set maximum
tolerable

..

That's encouraging, I think, but not for 2.6.24.


latency. If your app can't take any latency, you should set
those... and the side effect is that the kernel will not do
long-latency C-states or P-state transitions..

..

I don't mind the cpufreq changing (actually, I want it 

to drop in
cpufreq to save power and keep the fan off), but the 

C-states just

kill this app.

The app is VMware.  I force the max_state=1 when launching,
ah but then it's even easier... and can be done in 

2.6.24 already.
VMWare after all has a kernel module, and the latency 

stuff is in

2.6.23 and 2.6.24 available inside the kernel already.

..

Oh, I'm perfectly happy to write my own kernel module if 

that's what

all you need to do in your kernel module is call

add_latency_constraint("mark_wants_his_mouse", 5);

or so

..

Dredging up an old regression again now:

The "make my own module to replace /sys/.../max_cstate" doesn't work
for the single-core machine we use a lot around here.

VMware is totally sluggish unless I go to another text window 
and do this:


   while ( true ); do echo -n ; done

At which point VMware performs well again,
the same as with "echo 1 > max_cstate" in 2.6.23.

Anyone got any suggestions on how to fix this regression
or work around it for 2.6.24 ?


Easiest and clean way to do it is to have a driver with
set_acceptable_latency() for 1uS or so in init and
remove_acceptable_latency() at exit.
err, you appear to be suggesting that Mark patch his kernel to 
make it work

as well as 2.6.23?  That would be a wrong answer.

This regression was known six weeks ago.  What do we need to 
do (or revert)

to fix it in 2.6.24?



As I responded earlier here
http://www.ussg.iu.edu/hypermail/linux/kernel/0711.3/2348.html

This interface cannot be supported cleanly with cpuidle. The cleanest
way to do this is to go through latency interfaces. We have changed all
in kernel drivers to use this new interface. The issue here is, I
removed this sysfs interface without deprecating it. We can call it a
regression and we can add it back for the moment. But, this will go from
sysfs sooner or later and latency interface has to be used in future.
And Mark earlier responded in this thread saying he is OK with adding
something in the kernel to get this working, That is the reason I
suggested the above option.

..

I'm fine with switching to a new interface, and even supplying my own
kernel module to do so.  But it doesn't produce the required effect
on the single-core machine we just switched over to 2.6.24
(and then promptly switched back again!).

There should be some way to get it to work with similar minimal latency
to the old "echo 1 > max_cstate" logic, but I've yet to discover it.

I wonder if the "new" latency interfaces actually work ?


As I saw it 6 weeks back, max_cstate option works as a boot parameter.

..

That's no good.  It has to be changeable on the fly, rather than requiring
the machine be rebooted each time.

Cheers
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: + restore-missing-sysfs-max_cstate-attr.patch added to -mm tree

2008-01-02 Thread Mark Lord

Pallipadi, Venkatesh wrote:
 


-Original Message-
From: Mark Lord [mailto:[EMAIL PROTECTED] 
Sent: Wednesday, January 02, 2008 3:42 PM

To: Arjan van de Ven
Cc: Pallipadi, Venkatesh; Andrew Morton; [EMAIL PROTECTED]; 
[EMAIL PROTECTED]; Ingo Molnar; linux-kernel@vger.kernel.org; 
[EMAIL PROTECTED]
Subject: Re: + restore-missing-sysfs-max_cstate-attr.patch 
added to -mm tree


Arjan van de Ven wrote:

On Fri, 30 Nov 2007 22:31:17 -0500
Mark Lord <[EMAIL PROTECTED]> wrote:


Arjan van de Ven wrote:

On Fri, 30 Nov 2007 22:14:08 -0500
Mark Lord <[EMAIL PROTECTED]> wrote:


in -mm there is.. the QoS stuff allows you to set maximum
tolerable

..

That's encouraging, I think, but not for 2.6.24.


latency. If your app can't take any latency, you should set
those... and the side effect is that the kernel will not do
long-latency C-states or P-state transitions..

..

I don't mind the cpufreq changing (actually, I want it to drop in
cpufreq to save power and keep the fan off), but the 

C-states just

kill this app.

The app is VMware.  I force the max_state=1 when launching,

ah but then it's even easier... and can be done in 2.6.24 already.
VMWare after all has a kernel module, and the latency stuff is in
2.6.23 and 2.6.24 available inside the kernel already.

..

Oh, I'm perfectly happy to write my own kernel module if that's what

all you need to do in your kernel module is call

add_latency_constraint("mark_wants_his_mouse", 5);

or so

..

Dredging up an old regression again now:

The "make my own module to replace /sys/.../max_cstate" doesn't work
for the single-core machine we use a lot around here.

VMware is totally sluggish unless I go to another text window 
and do this:


   while ( true ); do echo -n ; done

At which point VMware performs well again,
the same as with "echo 1 > max_cstate" in 2.6.23.

Anyone got any suggestions on how to fix this regression
or work around it for 2.6.24 ?



Easiest and clean way to do it is to have a driver with
set_acceptable_latency() for 1uS or so in init and
remove_acceptable_latency() at exit.

..

As noted, I already do that.  It helps with my Core2Duo machine,
but not with the single-core case.  Bummer.

-ml
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sata_nv + ADMA + Samsung disk problem

2008-01-02 Thread Mark Lord

Robert Hancock wrote:
..
 From some of the traces I took previously (posted on LKML as "sata_nv 
ADMA controller lockup investigation" way back in Feb 07), what seems to 
occur is that when the second command is issued very rapidly (within 
less than 20 microseconds, or potentially longer) after the previous 
command's completion, the ADMA status changes from 0x500 (STOPPED and 
IDLE) to 0x400 (just IDLE) as it typically does, but then it sticks 
there, no interrupt is ever raised, and CPB response flags remain at 0.

..

Assuming that NVidia got their ADMA core logic from Pacific Digital
(the inventors), then it may have some of the same bugs as the original.

One of those bugs is that the aGO trigger is sampled in a "racey" way,
such that it sometimes may miss a recent addition to the ring.

The *only* way to guarantee things with the original Pacific Digital core
was to (1) always retrigger aGO for a full ring scan with each new addition,
and (2) poll periodically (every half second or so) rather than relying
exclusively on the IRQ actually working..

Dunno about the NVidia version.

Cheers


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.22-stable causes oomkiller to be invoked

2008-01-02 Thread Dhaval Giani
On Thu, Jan 03, 2008 at 09:29:42AM +0530, Dhaval Giani wrote:
> On Wed, Jan 02, 2008 at 01:54:12PM -0800, Christoph Lameter wrote:
> > Just traced it again on my system: It is okay for the number of pages on 
> > the quicklist to reach the high count that we see (although the 16 bit 
> > limits are weird. You have around 4GB of memory in the system?). Up to 
> > 1/16th of free memory of a node can be allocated for quicklists (this 
> > allows the effective shutting down and restarting of large amounts of 
> > processes)
> > 
> > The problem may be that this is run on a HIGHMEM system and the 
> > calculation of allowable pages on the quicklists does not take into 
> > account that highmem pages are not usable for quicklists (not sure about 
> > ZONE_MOVABLE on i386. Maybe we need to take that into account as well?)
> > 
> > Here is a patch that removes the HIGHMEM portion from the calculation. 
> > Does this change anything:
> > 
> 
> Yep. This one hits it. I don't see the obvious signs of the oom
> happening in the 5 mins I have run the script. I will let it run for
> some more time.
> 

Yes, no oom even after 20 mins of running (which is double the normal
time for the oom to occur), also no changes in free lowmem.

Thanks for the fix. Feel free to add a 

Tested-by: Dhaval Giani <[EMAIL PROTECTED]>

-- 
regards,
Dhaval
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sata_nv + ADMA + Samsung disk problem

2008-01-02 Thread Mark Lord

Robert Hancock wrote:


What we're doing to enter legacy mode is essentially:

-wait until ADMA status indicates IDLE bit set (max wait of 1 microsecond)
-clear GO bit in control register
-wait until status indicates LEGACY bit set (max wait of 1 microsecond)

and to enter ADMA mode:

-set GO bit in control register
-wait until status indicates LEGACY bit cleared and IDLE bit set (max 
wait of 1 microsecond)

..

If there are outstanding TCQ/NCQ commands (any drive),
then this could take (much) longer to enter legacy mode,
as the ADMA engine will wait for them all to finish.

But for normal, "nothing outstanding" mode, it should be very quick.

Cheers
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] kprobes: Introduce is_kprobe_fault()

2008-01-02 Thread Harvey Harrison
Use a central is_kprobe_fault() inline in kprobes.h to remove all
of the arch-dependent, practically identical implementations in
avr32, ia64, powerpc, s390, sparc64, and x86.

avr32 was the only arch without the preempt_disable/enable pair
in its notify_page_fault implementation.  This should be checked
by avr32 people.

This uncovered a possible bug in the s390 version as that purely
copied the x86 version unconditionally passing 14 as the trapnr
rather than the error_code parameter.  This has been preserved
in this patch, s390 people should check if error_code really was
intended.

Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
---
Andrew, this came up when discussing some x86 fault unification
work, figured you were the right person to feed this through.

At least the diffstat says I did something right.  Patch against
current Linus tree.

 arch/avr32/mm/fault.c   |   21 +
 arch/ia64/mm/fault.c|   24 +---
 arch/powerpc/mm/fault.c |   25 +
 arch/s390/mm/fault.c|   25 +
 arch/sparc64/mm/fault.c |   23 +--
 arch/x86/mm/fault_32.c  |   26 ++
 arch/x86/mm/fault_64.c  |   26 ++
 include/linux/kprobes.h |   19 +++
 8 files changed, 28 insertions(+), 161 deletions(-)

diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 6560cb1..a95cce2 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -20,25 +20,6 @@
 #include 
 #include 
 
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-   int ret = 0;
-
-   if (!user_mode(regs)) {
-   if (kprobe_running() && kprobe_fault_handler(regs, trap))
-   ret = 1;
-   }
-
-   return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-   return 0;
-}
-#endif
-
 int exception_trace = 1;
 
 /*
@@ -66,7 +47,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct 
pt_regs *regs)
int code;
int fault;
 
-   if (notify_page_fault(regs, ecr))
+   if (is_kprobe_fault(regs, ecr))
return;
 
address = sysreg_read(TLBEAR);
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 7571076..1696805 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -18,28 +18,6 @@
 
 extern void die (char *, struct pt_regs *, long);
 
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-   int ret = 0;
-
-   if (!user_mode(regs)) {
-   /* kprobe_running() needs smp_processor_id() */
-   preempt_disable();
-   if (kprobe_running() && kprobes_fault_handler(regs, trap))
-   ret = 1;
-   preempt_enable();
-   }
-
-   return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-   return 0;
-}
-#endif
-
 /*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
  * (inside region 5, on ia64) and that page is present.
@@ -106,7 +84,7 @@ ia64_do_page_fault (unsigned long address, unsigned long 
isr, struct pt_regs *re
/*
 * This is to handle the kprobes on user space access instructions
 */
-   if (notify_page_fault(regs, TRAP_BRKPT))
+   if (is_kprobe_fault(regs, TRAP_BRKPT))
return;
 
down_read(&mm->mmap_sem);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8135da0..00df6f9 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -39,29 +39,6 @@
 #include 
 #include 
 
-
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
-{
-   int ret = 0;
-
-   /* kprobe_running() needs smp_processor_id() */
-   if (!user_mode(regs)) {
-   preempt_disable();
-   if (kprobe_running() && kprobe_fault_handler(regs, 11))
-   ret = 1;
-   preempt_enable();
-   }
-
-   return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
-   return 0;
-}
-#endif
-
 /*
  * Check whether the instruction at regs->nip is a store using
  * an update addressing form which will update r1.
@@ -164,7 +141,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned 
long address,
is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
-   if (notify_page_fault(regs))
+   if (is_kprobe_fault(regs, 11))
return 0;
 
if (trap == 0x300) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2456b52..59d3f0e 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -51,29 +51,6 @@ extern int sysctl_userprocess_debug;
 
 extern void die(const char *,struct pt_regs *,long);
 
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, long err)
-{
-   

Re: [PATCH] track number of mnts writing to superblocks

2008-01-02 Thread Serge E. Hallyn
Quoting Dave Hansen ([EMAIL PROTECTED]):
> 
> One of the benefits of the r/o bind mount patches is that they
> make it explicit when a write to a superblock might occur.
> We currently search sb->s_files when remounting rw->ro to look
> for writable files.  But, that search is not comprehensive, and
> it is racy.  This replaces that search.
> 
> The idea is to keep a bit in each mount saying whether the
> mount has any writers on it.  When the bit is set the first
> time, we also increment a counter in the superblock.  That
> sb counter is the number of mounts with that bit set and
> thus, potential writers.
> 
> The other problem is that, after we make this check for
> the number of writable mounts, we need to exclude all new
> writers on those mounts.  We do this by requiring that the
> superblock mnt writer count be incremented under a
> lock_super() and also holding that lock over the remount
> operation.  Effectively, this keeps us from *adding* to
> the sb's writable mounts during a remount.
> 
> The alternative to doing this is to do a much simpler list
> of mounts for each superblock.  I could also code that up
> to see what it looks like.  Shouldn't be too bad.

Ok I'm blabbing quite a bit here while trying to figure out
the patch, and maybe there are some useful hints for where more
comments would be useful.  But other than the fact that
mark_mnt_has_writer() needs to do the atomic_inc() even if
cpu_writer was passed in as NULL, the patch seems good.

thanks,
-serge

> Signed-off-by: Dave Hansen <[EMAIL PROTECTED]>
> ---
> 
>  linux-2.6.git-dave/fs/file_table.c   |   24 -
>  linux-2.6.git-dave/fs/namespace.c|  134 
> +--
>  linux-2.6.git-dave/fs/super.c|   61 +++---
>  linux-2.6.git-dave/include/linux/fs.h|5 -
>  linux-2.6.git-dave/include/linux/mount.h |3 
>  5 files changed, 163 insertions(+), 64 deletions(-)
> 
> diff -puN fs/file_table.c~track_sb_mnt_writers fs/file_table.c
> --- linux-2.6.git/fs/file_table.c~track_sb_mnt_writers2008-01-02 
> 10:49:11.0 -0800
> +++ linux-2.6.git-dave/fs/file_table.c2008-01-02 10:49:11.0 
> -0800
> @@ -374,30 +374,6 @@ void file_kill(struct file *file)
>   }
>  }
> 
> -int fs_may_remount_ro(struct super_block *sb)
> -{
> - struct file *file;
> -
> - /* Check that no files are currently opened for writing. */
> - file_list_lock();
> - list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
> - struct inode *inode = file->f_path.dentry->d_inode;
> -
> - /* File with pending delete? */
> - if (inode->i_nlink == 0)
> - goto too_bad;
> -
> - /* Writeable file? */
> - if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))

(why did this originally skip directories?)

> - goto too_bad;
> - }
> - file_list_unlock();
> - return 1; /* Tis' cool bro. */
> -too_bad:
> - file_list_unlock();
> - return 0;
> -}
> -
>  void __init files_init(unsigned long mempages)
>  { 
>   int n; 
> diff -puN fs/file_table.c.orig~track_sb_mnt_writers fs/file_table.c.orig
> diff -puN fs/namespace.c~track_sb_mnt_writers fs/namespace.c
> --- linux-2.6.git/fs/namespace.c~track_sb_mnt_writers 2008-01-02 
> 10:49:11.0 -0800
> +++ linux-2.6.git-dave/fs/namespace.c 2008-01-02 13:39:52.0 -0800
> @@ -118,7 +118,7 @@ struct mnt_writer {
>* If holding multiple instances of this lock, they
>* must be ordered by cpu number.
>*/
> - spinlock_t lock;
> + struct mutex lock;
>   struct lock_class_key lock_class; /* compiles out with !lockdep */
>   unsigned long count;
>   struct vfsmount *mnt;
> @@ -130,7 +130,7 @@ static int __init init_mnt_writers(void)
>   int cpu;
>   for_each_possible_cpu(cpu) {
>   struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
> - spin_lock_init(&writer->lock);
> + mutex_init(&writer->lock);
>   lockdep_set_class(&writer->lock, &writer->lock_class);
>   writer->count = 0;
>   }
> @@ -145,11 +145,52 @@ static void mnt_unlock_cpus(void)
> 
>   for_each_possible_cpu(cpu) {
>   cpu_writer = &per_cpu(mnt_writers, cpu);
> - spin_unlock(&cpu_writer->lock);
> + mutex_unlock(&cpu_writer->lock);
>   }
>  }
> 
> -static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
> +static int mark_mnt_has_writer(struct vfsmount *mnt,
> +struct mnt_writer *cpu_writer)
> +{
> + /*
> +  * Ensure that if there are people racing to set
> +  * the bit that only one of them succeeds and can
> +  * increment the sb count.
> +  */
> + if (test_and_set_bit(ilog2(MNT_MAY_HAVE_WRITERS), &mnt->mnt_flags))
> + return 0;

Comment isn't entirely clear, but you're returning 0 here because
someone else has already set th

FW: [PATCH -rt] Preemption problem in kernel RT Patch

2008-01-02 Thread mbeauch

Here's the updated patch:

Changed the real-time patch code to detect recursive calls 
to dev_queue_xmit and drop the packet when detected. 


Signed-off-by: Mark Beauchemin <[EMAIL PROTECTED]> 

diff -ru linux-2.6.24-rc5-rt1/include/linux/netdevice.h 
linux-2.6.24-rc5-rt1-mark/include/linux/netdevice.h
--- linux-2.6.24-rc5-rt1/include/linux/netdevice.h  2007-12-28 
09:34:02.0 -0500
+++ linux-2.6.24-rc5-rt1-mark/include/linux/netdevice.h 2008-01-01 
18:49:49.0 -0500
@@ -619,7 +619,7 @@
/* cpu id of processor entered to hard_start_xmit or -1,
   if nobody entered there.
 */
-   int xmit_lock_owner;
+   void*xmit_lock_owner;
void*priv;  /* pointer to private data  */
int (*hard_start_xmit) (struct sk_buff *skb,
struct net_device *dev);
@@ -1333,46 +1333,46 @@
  *
  * Get network device transmit lock
  */
-static inline void __netif_tx_lock(struct net_device *dev, int cpu)
+static inline void __netif_tx_lock(struct net_device *dev)
 {
spin_lock(&dev->_xmit_lock);
-   dev->xmit_lock_owner = cpu;
+   dev->xmit_lock_owner = (void *)current;
 }
 
 static inline void netif_tx_lock(struct net_device *dev)
 {
-   __netif_tx_lock(dev, raw_smp_processor_id());
+   __netif_tx_lock(dev);
 }
 
 static inline void netif_tx_lock_bh(struct net_device *dev)
 {
spin_lock_bh(&dev->_xmit_lock);
-   dev->xmit_lock_owner = raw_smp_processor_id();
+   dev->xmit_lock_owner = (void *)current;
 }
 
 static inline int netif_tx_trylock(struct net_device *dev)
 {
int ok = spin_trylock(&dev->_xmit_lock);
if (likely(ok))
-   dev->xmit_lock_owner = raw_smp_processor_id();
+   dev->xmit_lock_owner = (void *)current;
return ok;
 }
 
 static inline void netif_tx_unlock(struct net_device *dev)
 {
-   dev->xmit_lock_owner = -1;
+   dev->xmit_lock_owner = (void *)-1;
spin_unlock(&dev->_xmit_lock);
 }
 
 static inline void netif_tx_unlock_bh(struct net_device *dev)
 {
-   dev->xmit_lock_owner = -1;
+   dev->xmit_lock_owner = (void *)-1;
spin_unlock_bh(&dev->_xmit_lock);
 }
 
-#define HARD_TX_LOCK(dev, cpu) {   \
+#define HARD_TX_LOCK(dev) {\
if ((dev->features & NETIF_F_LLTX) == 0) {  \
-   __netif_tx_lock(dev, cpu);  \
+   __netif_tx_lock(dev);   \
}   \
 }
 
diff -ru linux-2.6.24-rc5-rt1/net/core/dev.c 
linux-2.6.24-rc5-rt1-mark/net/core/dev.c
--- linux-2.6.24-rc5-rt1/net/core/dev.c 2007-12-28 09:34:01.0 -0500
+++ linux-2.6.24-rc5-rt1-mark/net/core/dev.c2008-01-01 18:48:33.0 
-0500
@@ -1692,18 +1692,10 @@
   Either shot noqueue qdisc, it is even simpler 8)
 */
if (dev->flags & IFF_UP) {
-   int cpu = raw_smp_processor_id(); /* ok because BHs are off */
 
-   /*
-* No need to check for recursion with threaded interrupts:
-*/
-#ifdef CONFIG_PREEMPT_RT
-   if (1) {
-#else
-   if (dev->xmit_lock_owner != cpu) {
-#endif
+   if (dev->xmit_lock_owner != (void *)current) {
 
-   HARD_TX_LOCK(dev, cpu);
+   HARD_TX_LOCK(dev);
 
if (!netif_queue_stopped(dev) &&
!netif_subqueue_stopped(dev, skb)) {
@@ -3630,7 +3622,7 @@
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
-   dev->xmit_lock_owner = -1;
+   dev->xmit_lock_owner = (void *)-1;
spin_lock_init(&dev->ingress_lock);
 
dev->iflink = -1;
diff -ru linux-2.6.24-rc5-rt1/net/sched/sch_generic.c 
linux-2.6.24-rc5-rt1-mark/net/sched/sch_generic.c
--- linux-2.6.24-rc5-rt1/net/sched/sch_generic.c2007-12-28 
09:34:02.0 -0500
+++ linux-2.6.24-rc5-rt1-mark/net/sched/sch_generic.c   2008-01-01 
18:52:33.0 -0500
@@ -89,7 +89,7 @@
 {
int ret;
 
-   if (unlikely(dev->xmit_lock_owner == raw_smp_processor_id())) {
+   if (unlikely(dev->xmit_lock_owner == (void *)current)) {
/*
 * Same CPU holding the lock. It may be a transient
 * configuration error, when hard_start_xmit() recurses. We
@@ -146,7 +146,7 @@
/* And release queue */
spin_unlock(&dev->queue_lock);
 
-   HARD_TX_LOCK(dev, raw_smp_processor_id());
+   HARD_TX_LOCK(dev);
if (!netif_subqueue_stopped(dev, skb))
ret = dev_hard_start_xmit(skb, dev);
HARD_TX_UNLOCK(dev);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More ma

Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread Bryan Wu
On Jan 3, 2008 4:58 AM, Alan Stern <[EMAIL PROTECTED]> wrote:
> On Wed, 2 Jan 2008, David Brownell wrote:
>
> > On Wednesday 02 January 2008, Alan Stern wrote:
> > > On Wed, 2 Jan 2008, Mike Frysinger wrote:
> > >
> > > > perhaps the code size is arguable as to whether it really matters.
> > > > the reason we want it is that we have a USB host controller that will
> > > > not work with USB hubs, so we want to make sure the system does not
> > > > attempt such things.  (yes, such a USB host controller is retarded,
> > > > but the decision was out of our hands.)
> > >
> > > Just out of curiosity, how does a host controller manage to avoid
> > > working with external hubs?
> >
> > The transaction translators in external high speed hubs require
> > hosts to issue particular USB transactions.  If the host controller
> > doesn't implement that split transaction support, then it won't
> > be supporting external hubs.
>
> So in theory one could connect a high-speed hub to such a host
> controller and expect it to communicate with high-speed devices.  So
> long as no full- or low-speed devices are added there wouldn't be any
> split transactions.  It wouldn't be USB-2.0 compliant but it should
> still work.
>

Hmmm, basically, I think the answer is yes.

But when you tell customers your devices support USB 2.0, they will
try to plug in lots of USB devices that you cannot even imagine.
If they plug in a combo USB device including an external USB hub, the
whole embedded Linux system may crash or hang there.
So this patch is to refuse to enumerate such unsupported USB devices.

-Bryan Wu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault

2008-01-02 Thread H. Peter Anvin

Harvey Harrison wrote:


My apologies, testing/compiling on X86_32 here.


Do you seriously think code is getting better and more readable because
of this liberal #ifdef sprinkling in every possible direction?



Well, this of course is not the end of the road, but it makes it
obvious where the differences between 32/64 bit lie and allows
further cleanups to unify these areas over time.  This is meant as
a no-functionality-change path at first, and it does point out that
for the most part the files are _very_ similar to each other.

So my plan for now was to move forward with no functional changes and
essentially ifdef or reorder code until we get to identical fault_32/64.c
which then gets moved to a single fault.c

Then the cleanups happen in one place in one file and it should be easy
to audit the series at the end.  But for further patches I'll wait until
the series is further along and tested before submitting.  This was how
the kprobes unification went and I think it works fairly well this way.



One more thing... for code motion/unification patches it's a good thing 
to verify that the i386 and x86-64 binaries are both unchanged.


-hpa
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread David Brownell
On Wednesday 02 January 2008, Robin Getz wrote:
> On Wed 2 Jan 2008 13:47, David Brownell pondered:
> > On Wednesday 02 January 2008, Robin Getz wrote:
> > > From: Robin Getz <[EMAIL PROTECTED]>
> > > 
> > > Allow embedded developers to turn support for USB Hubs off even if
> > > they have a full root hub. This saves the overhead (RAM and Flash size).
> > 
> > ISTR that it won't save very much code though ... the Linux USB
> > stack structures all its enumeration logic around hubs.
> 
> Today, there is an USB (Host), USB_GADGET and USB_OTG (which depends on USB 
> && 
> USB_GADGET).
> 
> This just enables cutting more code out, with out having to have USB_GADGET & 
> USB_OTG enabled. When I checked - that is where most of the savings came 
> from.

Right.  I'm not objecting to this at all.  Just pointing
out that usbcore will still end up including quite a lot
of hub functionality, because there's still going to be
a root hub in the system and a khubd managing it.

This patch might be improved slightly -- in ways that, as I
understand things, could save some RAM on Blackfin! -- by
having the BLACKLIST_HUB option get rid of the transaction
translator support (changing C code not just Kconfig).
It's pretty minimal, but won't be used...

Also, as you point out, it's no longer OTG specific, so
renaming the option would improve clarity.   Maybe to
something along the lines of USB_HOST_NO_EXTERNAL_HUBS.
No big deal, at least now.

- Dave

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread David Brownell
On Wednesday 02 January 2008, Alan Stern wrote:
> 
> > The transaction translators in external high speed hubs require
> > hosts to issue particular USB transactions.  If the host controller
> > doesn't implement that split transaction support, then it won't
> > be supporting external hubs.
> 
> So in theory one could connect a high-speed hub to such a host
> controller and expect it to communicate with high-speed devices.  So
> long as no full- or low-speed devices are added there wouldn't be any
> split transactions.  It wouldn't be USB-2.0 compliant but it should
> still work.

Yes ...

But that would be a nightmare in terms of support calls.  Take
two USB trees, and swap the hub ... suddenly one of the downstream
devices (the full speed one) starts (or stops!) working!  That's
not the sort of issue any support line wants to cope with on a
day-to-day basis.

The accounting for those products is often along the lines of:
if you got a support call, you didn't profit from that sale.
(And from the user perspective it's just a PITA and waste of
time.)  So *every* confusion like that one gets removed.

- Dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread Bryan Wu
On Jan 3, 2008 2:47 AM, David Brownell <[EMAIL PROTECTED]> wrote:
> On Wednesday 02 January 2008, Robin Getz wrote:
> > From: Robin Getz <[EMAIL PROTECTED]>
> >
> > Allow embedded developers to turn support for USB Hubs off even if they 
> > have a
> > full root hub. This saves the overhead (RAM and Flash size).
>
> ISTR that it won't save very much code though ... the Linux USB
> stack structures all its enumeration logic around hubs.
>
>
> > Allow embedded developers the capabilities of the "otg_whitelist.h" - a
> > product whitelist, so USB peripherals not listed there will be rejected
> > during enumeration. This is the desired operation for many embedded 
> > products.
> >
> > Signed-off-by: Robin Getz <[EMAIL PROTECTED]>
>
> This is probably the right thing to do.  Correct me if I'm wrong,
> but USB-IF recently put out some specs about "embedded hosts" which
> basically boil down to saying you can adopt the same functionality
> restrictions that used to be OTG-only.  Which is why now there are
> embedded developers who'd like this option.  :)
>
> - Dave
>
>

Yes, "embedded hosts" which currently do not support the whole USB 2.0
spec are what Robin's patch is meant to serve.

Although the MUSB on Blackfin can support USB_OTG and we can enable
this, some real embedded applications just use the MUSB_HOST mode. So
just enabling MUSB_HOST can save some embedded space because it does
not include the MUSB_PERIPHERAL code.

I believe some embedded SoCs that include an embedded USB host controller
also need this option opened up for them.

Thanks
-Bryan Wu
>
>
> >
> > ---
> >
> >  drivers/usb/core/Kconfig |7 ---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> >
> > Index: linux-2.6.x/drivers/usb/core/Kconfig
> > ===
> > --- linux-2.6.x/drivers/usb/core/Kconfig  (revision 4074)
> > +++ linux-2.6.x/drivers/usb/core/Kconfig  (working copy)
> > @@ -95,8 +95,9 @@
> >
> >  config USB_OTG_WHITELIST
> >   bool "Rely on OTG Targeted Peripherals List"
> > - depends on USB_OTG
> > - default y
> > + depends on USB_OTG || EMBEDDED
> > + default y if USB_OTG
> > + default n if EMBEDDED
> >   help
> > If you say Y here, the "otg_whitelist.h" file will be used as a
> > product whitelist, so USB peripherals not listed there will be
> > @@ -111,7 +112,7 @@
> >
> >  config USB_OTG_BLACKLIST_HUB
> >   bool "Disable external hubs"
> > - depends on USB_OTG
> > + depends on USB_OTG || EMBEDDED
> >   help
> > If you say Y here, then Linux will refuse to enumerate
> > external hubs.  OTG hosts are allowed to reduce hardware
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: Use is_kprobe_fault to better match usage

2008-01-02 Thread Masami Hiramatsu
Harvey Harrison wrote:
> On Wed, 2008-01-02 at 21:36 -0500, Masami Hiramatsu wrote:
>> Hi Harvey,
>>
>> Harvey Harrison wrote:
>>> Currently the notify_page_fault helper is used to test if the page
>>> fault was caused by a kprobe causing an early return from do_page_fault.
>>>
>>> Change the name of the helper to is_kprobe_fault to match the usage and
>>> remove the preempt_disable/enable pair around kprobe_running() with an
>>> explicit test for preemption.  The idea for this comes from a patch
>>> by Quentin Barnes to kprobes.c
>> Sure, that's right.
>> However, since other architectures also have notify_page_fault(),
>> I think all of those code might better be changed same time for
>> maintainability.
>>
> 
> How about a static inline in linux/kprobes.h with a big comment above
> about when/why the !preemptible() check is sufficient?

Hmm, fault handling depends on the architecture. But the current
notify_page_fault()s are very similar. I think unifying them is a good idea.
We will be happy to review that if you send it.

Many thanks!

> 
> Harvey
> 
> 
> 

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: [EMAIL PROTECTED], [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT] More NFS client fixes for 2.6.24-rc6

2008-01-02 Thread Trond Myklebust
Hi Linus,

Please pull from the repository at

   git pull git://git.linux-nfs.org/pub/linux/nfs-2.6.git

This will update the following files through the appended changesets.

  Cheers,
Trond


 fs/nfs/nfs4proc.c  |   34 --
 fs/nfs/nfs4renewd.c|2 --
 fs/nfs/super.c |   12 +++-
 net/sunrpc/auth_gss/auth_gss.c |2 +-
 4 files changed, 32 insertions(+), 18 deletions(-)

commit 361562e89f02db8fdca32a2dfc546a74f5d883d9
Author: Trond Myklebust <[EMAIL PROTECTED]>
Date:   Wed Jan 2 16:27:16 2008 -0500

NFSv4: Fix open_to_lock_owner sequenceid allocation...

NFSv4 file locking is currently completely broken since it doesn't respect
the OPEN sequencing when it is given an unconfirmed lock_owner and needs to
do an open_to_lock_owner. Worse: it breaks the sunrpc rules by doing a
GFP_KERNEL allocation inside an rpciod callback.

Fix is to preallocate the open seqid structure in nfs4_alloc_lockdata if we
see that the lock_owner is unconfirmed.
Then, in nfs4_lock_prepare() we wait for either the open_seqid, if
the lock_owner is still unconfirmed, or else fall back to waiting on the
standard lock_seqid.

Signed-off-by: Trond Myklebust <[EMAIL PROTECTED]>

commit 9073a07d1d987f8d2c919c900ea18ff57d156a34
Author: Trond Myklebust <[EMAIL PROTECTED]>
Date:   Wed Jan 2 15:19:18 2008 -0500

NFSv4: nfs4_open_confirm must not set the open_owner as confirmed on error

RFC3530 states that the open_owner is confirmed if and only if the client
sends an OPEN_CONFIRM request with the appropriate sequence id and stateid
within the lease period.

Signed-off-by: Trond Myklebust <[EMAIL PROTECTED]>

commit 812f85c5715310583459acc4b35e918914e96060
Author: James Morris <[EMAIL PROTECTED]>
Date:   Wed Dec 26 11:20:43 2007 +1100

NFS: add newline to kernel warning message in auth_gss code

Add newline to kernel warning message in gss_create().

Signed-off-by: James Morris <[EMAIL PROTECTED]>
Signed-off-by: Trond Myklebust <[EMAIL PROTECTED]>

commit 72d740f5d660bb422bab5f72da56678ca2c82d81
Author: Trond Myklebust <[EMAIL PROTECTED]>
Date:   Wed Jan 2 13:52:03 2008 -0500

NFSv4: Fix circular locking dependency in nfs4_kill_renewd

Erez Zadok reports:

===
[ INFO: possible circular locking dependency detected ]
2.6.24-rc6-unionfs2 #80
---
umount.nfs4/4017 is trying to acquire lock:
 (&(&clp->cl_renewd)->work){--..}, at: []
__cancel_work_timer+0x83/0x17f

but task is already holding lock:
 (&clp->cl_sem){}, at: [] nfs4_kill_renewd+0x17/0x29 [nfs]

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (&clp->cl_sem){}:
   [] __lock_acquire+0x9cc/0xb95
   [] lock_acquire+0x5f/0x78
   [] down_read+0x3a/0x4c
   [] nfs4_renew_state+0x1c/0x1b8 [nfs]
   [] run_workqueue+0xd9/0x1ac
   [] worker_thread+0x7a/0x86
   [] kthread+0x3b/0x62
   [] kernel_thread_helper+0x7/0x10
   [] 0x

-> #0 (&(&clp->cl_renewd)->work){--..}:
   [] __lock_acquire+0x8bc/0xb95
   [] lock_acquire+0x5f/0x78
   [] __cancel_work_timer+0xb7/0x17f
   [] cancel_delayed_work_sync+0xb/0xd
   [] nfs4_kill_renewd+0x1e/0x29 [nfs]
   [] nfs_free_client+0x37/0x9e [nfs]
   [] nfs_put_client+0x5d/0x62 [nfs]
   [] nfs_free_server+0x75/0xae [nfs]
   [] nfs4_kill_super+0x27/0x2b [nfs]
   [] deactivate_super+0x3f/0x51
   [] mntput_no_expire+0x42/0x67
   [] path_release_on_umount+0x15/0x18
   [] sys_umount+0x1a3/0x1cb
   [] sys_oldumount+0x19/0x1b
   [] sysenter_past_esp+0x5f/0xa5
   [] 0x

Looking at the code, it would seem that taking the clp->cl_sem in
nfs4_kill_renewd is completely redundant, since we're already guaranteed to
have exclusive access to the nfs_client (we're shutting down).

Signed-off-by: Trond Myklebust <[EMAIL PROTECTED]>

commit 53478daff2c8b494d2af1ede6611f166f81bc393
Author: Trond Myklebust <[EMAIL PROTECTED]>
Date:   Wed Jan 2 13:28:57 2008 -0500

NFS: Fix a possible Oops in fs/nfs/super.c

Sigh... commit 4584f520e1f773082ef44ff4f8969a5d992b16ec (NFS: Fix NFS
mountpoint crossing...) had a slight flaw: server can be NULL if sget()
returned an existing superblock.

Fix the fix by dereferencing s->s_fs_info.

Also add in the same namespace Oops fix for NFSv4 in both the mountpoint
crossing case, and the referral case.

Signed-off-by: Trond Myklebust <[EMAIL PROTECTED]>

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f03d9d5..9e2e1c7 10

Re: [linux-usb-devel] [PATCH] : Allow embedded developers USB options normally reserved for OTG

2008-01-02 Thread Robin Getz
On Wed 2 Jan 2008 13:47, David Brownell pondered:
> On Wednesday 02 January 2008, Robin Getz wrote:
> > From: Robin Getz <[EMAIL PROTECTED]>
> > 
> > Allow embedded developers to turn support for USB Hubs off even if
> > they have a full root hub. This saves the overhead (RAM and Flash size).
> 
> ISTR that it won't save very much code though ... the Linux USB
> stack structures all its enumeration logic around hubs.

Today, there is an USB (Host), USB_GADGET and USB_OTG (which depends on USB && 
USB_GADGET).

This just enables cutting more code out, without having to have USB_GADGET &
USB_OTG enabled. When I checked - that is where most of the savings came
from.

-Robin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: Use is_kprobe_fault to better match usage

2008-01-02 Thread Harvey Harrison
On Wed, 2008-01-02 at 21:36 -0500, Masami Hiramatsu wrote:
> Hi Harvey,
> 
> Harvey Harrison wrote:
> > Currently the notify_page_fault helper is used to test if the page
> > fault was caused by a kprobe causing an early return from do_page_fault.
> > 
> > Change the name of the helper to is_kprobe_fault to match the usage and
> > remove the preempt_disable/enable pair around kprobe_running() with an
> > explicit test for preemption.  The idea for this comes from a patch
> > by Quentin Barnes to kprobes.c
> 
> Sure, that's right.
> However, since other architectures also have notify_page_fault(),
> I think all of those code might better be changed same time for
> maintainability.
> 

How about a static inline in linux/kprobes.h with a big comment above
about when/why the !preemptible() check is sufficient?

Harvey



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] powerpc systbl.h broken

2008-01-02 Thread Stephen Rothwell
On Thu, 3 Jan 2008 13:16:24 +1100 Stephen Rothwell <[EMAIL PROTECTED]> wrote:
>
> This has been fixed in the current (approaching 2.6.24) kernel.  It
> should be fixed in 2.6.23.xx, so I have cc'd this to
> [EMAIL PROTECTED] (and linuxppc-dev, of course).

Should have been [EMAIL PROTECTED] (where I have sent another copy of
the email)

-- 
Cheers,
Stephen Rothwell[EMAIL PROTECTED]
http://www.canb.auug.org.au/~sfr/


pgpr6vm7ngBUh.pgp
Description: PGP signature


Re: [PATCH] x86: Use is_kprobe_fault to better match usage

2008-01-02 Thread Masami Hiramatsu
Hi Harvey,

Harvey Harrison wrote:
> Currently the notify_page_fault helper is used to test if the page
> fault was caused by a kprobe causing an early return from do_page_fault.
> 
> Change the name of the helper to is_kprobe_fault to match the usage and
> remove the preempt_disable/enable pair around kprobe_running() with an
> explicit test for preemption.  The idea for this comes from a patch
> by Quentin Barnes to kprobes.c

Sure, that's right.
However, since other architectures also have notify_page_fault(),
I think all of that code would be better changed at the same time for
maintainability.

Thanks,

> Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
> ---
> Ingo, this may not be functionally equivalent, feel free to yank it out
> if there is any trouble, but from what I've seen it should be OK.
> 
> Did you ever find a good kprobes test?
> 
>  arch/x86/mm/fault_32.c |   30 ++
>  arch/x86/mm/fault_64.c |   30 ++
>  2 files changed, 28 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
> index 051a4ec..5c48cc2 100644
> --- a/arch/x86/mm/fault_32.c
> +++ b/arch/x86/mm/fault_32.c
> @@ -12,6 +12,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -42,23 +43,20 @@
>  #define PF_RSVD  (1<<3)
>  #define PF_INSTR (1<<4)
>  
> -static inline int notify_page_fault(struct pt_regs *regs)
> +static inline int is_kprobe_fault(struct pt_regs *regs)
>  {
> -#ifdef CONFIG_KPROBES
>   int ret = 0;
> -
> - /* kprobe_running() needs smp_processor_id() */
> - if (!user_mode_vm(regs)) {
> - preempt_disable();
> - if (kprobe_running() && kprobe_fault_handler(regs, 14))
> - ret = 1;
> - preempt_enable();
> - }
> -
> - return ret;
> -#else
> - return 0;
> +#ifdef CONFIG_KPROBES
> + /*
> +  * If it is a kprobe fault we can not be premptible so return before
> +  * calling kprobe_running() as it will assert on smp_processor_id if
> +  * preemption is enabled.
> +  */
> + if (!user_mode_vm(regs) && !preemptible() && kprobe_running() &&
> + kprobe_fault_handler(regs, 14))
> + ret = 1;
>  #endif
> + return ret;
>  }
>  
>  #ifdef CONFIG_X86_32
> @@ -428,7 +426,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
> unsigned long error_code)
>   return;
>   }
>  #endif
> - if (notify_page_fault(regs))
> + if (is_kprobe_fault(regs))
>   return;
>   /*
>* Don't take the mm semaphore here. If we fixup a prefetch
> @@ -437,7 +435,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
> unsigned long error_code)
>   goto bad_area_nosemaphore;
>   }
>  
> - if (notify_page_fault(regs))
> + if (is_kprobe_fault(regs))
>   return;
>  
>   /* It's safe to allow irq's after cr2 has been saved and the vmalloc
> diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
> index 97b92b6..09008e5 100644
> --- a/arch/x86/mm/fault_64.c
> +++ b/arch/x86/mm/fault_64.c
> @@ -13,6 +13,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -45,23 +46,20 @@
>  #define PF_RSVD  (1<<3)
>  #define PF_INSTR (1<<4)
>  
> -static inline int notify_page_fault(struct pt_regs *regs)
> +static inline int is_kprobe_fault(struct pt_regs *regs)
>  {
> -#ifdef CONFIG_KPROBES
>   int ret = 0;
> -
> - /* kprobe_running() needs smp_processor_id() */
> - if (!user_mode(regs)) {
> - preempt_disable();
> - if (kprobe_running() && kprobe_fault_handler(regs, 14))
> - ret = 1;
> - preempt_enable();
> - }
> -
> - return ret;
> -#else
> - return 0;
> +#ifdef CONFIG_KPROBES
> + /*
> +  * If it is a kprobe fault we can not be premptible so return before
> +  * calling kprobe_running() as it will assert on smp_processor_id if
> +  * preemption is enabled.
> +  */
> + if (!user_mode_vm(regs) && !preemptible() && kprobe_running() &&
> + kprobe_fault_handler(regs, 14))
> + ret = 1;
>  #endif
> + return ret;
>  }
>  
>  #ifdef CONFIG_X86_32
> @@ -478,7 +476,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
> *regs,
>   return;
>   }
>  #endif
> - if (notify_page_fault(regs))
> + if (is_kprobe_fault(regs))
>   return;
>   /*
>* Don't take the mm semaphore here. If we fixup a prefetch
> @@ -487,7 +485,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
> *regs,
>   goto bad_area_nosemaphore;
>   }
>  
> - if (notify_page_fault(regs))
> + if (is_kprobe_fault(regs))
>   return;
>  
>   if (likely(regs->fla

Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault

2008-01-02 Thread H. Peter Anvin

Harvey Harrison wrote:

There is no such thing as CONFIG_x86_64 .


My apologies, testing/compiling on X86_32 here.


Please also compile for x86-64, even if you can't easily test it 
(although you can always boot under qemu, even if it's slow.)


Unification patches especially.

-hpa
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCHv2] x86: fault_{32|64}.c unify do_page_fault

2008-01-02 Thread Harvey Harrison
Begin to unify do_page_fault(), easy code movement first.

Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
---
Ingo, Alexey Dobriyan noticed an obvious typo CONFIG_x86_64 in
the previous version, this is a fixed patch.

 arch/x86/mm/fault_32.c |   38 +-
 arch/x86/mm/fault_64.c |   23 ++-
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index b1893eb..051a4ec 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
unsigned long error_code)
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
-   int write, si_code;
-   int fault;
+   int write, fault;
+#ifdef CONFIG_X86_64
+   unsigned long flags;
+#endif
+   int si_code;
 
/*
 * We can fault from pretty much anywhere, with unknown IRQ state.
 */
trace_hardirqs_fixup();
 
-   /* get the address */
-   address = read_cr2();
-
tsk = current;
+   mm = tsk->mm;
 
+#ifdef CONFIG_X86_64
+   prefetchw(&mm->mmap_sem);
+#endif
+
+   /* get the address */
+   address = read_cr2();
si_code = SEGV_MAPERR;
 
/*
@@ -403,9 +410,24 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
unsigned long error_code)
 * (error_code & 4) == 0, and that the fault was not a
 * protection error (error_code & 9) == 0.
 */
+#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
-   if (!(error_code & 0x000d) && vmalloc_fault(address) >= 0)
+   if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+   vmalloc_fault(address) >= 0)
return;
+#else
+   if (unlikely(address >= TASK_SIZE64)) {
+   /*
+* Don't check for the module range here: its PML4
+* is always initialized because it's shared with the main
+* kernel text. Only vmalloc may need PML4 syncups.
+*/
+   if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ ((address >= VMALLOC_START && address < VMALLOC_END))) {
+   if (vmalloc_fault(address) >= 0)
+   return;
+   }
+#endif
if (notify_page_fault(regs))
return;
/*
@@ -423,8 +445,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned 
long error_code)
if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
local_irq_enable();
 
-   mm = tsk->mm;
-
/*
 * If we're in an interrupt, have no user context or are running in an
 * atomic region then we must not take the fault.
@@ -495,7 +515,7 @@ good_area:
goto bad_area;
}
 
- survive:
+survive:
/*
 * If for any reason at all we couldn't handle the fault,
 * make sure we exit gracefully rather than endlessly redo
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 357a3e0..97b92b6 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -426,7 +426,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
struct vm_area_struct *vma;
unsigned long address;
int write, fault;
+#ifdef CONFIG_X86_64
unsigned long flags;
+#endif
int si_code;
 
/*
@@ -436,14 +438,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
 
tsk = current;
mm = tsk->mm;
+
+#ifdef CONFIG_X86_64
prefetchw(&mm->mmap_sem);
+#endif
 
/* get the address */
address = read_cr2();
-
si_code = SEGV_MAPERR;
 
-
/*
 * We fault-in kernel-space virtual memory on-demand. The
 * 'reference' page table is init_mm.pgd.
@@ -457,6 +460,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
 * (error_code & 4) == 0, and that the fault was not a
 * protection error (error_code & 9) == 0.
 */
+#ifdef CONFIG_X86_32
+   if (unlikely(address >= TASK_SIZE)) {
+   if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+   vmalloc_fault(address) >= 0)
+   return;
+#else
if (unlikely(address >= TASK_SIZE64)) {
/*
 * Don't check for the module range here: its PML4
@@ -468,6 +477,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
if (vmalloc_fault(address) >= 0)
return;
}
+#endif
if (notify_page_fault(regs))
return;
/*
@@ -500,7 +510,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
if (user_mode_vm(regs))
error_code |= PF_USER;
 
- again:
+again:
/* Wh

Re: [patch] powerpc systbl.h broken

2008-01-02 Thread Stephen Rothwell
Hi Clifford,

Thanks for the report.

PowerPC problems should be reported to [EMAIL PROTECTED]

This has been fixed in the current (approaching 2.6.24) kernel.  It
should be fixed in 2.6.23.xx, so I have cc'd this to
[EMAIL PROTECTED] (and linuxppc-dev, of course).

(Also, please post patches inline, don't attach them.  And you need to
reply to this with a Signed-off-by: line.)

-- 
Cheers,
Stephen Rothwell[EMAIL PROTECTED]

From:   Clifford Wolf <[EMAIL PROTECTED]>

Hi,

In current 2.6.23 (I have checked 2.6.23.12 and 2.6.23.9) the end of
include/asm-powerpc/systbl.h reads:

--snip--
SYSCALL_SPU(getcpu)
COMPAT_SYS(epoll_pwait)
COMPAT_SYS_SPU(utimensat)
COMPAT_SYS(fallocate)
COMPAT_SYS_SPU(signalfd)
COMPAT_SYS_SPU(timerfd)
SYSCALL_SPU(eventfd)
COMPAT_SYS_SPU(sync_file_range2)
--snap--

This obviously does not match the definitions in asm-powerpc/unistd.h:

--snip--
#define __NR_getcpu 302
#define __NR_epoll_pwait303
#define __NR_utimensat  304
#define __NR_signalfd   305
#define __NR_timerfd306
#define __NR_eventfd307
#define __NR_sync_file_range2   308
#define __NR_fallocate  309
--snap--

which breaks the system calls 305 to 309 inclusive.

--- linux-2.6.23.12/include/asm-powerpc/systbl.h.orig   2008-01-02 
15:09:04.0 +0100
+++ linux-2.6.23.12/include/asm-powerpc/systbl.h2008-01-02 
15:09:29.0 +0100
@@ -308,8 +308,8 @@
 SYSCALL_SPU(getcpu)
 COMPAT_SYS(epoll_pwait)
 COMPAT_SYS_SPU(utimensat)
-COMPAT_SYS(fallocate)
 COMPAT_SYS_SPU(signalfd)
 COMPAT_SYS_SPU(timerfd)
 SYSCALL_SPU(eventfd)
 COMPAT_SYS_SPU(sync_file_range2)
+COMPAT_SYS(fallocate)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH linux-acpi] Correct wakeup set error and append a new column PCI ID

2008-01-02 Thread Yi Yang
On Wed, 2008-01-02 at 17:09 +0100, Pavel Machek wrote:
> On Wed 2008-01-02 10:03:59, Yi Yang wrote:
> > On Wed, 2008-01-02 at 00:20 +0100, Pavel Machek wrote:
> > > Hi!
> > > 
> > > > /proc/acpi/wakeup is also case-sensitive, case-insensitive is better.
> > > 
> > > Why?
> > A user uses device bus id like 'C093' to enable or disable wakeup of the
> > device, for example
> > 
> > echo "C093" > /proc/acpi/wakeup
> > 
> > but i think "c093" should also be ok. i.e.
> 
> Why do you think so? Unix is generally case-sensitive. I see ascii
> text in .../wakeup. Maybe some bios vendor is crazy enough to have
> wakeup devices called 'wake', 'Wake', 'wAke', 'waKe', 'wakE'?
Of course, when you cat /proc/acpi/wakeup, you get "wake", but when you
want to enable or disable wakeup of the device "wake", you can

echo "wAke" > /proc/acpi/wakeup

or

echo "wake" > /proc/acpi/wakeup

Don't you think it is reasonable? This is just for user's convenience.

> 
> > > Maybe this file should be left for compatibility and we should present
> > > something reasonable in /sys? Can't you already get PCI ID from sysfs
> > > node?
> > PCI ID can be gotten from sysfs, but it is a unique identifier for a
> > device, a user can get device name from /usr/share/hwdata/pci.ids in any
> > dstribution by PCI ID, he/she is unnecessary to use bus number to get
> > device name, bus number is platform-specific, but PCI ID is
> > platform-independent.
> 
> If the same info can be gotten from 'sysfs node' field, new field
> should not be added.
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault

2008-01-02 Thread Harvey Harrison
On Thu, 2008-01-03 at 04:45 +0300, Alexey Dobriyan wrote:
> On Wed, Jan 02, 2008 at 05:01:02PM -0800, Harvey Harrison wrote:
> > Begin to unify do_page_fault(), easy code movement first.
> > 
> > Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
> > ---
> > Ingo, similar to the kprobes unification patches I did, it gets a bit
> > uglier before it gets better ;-)
> > 
> >  arch/x86/mm/fault_32.c |   38 +-
> >  arch/x86/mm/fault_64.c |   23 ++-
> >  2 files changed, 47 insertions(+), 14 deletions(-)
> > 
> > diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
> > index b1893eb..051a4ec 100644
> > --- a/arch/x86/mm/fault_32.c
> > +++ b/arch/x86/mm/fault_32.c
> > @@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
> > unsigned long error_code)
> > struct mm_struct *mm;
> > struct vm_area_struct *vma;
> > unsigned long address;
> > -   int write, si_code;
> > -   int fault;
> > +   int write, fault;
> > +#ifdef CONFIG_x86_64
> 
> There is no such thing as CONFIG_x86_64 .

My apologies, testing/compiling on X86_32 here.

> 
> Do you seriously think code is getting better and more readable because
> of this liberal #ifdef sprinkling in every possible direction?
> 

Well, this of course is not the end of the road, but it makes it
obvious where the differences between 32/64 bit lie and allows
further cleanups to unify these areas over time.  This is meant as
a no-functionality-change path at first, and it does point out that
for the most part the files are _very_ similar to each other.

So my plan for now was to move forward with no functional changes and
essentially ifdef or reorder code until we get to identical fault_32/64.c
which then gets moved to a single fault.c

Then the cleanups happen in one place in one file and it should be easy
to audit the series at the end.  But for further patches I'll wait until
the series is further along and tested before submitting.  This was how
the kprobes unification went and I think it works fairly well this way.

But, if the end result is too ugly, it won't bother me at all if it
doesn't go in.

Harvey

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH linux-acpi] Correct wakeup set error and append a new column PCI ID

2008-01-02 Thread Yi Yang
On Wed, 2008-01-02 at 17:09 +0100, Pavel Machek wrote:
> On Wed 2008-01-02 10:03:59, Yi Yang wrote:
> > On Wed, 2008-01-02 at 00:20 +0100, Pavel Machek wrote:
> > > Hi!
> > > 
> > > > /proc/acpi/wakeup is also case-sensitive, case-insensitive is better.
> > > 
> > > Why?
> > A user uses device bus id like 'C093' to enable or disable wakeup of the
> > device, for example
> > 
> > echo "C093" > /proc/acpi/wakeup
> > 
> > but i think "c093" should also be ok. i.e.
> 
> Why do you think so? Unix is generally case-sensitive. I see ascii
> text in .../wakeup. Maybe some bios vendor is crazy enough to have
> wakeup devices called 'wake', 'Wake', 'wAke', 'waKe', 'wakE'?
This is just for users' convenience, i believe you must think 0xff and
0xFF are the same.

> 
> > > Maybe this file should be left for compatibility and we should present
> > > something reasonable in /sys? Can't you already get PCI ID from sysfs
> > > node?
> > PCI ID can be gotten from sysfs, but it is a unique identifier for a
> > device, a user can get device name from /usr/share/hwdata/pci.ids in any
> > dstribution by PCI ID, he/she is unnecessary to use bus number to get
> > device name, bus number is platform-specific, but PCI ID is
> > platform-independent.
> 
> If the same info can be gotten from 'sysfs node' field, new field
> should not be added.
Assume you are a user of /proc/acpi/wakeup, when you
cat /proc/acpi/wakeup, you only get PCI bus id, then you need use PCI
bus id to get the device info, that is platform-specific, if you want to
use this PCI bus id to get the device info from another machines, that
is absolutely impossible, but it is ok if it is PCI ID.

Moreover, you can very easily get the device info
from /usr/share/hwdata/pci.ids.

grep <PCI ID> /usr/share/hwdata/pci.ids

That is more convenient than PCI bus id.

If we can provide PCI ID in /proc/acpi/wakeup, why we let users get that
from /sys/devices/pci...?

> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 1/3] move WARN_ON() out of line

2008-01-02 Thread Matt Mackall

On Thu, 2008-01-03 at 01:56 +0100, Arjan van de Ven wrote:
> Subject: move WARN_ON() out of line
> From: Arjan van de Ven <[EMAIL PROTECTED]>
> CC: Ingo Molnar <[EMAIL PROTECTED]>
> CC: Andrew Morton <[EMAIL PROTECTED]>
> 
> A quick grep shows that there are currently 1145 instances of WARN_ON
> in the kernel. Currently, WARN_ON is pretty much entirely inlined,
> which makes it hard to enhance it without growing the size of the kernel
> (and getting Andrew unhappy).
> 
> This patch moves WARN_ON() out of line entirely. I've considered keeping
> the test inline and moving only the slowpath out of line, but I decided
> against that: an out of line test reduces the pressure on the CPUs
> branch predictor logic and gives smaller code, while a function call
> to a fixed location is quite fast. Likewise I've considered doing something
> similar to BUG() (eg use a trapping instruction) but that's not really
> better (it needs the test inline again and recovering from an invalid
> instruction isn't quite fun).
> 
> The code size reduction of this patch was about 6.5Kb (on a distro style
> .config):
> 
> text data bss dec hex filename
> 3096493293455 2760704 6150652  5dd9fc vmlinux.before
> 3090006293455 2760704 6144165  5dc0a5 vmlinux.after
> 
> Signed-off-by: Arjan van de Ven <[EMAIL PROTECTED]>

I hate the do_foo naming scheme (how about __warn_on?), but otherwise:

Acked-by: Matt Mackall <[EMAIL PROTECTED]>

> + printk(KERN_WARNING "WARNING: at %s:%d %s()\n",
> + __FILE__, __LINE__, __FUNCTION__);
> + dump_stack();

While we're here, I'll mention that dump_stack probably ought to take a
severity level argument.

-- 
Mathematics is the supreme nostalgia of our time.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] scsi: revert "[SCSI] Get rid of scsi_cmnd->done"

2008-01-02 Thread Linus Torvalds


On Wed, 2 Jan 2008, James Bottomley wrote:
> > 
> > To say that another way:
> > 
> >  "the code is functionally equivalent, EXCEPT IT ISN'T, and it's 
> >   known to be broken".
> > 
> > wouldn't you say my version is more honest and correct?
> 
> No.  Just because a bug appears when a particular piece of code is in
> and disappears when it is reverted doesn't automatically equate to the
> code in question being buggy.

But it *DOES* mean that it's not equivalent.

> Look at the taxonomy of the bug.  This is the form of the error:
> 
> buffer I/O error on device sr0, logical block 20304
> attempt to access beyond end of device
> sr0: rw=0, want=81224, limit=40944
> 
> The last limit is the most suggestive, that comes straight from
> bdev->bd_inode->i_size>>9 and is supposed to be the size of the block
> device in 512 byte blocks. For a 4.7GB DVD, it's a little small.
> Nothing in the sr code sets this directly (although it does come from
> get_blkdev() for the first opener).  pktcdvd does set it, though ... and
> probably wrongly if the drive in question isn't UDF formatted.

.. but you're ignoring the fact that if pktcdvd sets it wrong, then it 
should be visible with the pre-commit kernel *also*.

In other words, you continue to ignore the fact that BEHAVIOUR CHANGED.

Why?

Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PATCH] SCSI bug fix for 2.6.24-rc6

2008-01-02 Thread James Bottomley
This is the bug fix for

http://bugzilla.kernel.org/show_bug.cgi?id=9674

It's available here:

master.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git

I've just attached it below as well.

James

---

>From 751bf4d7865e4ced406be93b04c7436d866d3684 Mon Sep 17 00:00:00 2001
From: James Bottomley <[EMAIL PROTECTED]>
Date: Wed, 2 Jan 2008 11:14:30 -0600
Subject: [SCSI] scsi_sysfs: restore prep_fn when ULD is removed

A recent bug report:

http://bugzilla.kernel.org/show_bug.cgi?id=9674

Was caused because the ULDs now set their own prep functions, but
don't necessarily reset the prep function back to the SCSI default
when they are removed.  This leads to panics if commands are sent to
the device after the module is removed because the prep_fn is still
pointing to the old module code.  The fix for this is to implement a
bus remove method that resets the prep_fn pointer correctly before
calling the ULD specific driver remove method.

Signed-off-by: James Bottomley <[EMAIL PROTECTED]>
---
 drivers/scsi/scsi_lib.c   |2 +-
 drivers/scsi/scsi_priv.h  |3 +++
 drivers/scsi/scsi_sysfs.c |   17 +
 3 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0e81e4c..a9ac5b1 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1332,7 +1332,7 @@ int scsi_prep_return(struct request_queue *q, struct 
request *req, int ret)
 }
 EXPORT_SYMBOL(scsi_prep_return);
 
-static int scsi_prep_fn(struct request_queue *q, struct request *req)
+int scsi_prep_fn(struct request_queue *q, struct request *req)
 {
struct scsi_device *sdev = q->queuedata;
int ret = BLKPREP_KILL;
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index eff0059..3f34e93 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -74,6 +74,9 @@ extern struct request_queue *scsi_alloc_queue(struct 
scsi_device *sdev);
 extern void scsi_free_queue(struct request_queue *q);
 extern int scsi_init_queue(void);
 extern void scsi_exit_queue(void);
+struct request_queue;
+struct request;
+extern int scsi_prep_fn(struct request_queue *, struct request *);
 
 /* scsi_proc.c */
 #ifdef CONFIG_SCSI_PROC_FS
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index f374fdc..00b3866 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -373,12 +373,29 @@ static int scsi_bus_resume(struct device * dev)
return err;
 }
 
+static int scsi_bus_remove(struct device *dev)
+{
+   struct device_driver *drv = dev->driver;
+   struct scsi_device *sdev = to_scsi_device(dev);
+   int err = 0;
+
+   /* reset the prep_fn back to the default since the
+* driver may have altered it and it's being removed */
+   blk_queue_prep_rq(sdev->request_queue, scsi_prep_fn);
+
+   if (drv && drv->remove)
+   err = drv->remove(dev);
+
+   return 0;
+}
+
 struct bus_type scsi_bus_type = {
 .name  = "scsi",
 .match = scsi_bus_match,
.uevent = scsi_bus_uevent,
.suspend= scsi_bus_suspend,
.resume = scsi_bus_resume,
+   .remove = scsi_bus_remove,
 };
 
 int scsi_sysfs_register(void)
-- 
1.5.3.6



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Exporting capability code/name pairs

2008-01-02 Thread Andrew Morgan
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

There is also the issue that compiled code which explicitly raises and
lowers capabilities around critical code sections (i.e., as they were
intended to be used) is not well served by this change.

That is, unless the code was compiled with things like CAP_MAC_ADMIN
being #define'd then it won't be able to do things like cap_set_flag()
appropriately.

Cheers

Andrew

KaiGai Kohei wrote:
> Andrew Morgan wrote:
>> -BEGIN PGP SIGNED MESSAGE-
>> Hash: SHA1
>>
>> KaiGai Kohei wrote:
>>> Remaining issues:
>>> - We have to mount securityfs explicitly, or use /etc/fstab.
>>>   It can cause a matter when we want to use this feature on
>>>   very early phase on boot. (like /sbin/init)
>> I'm not altogether clear how you intend this to work.
>>
>> Are you saying that some future version of libcap will require that
>> securityfs be mounted before it (libcap) will work?
> 
> Yes, but implementing this feature on securityfs might not be a good
> idea, as James said. If this feature is on procfs or sysfs, it is
> not necessary to mount securityfs explicitly.
> 
> Thanks,
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.7 (Darwin)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFHfD7n+bHCR3gb8jsRAsgcAKDY6qXBR3JV2addHUg5sxyZHJEkDQCfdLOL
zJlf3T4SQsUNENr3kwR5pr8=
=v8C5
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault

2008-01-02 Thread Alexey Dobriyan
On Wed, Jan 02, 2008 at 05:01:02PM -0800, Harvey Harrison wrote:
> Begin to unify do_page_fault(), easy code movement first.
> 
> Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
> ---
> Ingo, similar to the kprobes unification patches I did, it gets a bit
> uglier before it gets better ;-)
> 
>  arch/x86/mm/fault_32.c |   38 +-
>  arch/x86/mm/fault_64.c |   23 ++-
>  2 files changed, 47 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
> index b1893eb..051a4ec 100644
> --- a/arch/x86/mm/fault_32.c
> +++ b/arch/x86/mm/fault_32.c
> @@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
> unsigned long error_code)
>   struct mm_struct *mm;
>   struct vm_area_struct *vma;
>   unsigned long address;
> - int write, si_code;
> - int fault;
> + int write, fault;
> +#ifdef CONFIG_x86_64

There is no such thing as CONFIG_x86_64 .

Do you seriously think code is getting better and more readable because
of this liberal #ifdef sprinkling in every possible direction?

> + unsigned long flags;
> +#endif

One.

> + int si_code;
>  
>   /*
>* We can fault from pretty much anywhere, with unknown IRQ state.
>*/
>   trace_hardirqs_fixup();
>  
> - /* get the address */
> - address = read_cr2();
> -
>   tsk = current;
> + mm = tsk->mm;
>  
> +#ifdef CONFIG_x86_64
> + prefetchw(&mm->mmap_sem);
> +#endif

Two.

> +
> + /* get the address */
> + address = read_cr2();
>   si_code = SEGV_MAPERR;
>  
>   /*
> @@ -403,9 +410,24 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
> unsigned long error_code)
>* (error_code & 4) == 0, and that the fault was not a
>* protection error (error_code & 9) == 0.
>*/
> +#ifdef CONFIG_X86_32
>   if (unlikely(address >= TASK_SIZE)) {
> - if (!(error_code & 0x000d) && vmalloc_fault(address) >= 0)
> + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
> + vmalloc_fault(address) >= 0)
>   return;
> +#else
> + if (unlikely(address >= TASK_SIZE64)) {
> + /*
> +  * Don't check for the module range here: its PML4
> +  * is always initialized because it's shared with the main
> +  * kernel text. Only vmalloc may need PML4 syncups.
> +  */
> + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
> +   ((address >= VMALLOC_START && address < VMALLOC_END))) {
> + if (vmalloc_fault(address) >= 0)
> + return;
> + }
> +#endif

Three.

>   if (notify_page_fault(regs))
>   return;
>   /*
> @@ -423,8 +445,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
> unsigned long error_code)
>   if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
>   local_irq_enable();
>  
> - mm = tsk->mm;
> -
>   /*
>* If we're in an interrupt, have no user context or are running in an
>* atomic region then we must not take the fault.
> @@ -495,7 +515,7 @@ good_area:
>   goto bad_area;
>   }
>  
> - survive:
> +survive:
>   /*
>* If for any reason at all we couldn't handle the fault,
>* make sure we exit gracefully rather than endlessly redo
> diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
> index 357a3e0..97b92b6 100644
> --- a/arch/x86/mm/fault_64.c
> +++ b/arch/x86/mm/fault_64.c
> @@ -426,7 +426,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
> *regs,
>   struct vm_area_struct *vma;
>   unsigned long address;
>   int write, fault;
> +#ifdef CONFIG_x86_64
>   unsigned long flags;
> +#endif

Four.

>   int si_code;
>  
>   /*
> @@ -436,14 +438,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
> *regs,
>  
>   tsk = current;
>   mm = tsk->mm;
> +
> +#ifdef CONFIG_x86_64
>   prefetchw(&mm->mmap_sem);
> +#endif

Five.

>  
>   /* get the address */
>   address = read_cr2();
> -
>   si_code = SEGV_MAPERR;
>  
> -
>   /*
>* We fault-in kernel-space virtual memory on-demand. The
>* 'reference' page table is init_mm.pgd.
> @@ -457,6 +460,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
> *regs,
>* (error_code & 4) == 0, and that the fault was not a
>* protection error (error_code & 9) == 0.
>*/
> +#ifdef CONFIG_X86_32
> + if (unlikely(address >= TASK_SIZE)) {
> + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
> + vmalloc_fault(address) >= 0)
> + return;
> +#else
>   if (unlikely(address >= TASK_SIZE64)) {
>   /*
>* Don't check for the module range here: its PML4
> @@ -468,6 +477,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
> *re

[PATCH] x86_64: not clear empty_zero_page again

2008-01-02 Thread Yinghai Lu
[PATCH] x86_64: not clear empty_zero_page again

empty_zero_page is in .bss section, and it is cleared in clear_bss by 
x86_64_start_kernel. So don't clear that again in mem_init

Signed-off-by: Yinghai Lu <[EMAIL PROTECTED]>

Index: linux-2.6/arch/x86/mm/init_64.c
===
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -521,8 +521,7 @@ void __init mem_init(void)
 
pci_iommu_alloc();
 
-   /* clear the zero-page */
-   memset(empty_zero_page, 0, PAGE_SIZE);
+   /* clear_bss() already clear the empty_zero_page */
 
reservedpages = 0;
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux 2.6.23.12

2008-01-02 Thread Shaohua Li

On Wed, 2008-01-02 at 13:42 -0700, Bjorn Helgaas wrote:
> The patch below was put in 2.6.23.12 as a fix for
> http://bugzilla.kernel.org/show_bug.cgi?id=9514.  It apparently
> does make 9514 go away, but only by coincidence.  There are a
> couple other ideas about fixing 9514.  My proposed patch is
> attached in the bugzilla.
> 
> The .12 patch reduces the number of resources we reserve from
> 24 to 8 ioport regions and from 12 to 4 mmio regions per PNP device.
> It also removes a warning about regions we ignore.
> 
> Obviously, we want to reserve ALL the regions for PNP devices,
> and Thomas is working on a patch for that (which I hope will be
> ready for 2.6.25).  But in the meantime, the .12 patch makes it
> more likely that we will have resource conflicts with PNP devices.
> 
> http://bugzilla.kernel.org/show_bug.cgi?id=9630 appears to be
> such a conflict.
Just to let you know why we increased the resource number:
http://bugzilla.kernel.org/show_bug.cgi?id=8973
We found a real I/O resource conflict there, and it caused a system hang.

Thanks,
Shaohua

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: + restore-missing-sysfs-max_cstate-attr.patch added to -mm tree

2008-01-02 Thread Pallipadi, Venkatesh
 

>-Original Message-
>From: Andrew Morton [mailto:[EMAIL PROTECTED] 
>Sent: Wednesday, January 02, 2008 4:52 PM
>To: Pallipadi, Venkatesh
>Cc: Mark Lord; Arjan van de Ven; [EMAIL PROTECTED]; 
>[EMAIL PROTECTED]; Ingo Molnar; linux-kernel@vger.kernel.org; 
>[EMAIL PROTECTED]
>Subject: Re: + restore-missing-sysfs-max_cstate-attr.patch 
>added to -mm tree
>
>On Wed, 2 Jan 2008 16:06:20 -0800 "Pallipadi, Venkatesh" 
><[EMAIL PROTECTED]> wrote:
>
>>  
>> 
>> >-Original Message-
>> >From: Mark Lord [mailto:[EMAIL PROTECTED] 
>> >Sent: Wednesday, January 02, 2008 3:42 PM
>> >To: Arjan van de Ven
>> >Cc: Pallipadi, Venkatesh; Andrew Morton; [EMAIL PROTECTED]; 
>> >[EMAIL PROTECTED]; Ingo Molnar; linux-kernel@vger.kernel.org; 
>> >[EMAIL PROTECTED]
>> >Subject: Re: + restore-missing-sysfs-max_cstate-attr.patch 
>> >added to -mm tree
>> >
>> >Arjan van de Ven wrote:
>> >> On Fri, 30 Nov 2007 22:31:17 -0500
>> >> Mark Lord <[EMAIL PROTECTED]> wrote:
>> >> 
>> >>> Arjan van de Ven wrote:
>>  On Fri, 30 Nov 2007 22:14:08 -0500
>>  Mark Lord <[EMAIL PROTECTED]> wrote:
>> 
>> >> in -mm there is.. the QoS stuff allows you to set maximum
>> >> tolerable
>> > ..
>> >
>> > That's encouraging, I think, but not for 2.6.24.
>> >
>> >> latency. If your app cant take any latency, you should set
>> >> those... and the side effect is that the kernel will not do
>> >> long-latency C-states or P-state transitions..
>> > ..
>> >
>> > I don't mind the cpufreq changing (actually, I want it 
>to drop in
>> > cpugfreq to save power and keep the fan off), but the 
>> >C-states just
>> > kill this app.
>> >
>> > The app is VMware.  I force the max_state=1 when launching,
>>  ah but then its' even easier... and can be done in 
>2.6.24 already.
>>  VMWare after all has a kernel module, and the latency 
>stuff is in
>>  2.6.23 and 2.6.24 available inside the kernel already.
>> >>> ..
>> >>>
>> >>> Oh, I'm perfectly happy to write my own kernel module if 
>that's what
>> >> 
>> >> all you need to do in your kernel module is call
>> >> 
>> >> add_latency_constraint("mark_wants_his_mouse", 5);
>> >> 
>> >> or so
>> >..
>> >
>> >Dredging up an old regression again now:
>> >
>> >The "make my own module to replace /sys/.../max_cstate" doesn't work
>> >for the single-core machine we use a lot around here.
>> >
>> >VMware is totally sluggish unless I go to another text window 
>> >and do this:
>> >
>> >while ( true ); do echo -n ; done
>> >
>> >At which point VMware performs well again,
>> >the same as with "echo 1 > max_cstate" in 2.6.23.
>> >
>> >Anyone got any suggestions on how to fix this regression
>> >or work around it for 2.6.24 ?
>> >
>> 
>> Easiest and clean way to do it is to have a driver with
>> set_acceptable_latency() for 1uS or so in init and
>> remove_acceptable_latency() at exit.
>
>err, you appear to be suggesting that Mark patch his kernel to 
>make it work
>as well as 2.6.23?  That would be a wrong answer.
>
>This regression was known six weeks ago.  What do we need to 
>do (or revert)
>to fix it in 2.6.24?
>

As I responded earlier here
http://www.ussg.iu.edu/hypermail/linux/kernel/0711.3/2348.html

This interface cannot be supported cleanly with cpuidle. The cleanest
way to do this is to go through latency interfaces. We have changed all
in kernel drivers to use this new interface. The issue here is, I
removed this sysfs interface without deprecating it. We can call it a
regression and we can add it back for the moment. But, this will go from
sysfs sooner or later and latency interface has to be used in future.
And Mark earlier responded in this thread saying he is OK with adding
something in the kernel to get this working. That is the reason I
suggested the above option.

As I saw it 6 weeks back, max_cstate option works as a boot parameter. I
did not see anyone else (apart from Mark) saying they are depending on
this sysfs interface to change max_cstate at run time and Mark said he
can do with the kernel change if possible. Please let me know if you
think this interface is a must fix for .24. I will send a minimal patch
to add it back for .24 for !CPU_IDLE case.

Thanks,
Venki 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Provide u64 version of jiffies_to_usecs() in kernel/tsacct.c

2008-01-02 Thread H. Peter Anvin

Jonathan Lim wrote:


Peter,

Would you be willing to include the u64 function as part of your patch to make
it available kernel-wide?  It just needs:

  u64 inline jiffies_to_usecs_u64(const u64 j)

and for the symbol to be exported.  Thanks.



It should be a separate patch (new functionality versus change of 
implementation); I'd just do a small patch on top of mine.


-hpa
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[ANNOUNCE] ebizzy 0.3 released

2008-01-02 Thread Rodrigo Rubira Branco (BSDaemon)
ebizzy is designed to generate a workload resembling common web application
server workloads.  It is especially useful for testing
changes to memory management, and whenever a highly threaded application
with a large working set and many vmas is needed.

This is release 0.3 of ebizzy.  It reports a rate of transactions per
second, compiles on Linux/Solaris/FreeBSD/HPUX, and scales better.

Available for download at:

http://ebizzy.sf.net



Rodrigo (BSDaemon).

--
http://www.kernelhacking.com/rodrigo

Kernel Hacking: If i really know, i can hack

GPG KeyID: 1FCEDEA1




Message sent using UebiMiau 2.7.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86: fault_{32|64}.c unify do_page_fault

2008-01-02 Thread Harvey Harrison
Begin to unify do_page_fault(), easy code movement first.

Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
---
Ingo, similar to the kprobes unification patches I did, it gets a bit
uglier before it gets better ;-)

 arch/x86/mm/fault_32.c |   38 +-
 arch/x86/mm/fault_64.c |   23 ++-
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index b1893eb..051a4ec 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
unsigned long error_code)
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
-   int write, si_code;
-   int fault;
+   int write, fault;
+#ifdef CONFIG_x86_64
+   unsigned long flags;
+#endif
+   int si_code;
 
/*
 * We can fault from pretty much anywhere, with unknown IRQ state.
 */
trace_hardirqs_fixup();
 
-   /* get the address */
-   address = read_cr2();
-
tsk = current;
+   mm = tsk->mm;
 
+#ifdef CONFIG_x86_64
+   prefetchw(&mm->mmap_sem);
+#endif
+
+   /* get the address */
+   address = read_cr2();
si_code = SEGV_MAPERR;
 
/*
@@ -403,9 +410,24 @@ void __kprobes do_page_fault(struct pt_regs *regs, 
unsigned long error_code)
 * (error_code & 4) == 0, and that the fault was not a
 * protection error (error_code & 9) == 0.
 */
+#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
-   if (!(error_code & 0x000d) && vmalloc_fault(address) >= 0)
+   if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+   vmalloc_fault(address) >= 0)
return;
+#else
+   if (unlikely(address >= TASK_SIZE64)) {
+   /*
+* Don't check for the module range here: its PML4
+* is always initialized because it's shared with the main
+* kernel text. Only vmalloc may need PML4 syncups.
+*/
+   if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ ((address >= VMALLOC_START && address < VMALLOC_END))) {
+   if (vmalloc_fault(address) >= 0)
+   return;
+   }
+#endif
if (notify_page_fault(regs))
return;
/*
@@ -423,8 +445,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned 
long error_code)
if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
local_irq_enable();
 
-   mm = tsk->mm;
-
/*
 * If we're in an interrupt, have no user context or are running in an
 * atomic region then we must not take the fault.
@@ -495,7 +515,7 @@ good_area:
goto bad_area;
}
 
- survive:
+survive:
/*
 * If for any reason at all we couldn't handle the fault,
 * make sure we exit gracefully rather than endlessly redo
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 357a3e0..97b92b6 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -426,7 +426,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
struct vm_area_struct *vma;
unsigned long address;
int write, fault;
+#ifdef CONFIG_x86_64
unsigned long flags;
+#endif
int si_code;
 
/*
@@ -436,14 +438,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
 
tsk = current;
mm = tsk->mm;
+
+#ifdef CONFIG_x86_64
prefetchw(&mm->mmap_sem);
+#endif
 
/* get the address */
address = read_cr2();
-
si_code = SEGV_MAPERR;
 
-
/*
 * We fault-in kernel-space virtual memory on-demand. The
 * 'reference' page table is init_mm.pgd.
@@ -457,6 +460,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
 * (error_code & 4) == 0, and that the fault was not a
 * protection error (error_code & 9) == 0.
 */
+#ifdef CONFIG_X86_32
+   if (unlikely(address >= TASK_SIZE)) {
+   if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+   vmalloc_fault(address) >= 0)
+   return;
+#else
if (unlikely(address >= TASK_SIZE64)) {
/*
 * Don't check for the module range here: its PML4
@@ -468,6 +477,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
if (vmalloc_fault(address) >= 0)
return;
}
+#endif
if (notify_page_fault(regs))
return;
/*
@@ -500,7 +510,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
if (user_mode_vm(regs))
error_code |= PF_USER;
 
- again:
+again:
/* When ru

[PATCH] x86: Use is_kprobe_fault to better match usage

2008-01-02 Thread Harvey Harrison
Currently the notify_page_fault helper is used to test if the page
fault was caused by a kprobe, causing an early return from do_page_fault.

Change the name of the helper to is_kprobe_fault to match the usage and
replace the preempt_disable/enable pair around kprobe_running() with an
explicit test for preemption.  The idea for this comes from a patch
by Quentin Barnes to kprobes.c.

Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
---
Ingo, this may not be functionally equivalent, feel free to yank it out
if there is any trouble, but from what I've seen it should be OK.

Did you ever find a good kprobes test?

 arch/x86/mm/fault_32.c |   30 ++
 arch/x86/mm/fault_64.c |   30 ++
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 051a4ec..5c48cc2 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -42,23 +43,20 @@
 #define PF_RSVD(1<<3)
 #define PF_INSTR   (1<<4)
 
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int is_kprobe_fault(struct pt_regs *regs)
 {
-#ifdef CONFIG_KPROBES
int ret = 0;
-
-   /* kprobe_running() needs smp_processor_id() */
-   if (!user_mode_vm(regs)) {
-   preempt_disable();
-   if (kprobe_running() && kprobe_fault_handler(regs, 14))
-   ret = 1;
-   preempt_enable();
-   }
-
-   return ret;
-#else
-   return 0;
+#ifdef CONFIG_KPROBES
+   /*
+* If it is a kprobe fault we can not be premptible so return before
+* calling kprobe_running() as it will assert on smp_processor_id if
+* preemption is enabled.
+*/
+   if (!user_mode_vm(regs) && !preemptible() && kprobe_running() &&
+   kprobe_fault_handler(regs, 14))
+   ret = 1;
 #endif
+   return ret;
 }
 
 #ifdef CONFIG_X86_32
@@ -428,7 +426,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned 
long error_code)
return;
}
 #endif
-   if (notify_page_fault(regs))
+   if (is_kprobe_fault(regs))
return;
/*
 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -437,7 +435,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned 
long error_code)
goto bad_area_nosemaphore;
}
 
-   if (notify_page_fault(regs))
+   if (is_kprobe_fault(regs))
return;
 
/* It's safe to allow irq's after cr2 has been saved and the vmalloc
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 97b92b6..09008e5 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -45,23 +46,20 @@
 #define PF_RSVD(1<<3)
 #define PF_INSTR   (1<<4)
 
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int is_kprobe_fault(struct pt_regs *regs)
 {
-#ifdef CONFIG_KPROBES
int ret = 0;
-
-   /* kprobe_running() needs smp_processor_id() */
-   if (!user_mode(regs)) {
-   preempt_disable();
-   if (kprobe_running() && kprobe_fault_handler(regs, 14))
-   ret = 1;
-   preempt_enable();
-   }
-
-   return ret;
-#else
-   return 0;
+#ifdef CONFIG_KPROBES
+   /*
+* If it is a kprobe fault we can not be premptible so return before
+* calling kprobe_running() as it will assert on smp_processor_id if
+* preemption is enabled.
+*/
+   if (!user_mode_vm(regs) && !preemptible() && kprobe_running() &&
+   kprobe_fault_handler(regs, 14))
+   ret = 1;
 #endif
+   return ret;
 }
 
 #ifdef CONFIG_X86_32
@@ -478,7 +476,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
return;
}
 #endif
-   if (notify_page_fault(regs))
+   if (is_kprobe_fault(regs))
return;
/*
 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -487,7 +485,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs 
*regs,
goto bad_area_nosemaphore;
}
 
-   if (notify_page_fault(regs))
+   if (is_kprobe_fault(regs))
return;
 
if (likely(regs->flags & X86_EFLAGS_IF))
-- 
1.5.4.rc2.1097.gb6e0d



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch 3/3] consolidate oops end/ID printing code in panic.c

2008-01-02 Thread Arjan van de Ven

Subject: consolidate oops end/ID printing code in panic.c
From: Arjan van de Ven <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
CC: Andrew Morton <[EMAIL PROTECTED]>

This patch consolidates the 2 places that print an oops end marker and ID
into a single function; this patch follows the WARN_ON uninline and the
WARN_ON enhancement patches.

Signed-off-by: Arjan van de Ven <[EMAIL PROTECTED]>

---
 kernel/panic.c |   15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

Index: linux-2.6.24-rc6/kernel/panic.c
===
--- linux-2.6.24-rc6.orig/kernel/panic.c
+++ linux-2.6.24-rc6/kernel/panic.c
@@ -281,6 +281,13 @@ static int init_oops_id(void)
 }
 late_initcall(init_oops_id);

+static void print_oops_end_marker(void)
+{
+   init_oops_id();
+   printk(KERN_WARNING "---[ end trace %016llx ]---\n",
+   (unsigned long long)oops_id);
+}
+
 /*
  * Called when the architecture exits its oops handler, after printing
  * everything.
@@ -288,9 +295,7 @@ late_initcall(init_oops_id);
 void oops_exit(void)
 {
do_oops_enter_exit();
-   init_oops_id();
-   printk(KERN_WARNING "---[ end trace %016llx ]---\n",
-   (unsigned long long)oops_id);
+   print_oops_end_marker();
 }

 int do_warn_on(const unsigned long condition, const char *file,
@@ -302,9 +307,7 @@ int do_warn_on(const unsigned long condi
__FILE__, __LINE__, __FUNCTION__);
print_modules();
dump_stack();
-   init_oops_id();
-   printk(KERN_WARNING "---[ end trace %016llx ]---\n",
-   (unsigned long long)oops_id);
+   print_oops_end_marker();
}
return !!condition;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Provide u64 version of jiffies_to_usecs() in kernel/tsacct.c

2008-01-02 Thread Jonathan Lim
On Wed Jan  2 16:36:47 2008, [EMAIL PROTECTED] wrote:
> 
> Andrew Morton wrote:
> > On Fri, 28 Dec 2007 13:26:07 -0800 (PST) Jonathan Lim <[EMAIL PROTECTED]> 
> > wrote:
> > 
> >> It's possible that the values used in and returned from jiffies_to_usecs()
> >> are incorrect because of truncation when variables of type u64 are
> >> involved.  So a function specific to that type is used instead.
> >>
> >> Diff'd against: linux/kernel/git/stable/linux-2.6.23.y.git
> >>
> >> Signed-off-by: Jonathan Lim <[EMAIL PROTECTED]>
> >>
> >> --- a/kernel/tsacct.c  2007-12-28 11:58:05.182065029 -0800
> >> +++ b/kernel/tsacct.c  2007-12-28 11:57:37.949013675 -0800
> >> @@ -71,6 +71,17 @@ void bacct_add_tsk(struct taskstats *sta
> >>  
> >>  #ifdef CONFIG_TASK_XACCT
> >>  
> >> +static inline u64 jiffies_to_usecs_u64(const u64 j)
> >> +{
> >> +#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
> >> +  return (USEC_PER_SEC / HZ) * j;
> >> +#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
> >> +  return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
> >> +#else
> >> +  return (j * USEC_PER_SEC) / HZ;
> >> +#endif
> >> +}
> >> +
> >>  #define KB 1024
> >>  #define MB (1024*KB)
> >>  /*
> >> @@ -81,8 +92,8 @@ void xacct_add_tsk(struct taskstats *sta
> >>struct mm_struct *mm;
> >>  
> >>/* convert pages-jiffies to Mbyte-usec */
> >> -  stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
> >> -  stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
> >> +  stats->coremem = jiffies_to_usecs_u64(p->acct_rss_mem1) * PAGE_SIZE / 
> >> MB;
> >> +  stats->virtmem = jiffies_to_usecs_u64(p->acct_vm_mem1) * PAGE_SIZE / MB;
> >>mm = get_task_mm(p);
> >>if (mm) {
> >>/* adjust to KB unit */
> > 
> > Fair enough.  But I guess that new function should be a kernel-wide thing
> > because surely other users will turn up.
> > 
> > Peter has been working on the accuracy of some of these conversion
> > functions and might need to know about this change?
> 
> Yes, the function should be coded using the new #defines produced by 
> timeconst.h; that way you end up avoiding a possible overflow in the 
> multiplication.
> 
> I believe all three cases can be folded, then, to:
> 
>   return (j*HZ_TO_USEC_NUM + HZ_TO_USEC_DEN-1) / HZ_TO_USEC_DEN;
> 
> I would also like to observe that the roundoff behaviour of the function 
>   above is inconsistent; in case 2 it will round up, but in case 3 it 
> will round down.  The line proposed above has round up behaviour.
> 
>   -hpa

Peter,

Would you be willing to include the u64 function as part of your patch to make
it available kernel-wide?  It just needs:

  u64 inline jiffies_to_usecs_u64(const u64 j)

and for the symbol to be exported.  Thanks.

Jonathan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch 2/3] Add the end-of-trace marker and the module list to WARN_ON()

2008-01-02 Thread Arjan van de Ven

Subject: Add the end-of-trace marker and the module list to WARN_ON()
From: Arjan van de Ven <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
CC: Andrew Morton <[EMAIL PROTECTED]>

Unlike oopses, WARN_ON() currently doesn't print the loaded modules list.
This makes it harder to take action on certain bug reports. For example,
recently there was a set of WARN_ON()s reported in the mac80211 stack,
which were just signalling a driver bug. It then takes another round trip
to the bug reporter (if he responds at all) to find out which driver
is at fault.

Another issue is that, unlike oopses, WARN_ON() doesn't currently printk
the helpful "cut here" line, nor the "end of trace" marker.
Now that WARN_ON() is out of line, the size increase due to this is
minimal and it's worth adding.

Signed-off-by: Arjan van de Ven <[EMAIL PROTECTED]>

---
 kernel/panic.c |5 +
 1 file changed, 5 insertions(+)

Index: linux-2.6.24-rc6/kernel/panic.c
===
--- linux-2.6.24-rc6.orig/kernel/panic.c
+++ linux-2.6.24-rc6/kernel/panic.c
@@ -297,9 +297,14 @@ int do_warn_on(const unsigned long condi
const int line, const char *function)
 {
if (unlikely(condition)) {
+   printk(KERN_WARNING "[ cut here ]\n");
printk(KERN_WARNING "WARNING: at %s:%d %s()\n",
__FILE__, __LINE__, __FUNCTION__);
+   print_modules();
dump_stack();
+   init_oops_id();
+   printk(KERN_WARNING "---[ end trace %016llx ]---\n",
+   (unsigned long long)oops_id);
}
return !!condition;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 01/12] Use mutex instead of semaphore in driver core

2008-01-02 Thread Dave Young
On Jan 3, 2008 12:08 AM, Alan Stern <[EMAIL PROTECTED]> wrote:
> On Tue, 1 Jan 2008, Greg KH wrote:
>
> > For most cases, yes, I agree with this, but due to the lockdep issues
> > that occur here, and the whole mess with the suspend path and locking
> > the device tree, that has been hashed out many times in the past, I am
> > interested in trying to see if there is any real reason why this is
> > absolutely necessary to convert.
> >
> > If no one has noticed any issues in this area, I think the complexity
> > that will be involved in any such conversion will strongly outweigh any
> > simplicity that might be expected.
> >
> > I'm very open to potential patches to do this, just don't ignore the
> > issues that others have run into in the past when attempting this.
>
> There are two separate things to consider here.  One is struct device
> and the other is struct class.
>
> We know that replacing semaphores with mutexes in struct device doesn't
> sit well with lockdep.  However the replacement may work perfectly
> smoothly for struct class.  It would be worthwhile for Dave Young to
> separate out just that part and try it.
>
Ok, let me try a new patch only for struct class.

Regards
dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch 1/3] move WARN_ON() out of line

2008-01-02 Thread Arjan van de Ven

Subject: move WARN_ON() out of line
From: Arjan van de Ven <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
CC: Andrew Morton <[EMAIL PROTECTED]>

A quick grep shows that there are currently 1145 instances of WARN_ON
in the kernel. Currently, WARN_ON is pretty much entirely inlined,
which makes it hard to enhance it without growing the size of the kernel
(and getting Andrew unhappy).

This patch moves WARN_ON() out of line entirely. I've considered keeping
the test inline and moving only the slowpath out of line, but I decided
against that: an out of line test reduces the pressure on the CPUs
branch predictor logic and gives smaller code, while a function call
to a fixed location is quite fast. Likewise I've considered doing something
similar to BUG() (eg use a trapping instruction) but that's not really
better (it needs the test inline again and recovering from an invalid
instruction isn't quite fun).

The code size reduction of this patch was about 6.5Kb (on a distro style
.config):

   text    data     bss     dec     hex filename
3096493  293455 2760704 6150652  5dd9fc vmlinux.before
3090006  293455 2760704 6144165  5dc0a5 vmlinux.after

Signed-off-by: Arjan van de Ven <[EMAIL PROTECTED]>

---
 include/asm-generic/bug.h |   13 -
 kernel/panic.c|   13 +
 2 files changed, 17 insertions(+), 9 deletions(-)

Index: linux-2.6.24-rc6/include/asm-generic/bug.h
===
--- linux-2.6.24-rc6.orig/include/asm-generic/bug.h
+++ linux-2.6.24-rc6/include/asm-generic/bug.h
@@ -32,15 +32,10 @@ struct bug_entry {
 #endif

 #ifndef HAVE_ARCH_WARN_ON
-#define WARN_ON(condition) ({  \
-   int __ret_warn_on = !!(condition);  \
-   if (unlikely(__ret_warn_on)) {  \
-   printk("WARNING: at %s:%d %s()\n", __FILE__,  \
-   __LINE__, __FUNCTION__);\
-   dump_stack();   \
-   }   \
-   unlikely(__ret_warn_on);\
-})
+extern int do_warn_on(const unsigned long condition, const char *file,
+   const int line, const char *function);
+#define WARN_ON(condition) do_warn_on((unsigned long)(condition), __FILE__, \
+__LINE__, __FUNCTION__)
 #endif

 #else /* !CONFIG_BUG */
Index: linux-2.6.24-rc6/kernel/panic.c
===
--- linux-2.6.24-rc6.orig/kernel/panic.c
+++ linux-2.6.24-rc6/kernel/panic.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 

 int panic_on_oops;
 int tainted;
@@ -292,6 +293,18 @@ void oops_exit(void)
(unsigned long long)oops_id);
 }

+int do_warn_on(const unsigned long condition, const char *file,
+   const int line, const char *function)
+{
+   if (unlikely(condition)) {
+   printk(KERN_WARNING "WARNING: at %s:%d %s()\n",
+   __FILE__, __LINE__, __FUNCTION__);
+   dump_stack();
+   }
+   return !!condition;
+}
+EXPORT_SYMBOL(do_warn_on);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
  * Called when gcc's -fstack-protector feature is used, and
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: + restore-missing-sysfs-max_cstate-attr.patch added to -mm tree

2008-01-02 Thread Andrew Morton
On Wed, 2 Jan 2008 16:06:20 -0800 "Pallipadi, Venkatesh" <[EMAIL PROTECTED]> 
wrote:

>  
> 
> >-Original Message-
> >From: Mark Lord [mailto:[EMAIL PROTECTED] 
> >Sent: Wednesday, January 02, 2008 3:42 PM
> >To: Arjan van de Ven
> >Cc: Pallipadi, Venkatesh; Andrew Morton; [EMAIL PROTECTED]; 
> >[EMAIL PROTECTED]; Ingo Molnar; linux-kernel@vger.kernel.org; 
> >[EMAIL PROTECTED]
> >Subject: Re: + restore-missing-sysfs-max_cstate-attr.patch 
> >added to -mm tree
> >
> >Arjan van de Ven wrote:
> >> On Fri, 30 Nov 2007 22:31:17 -0500
> >> Mark Lord <[EMAIL PROTECTED]> wrote:
> >> 
> >>> Arjan van de Ven wrote:
>  On Fri, 30 Nov 2007 22:14:08 -0500
>  Mark Lord <[EMAIL PROTECTED]> wrote:
> 
> >> in -mm there is.. the QoS stuff allows you to set maximum
> >> tolerable
> > ..
> >
> > That's encouraging, I think, but not for 2.6.24.
> >
> >> latency. If your app cant take any latency, you should set
> >> those... and the side effect is that the kernel will not do
> >> long-latency C-states or P-state transitions..
> > ..
> >
> > I don't mind the cpufreq changing (actually, I want it to drop in
> > cpugfreq to save power and keep the fan off), but the 
> >C-states just
> > kill this app.
> >
> > The app is VMware.  I force the max_state=1 when launching,
>  ah but then its' even easier... and can be done in 2.6.24 already.
>  VMWare after all has a kernel module, and the latency stuff is in
>  2.6.23 and 2.6.24 available inside the kernel already.
> >>> ..
> >>>
> >>> Oh, I'm perfectly happy to write my own kernel module if that's what
> >> 
> >> all you need to do in your kernel module is call
> >> 
> >> add_latency_constraint("mark_wants_his_mouse", 5);
> >> 
> >> or so
> >..
> >
> >Dredging up an old regression again now:
> >
> >The "make my own module to replace /sys/.../max_cstate" doesn't work
> >for the single-core machine we use a lot around here.
> >
> >VMware is totally sluggish unless I go to another text window 
> >and do this:
> >
> >while ( true ); do echo -n ; done
> >
> >At which point VMware performs well again,
> >the same as with "echo 1 > max_cstate" in 2.6.23.
> >
> >Anyone got any suggestions on how to fix this regression
> >or work around it for 2.6.24 ?
> >
> 
> Easiest and clean way to do it is to have a driver with
> set_acceptable_latency() for 1uS or so in init and
> remove_acceptable_latency() at exit.

err, you appear to be suggesting that Mark patch his kernel to make it work
as well as 2.6.23?  That would be a wrong answer.

This regression was known six weeks ago.  What do we need to do (or revert)
to fix it in 2.6.24?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Make checkpatch.pl's quiet option not print the summary on no errors

2008-01-02 Thread Arjan van de Ven

Subject: Make checkpatch.pl's quiet option not print the summary on no errors
From: Arjan van de Ven <[EMAIL PROTECTED]>
CC: [EMAIL PROTECTED]

Right now, in quiet mode, checkpatch.pl still prints a summary line even
if the patch is 100% clean. IMO, "quiet mode" should mean "no output if clean",
the patch below makes that so. (This also makes the quilt integration
on my system work nicer :)

Signed-off-by: Arjan van de Ven <[EMAIL PROTECTED]>

---
 scripts/checkpatch.pl |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6.24-rc6/scripts/checkpatch.pl
===
--- linux-2.6.24-rc6.orig/scripts/checkpatch.pl
+++ linux-2.6.24-rc6/scripts/checkpatch.pl
@@ -1579,7 +1579,7 @@ sub process {
}

print report_dump();
-   if ($summary) {
+   if ($summary && ($clean == 0 || $quiet == 0)) {
print "total: $cnt_error errors, $cnt_warn warnings, " .
(($check)? "$cnt_chk checks, " : "") .
"$cnt_lines lines checked\n";
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -mm] EFI : Split EFI tables parsing code from EFI runtime service support code

2008-01-02 Thread Huang, Ying
On Sun, 2007-12-30 at 15:28 +0100, Ingo Molnar wrote:
> * Huang, Ying <[EMAIL PROTECTED]> wrote:
> 
> > +struct efi_tables efi_tables;
> > +EXPORT_SYMBOL(efi_tables);
> 
> > +enum bios_type bios_type = BIOS_LEGACY;
> > +EXPORT_SYMBOL(bios_type);
> 
> please make all the new exports EXPORT_SYMBOL_GPL().

OK, I will change it.

Best Regards,
Huang Ying
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [20/20] x86: Print which shared library/executable faulted in segfault etc. messages

2008-01-02 Thread Andi Kleen

They now look like

hal-resmgr[13791]: segfault at 3c rip 2b9c8caec182 rsp 7fff1e825d30 error 4 in 
libacl.so.1.1.0[2b9c8caea000+6000]

This makes it easier to pinpoint bugs to specific libraries. 

And printing the offset into a mapping also makes it possible to always
find the correct fault point in a library even with randomized mappings.
Previously there was no way to actually find the correct code address
inside the randomized mapping.

Relies on earlier patch to shorten the printk formats.

They are often now longer than 80 characters, but I think that's worth 
it.

Patch for i386 and x86-64.

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/kernel/signal_32.c |7 +--
 arch/x86/kernel/signal_64.c |7 +--
 arch/x86/kernel/traps_32.c  |7 +--
 arch/x86/mm/fault_32.c  |4 +++-
 include/linux/mm.h  |1 +
 mm/memory.c |   27 +++
 6 files changed, 46 insertions(+), 7 deletions(-)

Index: linux/include/linux/mm.h
===
--- linux.orig/include/linux/mm.h
+++ linux/include/linux/mm.h
@@ -1145,6 +1145,7 @@ extern int randomize_va_space;
 #endif
 
 const char * arch_vma_name(struct vm_area_struct *vma);
+void print_vma_addr(char *prefix, unsigned long rip);
 
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
Index: linux/mm/memory.c
===
--- linux.orig/mm/memory.c
+++ linux/mm/memory.c
@@ -2746,3 +2746,30 @@ int access_process_vm(struct task_struct
 
return buf - old_buf;
 }
+
+/*
+ * Print the name of a VMA.
+ */
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+   struct mm_struct *mm = current->mm;
+   struct vm_area_struct *vma;
+   down_read(&mm->mmap_sem);
+   vma = find_vma(mm, ip);
+   if (vma && vma->vm_file) {
+   struct file *f = vma->vm_file;
+   char *buf = (char *)__get_free_page(GFP_KERNEL);
+   if (buf) {
+   char *p, *s;
+   p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);
+   s = strrchr(p, '/');
+   if (s)
+   p = s+1;
+   printk("%s%s[%lx+%lx]", prefix, p,
+   vma->vm_start,
+   vma->vm_end - vma->vm_start);
+   free_page((unsigned long)buf);
+   }
+   }
+   up_read(&current->mm->mmap_sem);
+}
Index: linux/arch/x86/kernel/signal_32.c
===
--- linux.orig/arch/x86/kernel/signal_32.c
+++ linux/arch/x86/kernel/signal_32.c
@@ -198,12 +198,15 @@ asmlinkage int sys_sigreturn(unsigned lo
return ax;
 
 badframe:
-   if (show_unhandled_signals && printk_ratelimit())
+   if (show_unhandled_signals && printk_ratelimit()) {
printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx"
-  " sp:%lx oeax:%lx\n",
+  " sp:%lx oeax:%lx",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
current->comm, task_pid_nr(current), frame, regs->ip,
regs->sp, regs->orig_ax);
+   print_vma_addr(" in ", regs->ip);
+   printk("\n");
+   }
 
force_sig(SIGSEGV, current);
return 0;
Index: linux/arch/x86/kernel/signal_64.c
===
--- linux.orig/arch/x86/kernel/signal_64.c
+++ linux/arch/x86/kernel/signal_64.c
@@ -481,9 +481,12 @@ do_notify_resume(struct pt_regs *regs, v
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 { 
struct task_struct *me = current; 
-   if (show_unhandled_signals && printk_ratelimit())
-   printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx 
orax:%lx\n",
+   if (show_unhandled_signals && printk_ratelimit()) {
+   printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
   me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+   print_vma_addr(" in ", regs->ip);
+   printk("\n");
+   }
 
force_sig(SIGSEGV, me); 
 } 
Index: linux/arch/x86/kernel/traps_32.c
===
--- linux.orig/arch/x86/kernel/traps_32.c
+++ linux/arch/x86/kernel/traps_32.c
@@ -673,11 +673,14 @@ void __kprobes do_general_protection(str
current->thread.error_code = error_code;
current->thread.trap_no = 13;
if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
-   printk_ratelimit())
+   printk_ratelimit()) {
printk(KERN_INFO
-   "%s[%d] general protection ip:%lx sp:%lx error:%lx\n",
+

[PATCH] pda-power: only register available psu

2008-01-02 Thread Dmitry Baryshkov
Currently pda-power adds both AC and USB power supply units.
This patch fixes it so that PSUs are added only if they are enabled.

Signed-off-by: Dmitry Baryshkov <[EMAIL PROTECTED]>

diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index c058f28..42eac09 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -168,21 +168,14 @@ static int pda_power_probe(struct platform_device *pdev)
pda_power_supplies[1].num_supplicants = pdata->num_supplicants;
}
 
-   ret = power_supply_register(&pdev->dev, &pda_power_supplies[0]);
-   if (ret) {
-   dev_err(dev, "failed to register %s power supply\n",
-   pda_power_supplies[0].name);
-   goto supply0_failed;
-   }
-
-   ret = power_supply_register(&pdev->dev, &pda_power_supplies[1]);
-   if (ret) {
-   dev_err(dev, "failed to register %s power supply\n",
-   pda_power_supplies[1].name);
-   goto supply1_failed;
-   }
-
if (ac_irq) {
+   ret = power_supply_register(&pdev->dev, &pda_power_supplies[0]);
+   if (ret) {
+   dev_err(dev, "failed to register %s power supply\n",
+   pda_power_supplies[0].name);
+   goto ac_failed;
+   }
+
ret = request_irq(ac_irq->start, power_changed_isr,
  get_irq_flags(ac_irq), ac_irq->name,
  &pda_power_supplies[0]);
@@ -193,6 +186,13 @@ static int pda_power_probe(struct platform_device *pdev)
}
 
if (usb_irq) {
+   ret = power_supply_register(&pdev->dev, &pda_power_supplies[1]);
+   if (ret) {
+   dev_err(dev, "failed to register %s power supply\n",
+   pda_power_supplies[1].name);
+   goto usb_failed;
+   }
+
ret = request_irq(usb_irq->start, power_changed_isr,
  get_irq_flags(usb_irq), usb_irq->name,
  &pda_power_supplies[1]);
@@ -205,13 +205,14 @@ static int pda_power_probe(struct platform_device *pdev)
goto success;
 
 usb_irq_failed:
+   power_supply_unregister(&pda_power_supplies[1]);
+usb_failed:
if (ac_irq)
free_irq(ac_irq->start, &pda_power_supplies[0]);
 ac_irq_failed:
-   power_supply_unregister(&pda_power_supplies[1]);
-supply1_failed:
-   power_supply_unregister(&pda_power_supplies[0]);
-supply0_failed:
+   if (ac_irq)
+   power_supply_unregister(&pda_power_supplies[0]);
+ac_failed:
 noirqs:
 wrongid:
 success:

-- 
With best wishes
Dmitry

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [19/20] x86: Use shorter addresses in i386 segfault printks

2008-01-02 Thread Andi Kleen

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/mm/fault_32.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux/arch/x86/mm/fault_32.c
===
--- linux.orig/arch/x86/mm/fault_32.c
+++ linux/arch/x86/mm/fault_32.c
@@ -549,7 +549,7 @@ bad_area_nosemaphore:
 
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
-   printk("%s%s[%d]: segfault at %08lx ip %08lx "
+   printk("%s%s[%d]: segfault at %lx ip %08lx "
"sp %08lx error %lx\n",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address, regs->ip,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [18/20] x86: Don't disable TSC in any C states on AMD Fam10h

2008-01-02 Thread Andi Kleen

The ACPI code currently disables TSC use in any C2 and C3 
states. But the AMD Fam10h BKDG documents that the TSC
will never stop in any C states when the CONSTANT_TSC bit is 
set. Make this disabling conditional on CONSTANT_TSC
not set on AMD.

I actually think this is true on Intel too for C2 states
on CPUs with p-state invariant TSC, but this needs
further discussions with Len to really confirm :-) 

So far it is only enabled on AMD.

Cc: [EMAIL PROTECTED]

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 drivers/acpi/processor_idle.c |   32 
 1 file changed, 28 insertions(+), 4 deletions(-)

Index: linux/drivers/acpi/processor_idle.c
===
--- linux.orig/drivers/acpi/processor_idle.c
+++ linux/drivers/acpi/processor_idle.c
@@ -353,6 +353,26 @@ int acpi_processor_resume(struct acpi_de
return 0;
 }
 
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+static int tsc_halts_in_c(int state)
+{
+   switch (boot_cpu_data.x86_vendor) {
+   case X86_VENDOR_AMD:
+   /*
+* AMD Fam10h TSC will tick in all
+* C/P/S0/S1 states when this bit is set.
+*/
+   if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+   return 0;
+   /*FALL THROUGH*/
+   case X86_VENDOR_INTEL:
+   /* Several cases known where TSC halts in C2 too */
+   default:
+   return state > ACPI_STATE_C1;
+   }
+}
+#endif
+
 #ifndef CONFIG_CPU_IDLE
 static void acpi_processor_idle(void)
 {
@@ -512,7 +532,8 @@ static void acpi_processor_idle(void)
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC halts in C2, so notify users */
-   mark_tsc_unstable("possible TSC halt in C2");
+   if (tsc_halts_in_c(ACPI_STATE_C2))
+   mark_tsc_unstable("possible TSC halt in C2");
 #endif
/* Compute time (ticks) that we were actually asleep */
sleep_ticks = ticks_elapsed(t1, t2);
@@ -576,7 +597,8 @@ static void acpi_processor_idle(void)
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC halts in C3, so notify users */
-   mark_tsc_unstable("TSC halts in C3");
+   if (tsc_halts_in_c(ACPI_STATE_C3))
+   mark_tsc_unstable("TSC halts in C3");
 #endif
/* Compute time (ticks) that we were actually asleep */
sleep_ticks = ticks_elapsed(t1, t2);
@@ -1441,7 +1463,8 @@ static int acpi_idle_enter_simple(struct
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC could halt in idle, so notify users */
-   mark_tsc_unstable("TSC halts in idle");;
+   if (tsc_halts_in_c(cx->type))
+   mark_tsc_unstable("TSC halts in idle");;
 #endif
sleep_ticks = ticks_elapsed(t1, t2);
 
@@ -1552,7 +1575,8 @@ static int acpi_idle_enter_bm(struct cpu
 
 #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
/* TSC could halt in idle, so notify users */
-   mark_tsc_unstable("TSC halts in idle");
+   if (tsc_halts_in_c(ACPI_STATE_C3))
+   mark_tsc_unstable("TSC halts in idle");
 #endif
sleep_ticks = ticks_elapsed(t1, t2);
/* Tell the scheduler how much we idled: */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [16/20] x86: Allow TSC clock source on AMD Fam10h and some cleanup

2008-01-02 Thread Andi Kleen

After a lot of discussions with AMD it turns out that TSC
on Fam10h CPUs is synchronized when the CONSTANT_TSC cpuid bit is set. 
Or rather that if there are ever systems where that is not
true it would be their BIOS' task to disable the bit.

So finally use TSC gettimeofday on Fam10h by default.

Or rather it is always used now on CPUs where the AMD 
specific CONSTANT_TSC bit is set.

This gives a nice speed boost for gettimeofday() on these systems,
which tends to be by far the most common v/syscall.

On a Fam10h system here TSC gtod uses about 20% of the CPU time of 
acpi_pm based gtod(). This was measured on 32bit, on 64bit
it is even better because TSC gtod() can use a vsyscall
and stay in ring 3, which acpi_pm doesn't.

The Intel check simply checks for CONSTANT_TSC too without hardcoding
Intel vendor. This is equivalent on 64bit because all 64bit capable Intel 
CPUs will have CONSTANT_TSC set. 

On Intel there is no CPU supplied CONSTANT_TSC bit currently,
but we synthesize one based on hardcoded knowledge which steppings
have p-state invariant TSC.

So the new logic is now: On CPUs which have the AMD specific
CONSTANT_TSC bit set or on Intel CPUs which are new enough
to be known to have p-state invariant TSC always use 
TSC based gettimeofday()

Cc: [EMAIL PROTECTED]

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/kernel/tsc_32.c |5 +
 arch/x86/kernel/tsc_64.c |5 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

Index: linux/arch/x86/kernel/tsc_32.c
===
--- linux.orig/arch/x86/kernel/tsc_32.c
+++ linux/arch/x86/kernel/tsc_32.c
@@ -354,6 +354,11 @@ __cpuinit int unsynchronized_tsc(void)
 {
if (!cpu_has_tsc || tsc_unstable)
return 1;
+
+   /* Anything with constant TSC should be synchronized */
+   if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+   return 0;
+
/*
 * Intel systems are normally all synchronized.
 * Exceptions must mark TSC as unstable:
Index: linux/arch/x86/kernel/tsc_64.c
===
--- linux.orig/arch/x86/kernel/tsc_64.c
+++ linux/arch/x86/kernel/tsc_64.c
@@ -276,9 +276,8 @@ __cpuinit int unsynchronized_tsc(void)
if (apic_is_clustered_box())
return 1;
 #endif
-   /* Most intel systems have synchronized TSCs except for
-  multi node systems */
-   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+
+   if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_ACPI
/* But TSC doesn't tick in C3 so don't use it there */
if (acpi_gbl_FADT.header.length > 0 &&
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [17/20] x86: Remove explicit C3 TSC check on 64bit

2008-01-02 Thread Andi Kleen

Trust the ACPI code to disable TSC instead when C3 is used.

AMD Fam10h does not stop the TSC in any C state, so the
check was already incorrect there after the change that
made AMD be handled like Intel.

This allows using the TSC when C3 is disabled in software
(acpi.max_c_state=2) even though the BIOS supports it.

Match i386 behaviour.

Cc: [EMAIL PROTECTED]

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/kernel/tsc_64.c |9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

Index: linux/arch/x86/kernel/tsc_64.c
===
--- linux.orig/arch/x86/kernel/tsc_64.c
+++ linux/arch/x86/kernel/tsc_64.c
@@ -277,15 +277,8 @@ __cpuinit int unsynchronized_tsc(void)
return 1;
 #endif
 
-   if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
-#ifdef CONFIG_ACPI
-   /* But TSC doesn't tick in C3 so don't use it there */
-   if (acpi_gbl_FADT.header.length > 0 &&
-   acpi_gbl_FADT.C3latency < 1000)
-   return 1;
-#endif
+   if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
-   }
 
/* Assume multi socket systems are not synchronized */
return num_present_cpus() > 1;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [14/20] x86: Add per cpu counters for machine check polls / machine check events

2008-01-02 Thread Andi Kleen

.. and report them in /proc/interrupts

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/kernel/cpu/mcheck/mce_64.c |6 ++
 arch/x86/kernel/irq_32.c|   10 ++
 arch/x86/kernel/irq_64.c|9 +
 include/asm-x86/mce.h   |3 +++
 4 files changed, 28 insertions(+)

Index: linux/arch/x86/kernel/cpu/mcheck/mce_64.c
===
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -57,6 +58,9 @@ static char *trigger_argv[2] = { trigger
 
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 
+DEFINE_PER_CPU(unsigned, mce_checks);
+DEFINE_PER_CPU(unsigned, mce_events);
+
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -208,6 +212,7 @@ void do_machine_check(struct pt_regs * r
 
memset(&m, 0, sizeof(struct mce));
m.cpu = smp_processor_id();
+   __get_cpu_var(mce_checks)++;
rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
/* if the restart IP is not valid, we're done for */
if (!(m.mcgstatus & MCG_STATUS_RIPV))
@@ -263,6 +268,7 @@ void do_machine_check(struct pt_regs * r
panicm_found = 1;
}
 
+   __get_cpu_var(mce_checks)++;
add_taint(TAINT_MACHINE_CHECK);
}
 
Index: linux/arch/x86/kernel/irq_32.c
===
--- linux.orig/arch/x86/kernel/irq_32.c
+++ linux/arch/x86/kernel/irq_32.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -329,6 +330,15 @@ skip:
 #if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
 #endif
+   seq_printf(p, "MCE: ");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(mce_events, j));
+   seq_printf(p, "  Machine check events\n");
+   seq_printf(p, "MCP: ");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(mce_checks, j));
+   seq_printf(p, "  Machine check state polls\n");
+
}
return 0;
 }
Index: linux/include/asm-x86/mce.h
===
--- linux.orig/include/asm-x86/mce.h
+++ linux/include/asm-x86/mce.h
@@ -115,6 +115,9 @@ extern void mcheck_init(struct cpuinfo_x
 extern void stop_mce(void);
 extern void restart_mce(void);
 
+DECLARE_PER_CPU(unsigned, mce_events);
+DECLARE_PER_CPU(unsigned, mce_checks);
+
 #endif /* __KERNEL__ */
 
 #endif
Index: linux/arch/x86/kernel/irq_64.c
===
--- linux.orig/arch/x86/kernel/irq_64.c
+++ linux/arch/x86/kernel/irq_64.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 DEFINE_PER_CPU(irq_cpustat_t, irq_stat);
 
@@ -151,6 +152,14 @@ skip:
seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
seq_printf(p, "  Spurious interrupts\n");
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+   seq_printf(p, "MCE: ");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(mce_events, j));
+   seq_printf(p, "  Machine check events\n");
+   seq_printf(p, "MCP: ");
+   for_each_online_cpu(j)
+   seq_printf(p, "%10u ", per_cpu(mce_checks, j));
+   seq_printf(p, "  Machine check state polls\n");
}
return 0;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [15/20] x86: Move X86_FEATURE_CONSTANT_TSC into early cpu feature detection

2008-01-02 Thread Andi Kleen

This is needed by the next patch in time_init(), which runs early.

This includes a minor fix on i386 where early_intel_workarounds()
[which is now called early_init_intel] really executes early as 
the comments say.

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/x86/kernel/cpu/amd.c|   17 +++--
 arch/x86/kernel/cpu/common.c |   11 +--
 arch/x86/kernel/cpu/cpu.h|3 ++-
 arch/x86/kernel/cpu/intel.c  |   13 ++---
 arch/x86/kernel/setup_64.c   |   39 +++
 5 files changed, 59 insertions(+), 24 deletions(-)

Index: linux/arch/x86/kernel/setup_64.c
===
--- linux.orig/arch/x86/kernel/setup_64.c
+++ linux/arch/x86/kernel/setup_64.c
@@ -553,9 +553,6 @@ static void __cpuinit display_cacheinfo(
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
c->x86_cache_size, ecx & 0xFF);
}
-
-   if (n >= 0x8007)
-   cpuid(0x8007, &dummy, &dummy, &dummy, &c->x86_power);
if (n >= 0x8008) {
cpuid(0x8008, &eax, &dummy, &dummy, &dummy);
c->x86_virt_bits = (eax >> 8) & 0xff;
@@ -633,7 +630,7 @@ static void __init amd_detect_cmp(struct
 #endif
 }
 
-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
unsigned bits, ecx;
@@ -691,6 +688,15 @@ static __cpuinit int amd_apic_timer_brok
return 0;
 }
 
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+   early_init_amd_mc(c);
+
+   /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
+   if (c->x86_power & (1<<8))
+   set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
unsigned level;
@@ -740,10 +746,6 @@ static void __cpuinit init_amd(struct cp
}
display_cacheinfo(c);
 
-   /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
-   if (c->x86_power & (1<<8))
-   set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x8008)
amd_detect_cmp(c);
@@ -850,6 +852,13 @@ static void srat_detect_node(void)
 #endif
 }
 
+static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+{
+   if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+   (c->x86 == 0x6 && c->x86_model >= 0x0e))
+   set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+}
+
 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
/* Cache sizes */
@@ -1061,6 +1070,20 @@ void __cpuinit identify_cpu(struct cpuin
 #ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
 #endif
+
+   c->extended_cpuid_level = cpuid_eax(0x8000);
+
+   if (c->extended_cpuid_level >= 0x8007)
+   c->x86_power = cpuid_edx(0x8007);
+
+   switch (c->x86_vendor) {
+   case X86_VENDOR_AMD:
+   early_init_amd(c);
+   break;
+   case X86_VENDOR_INTEL:
+   early_init_intel(c);
+   break;
+   }
 }
 
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
Index: linux/arch/x86/kernel/cpu/amd.c
===
--- linux.orig/arch/x86/kernel/cpu/amd.c
+++ linux/arch/x86/kernel/cpu/amd.c
@@ -63,6 +63,15 @@ static __cpuinit int amd_apic_timer_brok
 
 int force_mwait __cpuinitdata;
 
+void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+{
+   if (cpuid_eax(0x8000) >= 0x8007) {
+   c->x86_power = cpuid_edx(0x8007);
+   if (c->x86_power & (1<<8))
+   set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+   }
+}
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
u32 l, h;
@@ -85,6 +94,8 @@ static void __cpuinit init_amd(struct cp
}
 #endif
 
+   early_init_amd(c);
+
/*
 *  FIXME: We should handle the K5 here. Set up the write
 *  range and also turn on MSR 83 bits 4 and 31 (write alloc,
@@ -257,12 +268,6 @@ static void __cpuinit init_amd(struct cp
c->x86_max_cores = (cpuid_ecx(0x8008) & 0xff) + 1;
}
 
-   if (cpuid_eax(0x8000) >= 0x8007) {
-   c->x86_power = cpuid_edx(0x8007);
-   if (c->x86_power & (1<<8))
-   set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-   }
-
 #ifdef CONFIG_X86_HT
/*
 * On a AMD multi core setup the lower bits of the APIC id
Index: linux/arch/x86/kernel/cpu/common.c
===
--- linux.orig/arch/x86/kernel/cpu/common.c
+++ linux/arch/x86/kernel/cpu/common.c
@@ -307,6 +307,15 @@ static void __init early_cpu_detect(void
cpu_detect(c);
 
get_cpu_vendor(c, 1);
+
+   switc

  1   2   3   4   5   >