date:20121205

[RFC PATCH v2 04/10] smp, cpu hotplug: Fix smp_call_function_*() to prevent CPU offline properly

2012-12-05 Thread Srivatsa S. Bhat

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() to prevent CPUs from going offline from under us.

Use the get/put_online_cpus_atomic_light() APIs to prevent changes to the
cpu_online_mask, while invoking from atomic context.

Signed-off-by: Srivatsa S. Bhat 
---

 kernel/smp.c |   38 +-
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 29dd40a..abcc4d2 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -310,7 +310,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, 
void *info,
 * prevent preemption and reschedule on another processor,
 * as well as CPU removal
 */
-   this_cpu = get_cpu();
+   get_online_cpus_atomic_light();
+   this_cpu = smp_processor_id();
 
/*
 * Can deadlock when called with interrupts disabled.
@@ -342,7 +343,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, 
void *info,
}
}
 
-   put_cpu();
+   put_online_cpus_atomic_light();
 
return err;
 }
@@ -371,8 +372,10 @@ int smp_call_function_any(const struct cpumask *mask,
const struct cpumask *nodemask;
int ret;
 
+   get_online_cpus_atomic_light();
/* Try for same CPU (cheapest) */
-   cpu = get_cpu();
+   cpu = smp_processor_id();
+
if (cpumask_test_cpu(cpu, mask))
goto call;
 
@@ -388,7 +391,7 @@ int smp_call_function_any(const struct cpumask *mask,
cpu = cpumask_any_and(mask, cpu_online_mask);
 call:
ret = smp_call_function_single(cpu, func, info, wait);
-   put_cpu();
+   put_online_cpus_atomic_light();
return ret;
 }
 EXPORT_SYMBOL_GPL(smp_call_function_any);
@@ -409,14 +412,17 @@ void __smp_call_function_single(int cpu, struct 
call_single_data *data,
unsigned int this_cpu;
unsigned long flags;
 
-   this_cpu = get_cpu();
+   get_online_cpus_atomic_light();
+
+   this_cpu = smp_processor_id();
+
/*
 * Can deadlock when called with interrupts disabled.
 * We allow cpu's that are not yet online though, as no one else can
 * send smp call function interrupt to this cpu and as such deadlocks
 * can't happen.
 */
-   WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
+   WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled()
 && !oops_in_progress);
 
if (cpu == this_cpu) {
@@ -427,7 +433,7 @@ void __smp_call_function_single(int cpu, struct 
call_single_data *data,
csd_lock(data);
generic_exec_single(cpu, data, wait);
}
-   put_cpu();
+   put_online_cpus_atomic_light();
 }
 
 /**
@@ -451,6 +457,8 @@ void smp_call_function_many(const struct cpumask *mask,
unsigned long flags;
int refs, cpu, next_cpu, this_cpu = smp_processor_id();
 
+   get_online_cpus_atomic_light();
+
/*
 * Can deadlock when called with interrupts disabled.
 * We allow cpu's that are not yet online though, as no one else can
@@ -467,17 +475,18 @@ void smp_call_function_many(const struct cpumask *mask,
 
/* No online cpus?  We're done. */
if (cpu >= nr_cpu_ids)
-   return;
+   goto out_unlock;
 
/* Do we have another CPU which isn't us? */
next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
if (next_cpu == this_cpu)
-   next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
+   next_cpu = cpumask_next_and(next_cpu, mask,
+   cpu_online_mask);
 
/* Fastpath: do that cpu by itself. */
if (next_cpu >= nr_cpu_ids) {
smp_call_function_single(cpu, func, info, wait);
-   return;
+   goto out_unlock;
}
 
data = &__get_cpu_var(cfd_data);
@@ -523,7 +532,7 @@ void smp_call_function_many(const struct cpumask *mask,
/* Some callers race with other cpus changing the passed mask */
if (unlikely(!refs)) {
csd_unlock(&data->csd);
-   return;
+   goto out_unlock;
}
 
raw_spin_lock_irqsave(&call_function.lock, flags);
@@ -554,6 +563,9 @@ void smp_call_function_many(const struct cpumask *mask,
/* Optionally wait for the CPUs to complete */
if (wait)
csd_lock_wait(&data->csd);
+
+out_unlock:
+   put_online_cpus_atomic_light();
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -574,9 +586,9 @@ EXPORT_SYMBOL(smp_call_function_many);
  */
 int smp_call_function(smp_call_func_t func, void *info, int wait)
 {
-   preempt_disable();
+   get_online_cpus_atomic_light();
smp_call_function_many(cpu_online_mask, func, info, wait);
-   preempt_enable();
+   put_online_cpus_atomic_light();
 
return 0;

[RFC PATCH v2 05/10] smp, cpu hotplug: Fix on_each_cpu_*() to prevent CPU offline properly

2012-12-05 Thread Srivatsa S. Bhat

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() to prevent CPUs from going offline from under us.

Use the get/put_online_cpus_atomic_light() APIs to prevent changes to the
cpu_online_mask, while invoking from atomic context.

Signed-off-by: Srivatsa S. Bhat 
---

 kernel/smp.c |   26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index abcc4d2..b258a92 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -688,12 +688,12 @@ int on_each_cpu(void (*func) (void *info), void *info, 
int wait)
unsigned long flags;
int ret = 0;
 
-   preempt_disable();
+   get_online_cpus_atomic_light();
ret = smp_call_function(func, info, wait);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
-   preempt_enable();
+   put_online_cpus_atomic_light();
return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
@@ -715,7 +715,11 @@ EXPORT_SYMBOL(on_each_cpu);
 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
void *info, bool wait)
 {
-   int cpu = get_cpu();
+   int cpu;
+
+   get_online_cpus_atomic_light();
+
+   cpu = smp_processor_id();
 
smp_call_function_many(mask, func, info, wait);
if (cpumask_test_cpu(cpu, mask)) {
@@ -723,7 +727,7 @@ void on_each_cpu_mask(const struct cpumask *mask, 
smp_call_func_t func,
func(info);
local_irq_enable();
}
-   put_cpu();
+   put_online_cpus_atomic_light();
 }
 EXPORT_SYMBOL(on_each_cpu_mask);
 
@@ -748,8 +752,10 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * The function might sleep if the GFP flags indicates a non
  * atomic allocation is allowed.
  *
- * Preemption is disabled to protect against CPUs going offline but not online.
- * CPUs going online during the call will not be seen or sent an IPI.
+ * We use get/put_online_cpus_atomic_light() to have a stable online mask
+ * to work with, whose CPUs won't go offline in-between our operation.
+ * And we will skip those CPUs which have already begun their offline journey.
+ * CPUs coming online during the call will not be seen or sent an IPI.
  *
  * You must not call this function with disabled interrupts or
  * from a hardware interrupt handler or from a bottom half handler.
@@ -764,26 +770,26 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void 
*info),
might_sleep_if(gfp_flags & __GFP_WAIT);
 
if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
-   preempt_disable();
+   get_online_cpus_atomic_light();
for_each_online_cpu(cpu)
if (cond_func(cpu, info))
cpumask_set_cpu(cpu, cpus);
on_each_cpu_mask(cpus, func, info, wait);
-   preempt_enable();
+   put_online_cpus_atomic_light();
free_cpumask_var(cpus);
} else {
/*
 * No free cpumask, bother. No matter, we'll
 * just have to IPI them one by one.
 */
-   preempt_disable();
+   get_online_cpus_atomic_light();
for_each_online_cpu(cpu)
if (cond_func(cpu, info)) {
ret = smp_call_function_single(cpu, func,
info, wait);
WARN_ON_ONCE(!ret);
}
-   preempt_enable();
+   put_online_cpus_atomic_light();
}
 }
 EXPORT_SYMBOL(on_each_cpu_cond);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH v2 08/10] yield_to(), cpu-hotplug: Prevent offlining of other CPUs properly

2012-12-05 Thread Srivatsa S. Bhat

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on local_irq_save() to prevent CPUs from going offline from under us.

Use the get/put_online_cpus_atomic_light() APIs to prevent changes to the
cpu_online_mask, while invoking from atomic context.

Signed-off-by: Srivatsa S. Bhat 
---

 kernel/sched/core.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84a8579..1ef595a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4312,6 +4312,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
unsigned long flags;
bool yielded = 0;
 
+   get_online_cpus_atomic_light();
local_irq_save(flags);
rq = this_rq();
 
@@ -4339,13 +4340,14 @@ again:
 * Make p's CPU reschedule; pick_next_entity takes care of
 * fairness.
 */
-   if (preempt && rq != p_rq)
+   if (preempt && rq != p_rq && cpu_online(task_cpu(p)))
resched_task(p_rq->curr);
}
 
 out:
double_rq_unlock(rq, p_rq);
local_irq_restore(flags);
+   put_online_cpus_atomic_light();
 
if (yielded)
schedule();

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH v2 09/10] kvm, vmx: Add full atomic synchronization with CPU Hotplug

2012-12-05 Thread Srivatsa S. Bhat

preempt_disable() will no longer help prevent CPUs from going offline, once
stop_machine() gets removed from the CPU offline path. So use
get/put_online_cpus_atomic_full() in vmx_vcpu_load() to prevent CPUs from
going offline while clearing vmcs. Here we truly need full-synchronization
with CPU hotplug (and not just an unchanging cpu_online_mask), because we
want to prevent race with the CPU_DYING callback from kvm.

Reported-by: Michael Wang 
Debugged-by: Xiao Guangrong 
Signed-off-by: Srivatsa S. Bhat 
---

 arch/x86/kvm/vmx.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f858159..23c1063 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1519,10 +1519,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 
-   if (!vmm_exclusive)
+   if (!vmm_exclusive) {
kvm_cpu_vmxon(phys_addr);
-   else if (vmx->loaded_vmcs->cpu != cpu)
+   } else if (vmx->loaded_vmcs->cpu != cpu) {
+   /* Prevent any CPU from going offline */
+   get_online_cpus_atomic_full();
loaded_vmcs_clear(vmx->loaded_vmcs);
+   put_online_cpus_atomic_full();
+   }
 
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH v2 10/10] cpu: No more __stop_machine() in _cpu_down()

2012-12-05 Thread Srivatsa S. Bhat

From: Paul E. McKenney 

The _cpu_down() function invoked as part of the CPU-hotplug offlining
process currently invokes __stop_machine(), which is slow and inflicts
substantial real-time latencies on the entire system.  This patch
substitutes stop_cpus() for __stop_machine() in order to improve
both performance and real-time latency.

This is currently unsafe, because there are a number of uses of
preempt_disable() that are intended to block CPU-hotplug offlining.
These will be fixed by using get/put_online_cpus_atomic_light(), or
get/put_online_cpus_atomic_full(), but in the meantime, this commit is one
way to help locate them.

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
[ Srivatsa: Refer to the new sync primitives for readers, in the changelog ]
Signed-off-by: Srivatsa S. Bhat 
---

 kernel/cpu.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index c71c723..00a1edc 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -418,7 +418,7 @@ static int __ref _cpu_down(unsigned int cpu, int 
tasks_frozen)
}
smpboot_park_threads(cpu);
 
-   err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+   err = stop_cpus(cpumask_of(cpu), take_cpu_down, &tcd_param);
if (err) {
/* CPU didn't die: tell everyone.  Can't complain. */
smpboot_unpark_threads(cpu);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ANNOUNCE] Git v1.8.1-rc0

2012-12-05 Thread Junio C Hamano

Ramsay Jones  writes:

> I fetch git from 'git://git.kernel.org/pub/scm/git/git.git' which has
> commit ee26a6e2 ("Git 1.8.1-rc0", 03-12-2012), but is missing the v1.8.1-rc0
> tag. Is this just an oversight ...

Thanks for letting me know; forgot to push out the tag.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Fix perf mmap limitations on 32-bit.

2012-12-05 Thread Arnaldo Carvalho de Melo

Em Wed, Dec 05, 2012 at 01:32:53PM -0500, David Miller escreveu:
> From: David Miller 
> Date: Sat, 10 Nov 2012 14:12:19 -0500 (EST)
> 
> 
> Ping?

Went to my perf/core branch:

http://git.kernel.org/?p=linux/kernel/git/acme/linux.git;a=shortlog;h=refs/heads/perf/core

http://git.kernel.org/?p=linux/kernel/git/acme/linux.git;a=commitdiff;h=69a8ae881d6448aab652756affc2bdaf2b223121

- Arnaldo
 
> > This is a suggested patch to fix the bug I reported at:
> > 
> > http://marc.info/?l=linux-kernel&m=135033028924652&w=2
> > 
> > Essentially, there is a hard requirement that when perf analyzes a
> > trace, it must have the entire thing mmap()'d.
> > 
> > Therefore the scheme used on 32-bit where we have a fixed (8) number
> > of 32MB mmaps, and cycle through them, simply does not work.
> > 
> > One of the reasons this requirement exists is because the iterators
> > maintain references to perf entry objects and those references don't
> > just simply go away when this mmap code decides to cycle an old mmap
> > area out and reuse it.  At this point, those entry pointers now point
> > to garbage resulting in unpredictable behavior and crashes.
> > 
> > It is better to try to mmap() as much as we can and if we do actually
> > run into address space limitations, the failure of the mmap() call
> > will indicate that and stop processing.
> > 
> > I noticed that perf_session->mmap_window is set to a constant in one
> > location, and only used in one other location.  So I got rid of it
> > altogether.
> > 
> > So we adjust the size of the mmaps[] array to the maximum we could
> > need.  On 64-bit we only need one slot.  On 32-bit we could need
> > up to 128 (128 * 32MB == 4GB).
> > 
> > I've verified that this allows a large (~600MB) perf.data file to
> > be analyzed properly with a 32-bit perf binary, which previously
> > was not possible.
> > 
> > Signed-off-by: David S. Miller 
> > 
> > diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> > index 8cdd232..2cd3cc3 100644
> > --- a/tools/perf/util/session.c
> > +++ b/tools/perf/util/session.c
> > @@ -128,15 +128,6 @@ struct perf_session *perf_session__new(const char 
> > *filename, int mode,
> > goto out;
> >  
> > memcpy(self->filename, filename, len);
> > -   /*
> > -* On 64bit we can mmap the data file in one go. No need for tiny mmap
> > -* slices. On 32bit we use 32MB.
> > -*/
> > -#if BITS_PER_LONG == 64
> > -   self->mmap_window = ULLONG_MAX;
> > -#else
> > -   self->mmap_window = 32 * 1024 * 1024ULL;
> > -#endif
> > self->machines = RB_ROOT;
> > self->repipe = repipe;
> > INIT_LIST_HEAD(&self->ordered_samples.samples);
> > @@ -1369,6 +1360,18 @@ fetch_mmaped_event(struct perf_session *session,
> > return event;
> >  }
> >  
> > +/*
> > + * On 64bit we can mmap the data file in one go. No need for tiny mmap
> > + * slices. On 32bit we use 32MB.
> > + */
> > +#if BITS_PER_LONG == 64
> > +#define MMAP_SIZE ULLONG_MAX
> > +#define NUM_MMAPS 1
> > +#else
> > +#define MMAP_SIZE (32 * 1024 * 1024ULL)
> > +#define NUM_MMAPS 128
> > +#endif
> > +
> >  int __perf_session__process_events(struct perf_session *session,
> >u64 data_offset, u64 data_size,
> >u64 file_size, struct perf_tool *tool)
> > @@ -1376,7 +1379,7 @@ int __perf_session__process_events(struct 
> > perf_session *session,
> > u64 head, page_offset, file_offset, file_pos, progress_next;
> > int err, mmap_prot, mmap_flags, map_idx = 0;
> > size_t  page_size, mmap_size;
> > -   char *buf, *mmaps[8];
> > +   char *buf, *mmaps[NUM_MMAPS];
> > union perf_event *event;
> > uint32_t size;
> >  
> > @@ -1393,7 +1396,7 @@ int __perf_session__process_events(struct 
> > perf_session *session,
> >  
> > progress_next = file_size / 16;
> >  
> > -   mmap_size = session->mmap_window;
> > +   mmap_size = MMAP_SIZE;
> > if (mmap_size > file_size)
> > mmap_size = file_size;
> >  
> > diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
> > index dd64261..903966b 100644
> > --- a/tools/perf/util/session.h
> > +++ b/tools/perf/util/session.h
> > @@ -29,7 +29,6 @@ struct ordered_samples {
> >  struct perf_session {
> > struct perf_header  header;
> > unsigned long   size;
> > -   unsigned long   mmap_window;
> > struct machine  host_machine;
> > struct rb_root  machines;
> > struct perf_evlist  *evlist;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH v2 03/10] CPU hotplug: Convert preprocessor macros to static inline functions

2012-12-05 Thread Srivatsa S. Bhat

On 12/05/2012 06:10 AM, Andrew Morton wrote:
"static inline C functions would be preferred if possible.  Feel free to
fix up the wrong crufty surrounding code as well ;-)"

Convert the macros in the CPU hotplug code to static inline C functions.

Signed-off-by: Srivatsa S. Bhat 
---

 include/linux/cpu.h |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e2a9c49..599b376 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -200,12 +200,12 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #else  /* CONFIG_HOTPLUG_CPU */
 
-#define get_online_cpus()  do { } while (0)
-#define put_online_cpus()  do { } while (0)
-#define get_online_cpus_atomic_light() do { } while (0)
-#define put_online_cpus_atomic_light() do { } while (0)
-#define get_online_cpus_atomic_full()  do { } while (0)
-#define put_online_cpus_atomic_full()  do { } while (0)
+static inline void get_online_cpus(void) {}
+static inline void put_online_cpus(void) {}
+static inline void get_online_cpus_atomic_light(void) {}
+static inline void put_online_cpus_atomic_light(void) {}
+static inline void get_online_cpus_atomic_full(void) {}
+static inline void put_online_cpus_atomic_full(void) {}
 #define hotcpu_notifier(fn, pri)   do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)   ({ (void)(nb); 0; })

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

Replaying what Tejun wrote:

(cc'ing Oleg)

Hello, Srivatsa.

On 12/06/2012 12:13 AM, Srivatsa S. Bhat wrote:
> Also, since we don't use per-cpu locks (because rwlocks themselves are quite
> scalable for readers), we don't end up in any lock ordering problems that can
> occur if we try to use per-cpu locks.
> 

Read-lock really isn't that scalable when you compare it to
preempt_disable/enable().  When used on hot paths, it's gonna generate
a lot of cacheline pingpongs.  This patch is essentially creating a
new big lock which has potential for being very hot.

preempt_disable/enable() + stop_machine() essentially works as percpu
rwlock with very heavy penalty on the writer side.  Because the reader
side doesn't even implement spinning while writer is in progress, the
writer side has to preempt the readers before entering critical
section and that's what the "stopping machine" is about.

Note that the resolution on the reader side is very low.  Any section
w/ preemption disabled is protected against stop_machine().  Also, the
stop_machine() itself is extremely heavy involving essentially locking
up the machine until all CPUs can reach the same condition via
scheduling the stop_machine tasks.  So, I *think* all you need to do
here is making cpu online locking finer grained (separated from
preemption) and lighten the writer side a bit.  I'm quite doubtful
that you would need to go hunting down all get_online_cpus().  They
aren't used that often anyway.

Anyways, so, separating out cpu hotplug locking from preemption is the
right thing to do but I think rwlock is likely to be too heavy on the
reader side.  I think percpu reader accounting + reader spinning while
writer in progress should be a good combination.  It's a bit heavier
than preempt_disable() - it'll have an extra conditional jump on the
hot path, but there won't be any cacheline bouncing.  The writer side
would need to synchronize against all CPUs but only against the ones
actually read locking cpu hotplug.  As long as reader side critical
sections don't go crazy, it should be okay.

So, we basically need percpu_rwlock.  We already have percpu_rwsem.
We used to have some different variants of writer-heavy locks.  Dunno
what happened to them.  Maybe we still have it somewhere.  Oleg has
been working on the area lately and should know more.  Oleg, it seems
CPU hotplug needs big-reader rwlock, ideas on how to proceed?

Thanks.

-- tejun 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 6/6 v8] cpufreq, highbank: add support for highbank cpufreq

2012-12-05 Thread Mike Turquette

On Wed, Dec 5, 2012 at 8:48 AM, Mark Langsdorf
 wrote:
> diff --git a/drivers/cpufreq/highbank-cpufreq.c 
> b/drivers/cpufreq/highbank-cpufreq.c
> new file mode 100644
> index 0000000..1f28fa6
> --- /dev/null
> +++ b/drivers/cpufreq/highbank-cpufreq.c
> @@ -0,0 +1,102 @@

Looks pretty good to me.  Some tedious nitpicks and discussion below.

> +static int hb_voltage_change(unsigned int freq)
> +{
> +   int i;
> +   u32 msg[7];
> +
> +   msg[0] = HB_CPUFREQ_CHANGE_NOTE;
> +   msg[1] = freq / 100;
> +   for (i = 2; i < 7; i++)
> +   msg[i] = 0;
> +
> +   return pl320_ipc_transmit(msg);
> +}
> +
> +static int hb_cpufreq_clk_notify(struct notifier_block *nb,
> +   unsigned long action, void *hclk)
> +{
> +   struct clk_notifier_data *clk_data = hclk;
> +   int i = 0;
> +
> +   if (action == PRE_RATE_CHANGE) {
> +   if (clk_data->new_rate > clk_data->old_rate)
> +   while (hb_voltage_change(clk_data->new_rate))
> +   if (i++ > 15)

There are a few magic numbers here.  How about something like:

#define HB_VOLT_CHANGE_MAX_TRIES 15

Maybe do the same for the i2c message length?

> +   return NOTIFY_STOP;

How about NOTIFY_BAD?  It more clearly signals that an error has occurred.

You could also return notifier_from_errno(-ETIMEDOUT) here if you
prefer but that would only be for the sake of readability.
clk_set_rate doesn't actually return the notifier error code in the
event of a notifier abort.

> +   } else if (action == POST_RATE_CHANGE) {
> +   if (clk_data->new_rate < clk_data->old_rate)
> +   while (hb_voltage_change(clk_data->new_rate))
> +   if (i++ > 15)
> +   break;

Same as above.  It is true that the clock framework does nothing with
post-rate change notifier aborts but that might change in the future.

> +   }
> +
> +   return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block hb_cpufreq_clk_nb = {
> +   .notifier_call = hb_cpufreq_clk_notify,
> +};
> +

Do you have any plans to convert your voltage change routine over to
the regulator framework?  Likewise do you plan to use the OPP library
in the future?  I can understand if you do not do that since your
regulator/dvfs programming model makes things very simple for you.

The reason I bring this up is that I did float a patch a while back
for a generalized dvfs notifier handler.  The prereqs for using it are
1) ccf, 2) regulator fwk, 3) opp definitions.  Here is the patch:
https://github.com/mturquette/linux/commit/05a280bbc0819a6858d73088a632666f0c7f68a4

And an example usage in the OMAP CPUfreq driver:
https://github.com/mturquette/linux/commit/958f10bb98a293aa912e7eb9cd6edbdc51c1c04a

I understand if this approach incurs too much software overhead for
you but I wanted to throw it out there.  It might work nicely in
the cpufreq-cpu0 driver or some other "generic" CPUfreq driver for
implementing DVFS.

Regards,
Mike
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ANNOUNCE] Git v1.8.1-rc0

2012-12-05 Thread Ramsay Jones

Junio C Hamano wrote:
> A release candidate preview, Git v1.8.1-rc0, is now available for
> testing at the usual places.
> 
> This cycle has been a bit slow (perhaps because it had a major US
> holiday to slow people down) but we seem to have managed to apply
> reasonably large number of usability improvement changes, with a
> handful of new features.  There are several new and large-ish topics
> that are cooking in 'next', but I think we would better keep them
> cooking there without merging them to 'master' before the upcoming
> release to happen before the year end.  So as far as features goes,
> this preview release is pretty much *it*.
> 
> The release tarballs are found at:
> 
> http://code.google.com/p/git-core/downloads/list
> 
> and their SHA-1 checksums are:
> 
> 39faaa15bc71f8eb52048e77ea564cecf78c7adf  git-1.8.1.rc0.tar.gz
> 2eeba24488337de02b58dc442258d58b79e2b8f4  git-htmldocs-1.8.1.rc0.tar.gz
> b28d1f8e8b9268b712b33fbdfb67dd6f14afb499  git-manpages-1.8.1.rc0.tar.gz
> 
> Also the following public repositories all have a copy of the v1.8.1-rc0
> tag and the master branch that the tag points at:
> 
>   url = git://repo.or.cz/alt-git.git
>   url = https://code.google.com/p/git-core/
>   url = git://git.sourceforge.jp/gitroot/git-core/git.git
>   url = git://git-core.git.sourceforge.net/gitroot/git-core/git-core
>   url = https://github.com/gitster/git
> 

I fetch git from 'git://git.kernel.org/pub/scm/git/git.git' which has
commit ee26a6e2 ("Git 1.8.1-rc0", 03-12-2012), but is missing the v1.8.1-rc0
tag. Is this just an oversight (the above list suggests not), or should I not
be using kernel.org?

ATB,
Ramsay Jones



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

Replaying what Oleg wrote:

Hi,

Sorry I don't understand the context and I can't find this thread
anywhere, so I am not sure I understand...

> Replaying what Tejun wrote:
> So, we basically need percpu_rwlock.  We already have percpu_rwsem.

Yes, and with -mm patches it becomes reader-friendly. In particular
see http://marc.info/?l=linux-mm-commits&m=135240650828875

> Oleg, it seems
> CPU hotplug needs big-reader rwlock, ideas on how to proceed?
> 

I am going to convert get_online_cpus() to use percpu_down_read(),
this looks simple.

We already discussed this with Paul, see

http://marc.info/?l=linux-kernel&m=135248463226031

and the whole thread.

In short, all we need is percpu_down_write_recursive_readers() and
afaics the only complication is lockdep, we need down_read_no_lockdep()
which (like __up_read) doesn't do rwsem_acquire_read().

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [BUG -next] cpufreq: cpufreq_governor.

2012-12-05 Thread Ilya Zykov

What do I do wrong?

After: modprobe cpufreq_ondemand
I have:

WARNING: Error inserting freq_table 
(/lib/modules/3.7.0-rc8-next-20121205-ttybuf.1+/kernel/drivers/cpufreq/freq_table.ko):
 Unknown symbol in module, or unknown parameter (see dmesg)
FATAL: Error inserting cpufreq_ondemand 
(/lib/modules/3.7.0-rc8-next-20121205-ttybuf.1+/kernel/drivers/cpufreq/cpufreq_ondemand.ko):
 Unknown symbol in module, or unknown parameter (see dmesg)

dmesg:
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol 
__cpufreq_driver_getavg (err 0)
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol sysfs_create_group 
(err 0)
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol sysfs_remove_group 
(err 0)
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol 
__cpufreq_driver_target (err 0)
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol 
get_cpu_idle_time_us (err 0)
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol 
delayed_work_timer_fn (err 0)
Dec  5 22:26:11 bm kernel: cpufreq_governor: Unknown symbol 
get_cpu_iowait_time_us (err 0)

cat /proc/kallsyms |grep freq_dr
814976e0 T __cpufreq_driver_target
814987e0 T __cpufreq_driver_getavg
81498850 T cpufreq_driver_target
81881650 r __ksymtab___cpufreq_driver_getavg
81881660 r __ksymtab___cpufreq_driver_target
81883b40 r __ksymtab_cpufreq_driver_target
81894290 r __kcrctab___cpufreq_driver_getavg
81894298 r __kcrctab___cpufreq_driver_target
81895508 r __kcrctab_cpufreq_driver_target
818b3080 r __kstrtab___cpufreq_driver_getavg
818b3098 r __kstrtab_cpufreq_driver_target
818b30ae r __kstrtab___cpufreq_driver_target
81e240c8 b cpufreq_driver_lock
81e240d0 b cpufreq_driver
a0c42000 d acpi_cpufreq_driver  [acpi_cpufreq]

lsmod |grep cpufr
acpi_cpufreq   18066  1 
freq_table 14199  1 acpi_cpufreq
mperf  12668  1 acpi_cpufreq

.config

CONFIG_64BIT=y
CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_INSTRUCTION_DECODER=y
CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_HAVE_LATENCYTOP_SUPPORT=y
CONFIG_MMU=y
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_NEED_SG_DMA_LENGTH=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_GPIO=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_DEFAULT_IDLE=y
CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
CONFIG_ARCH_HAS_CPU_AUTOPROBE=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
CONFIG_ARCH_HIBERNATION_POSSIBLE=y
CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ZONE_DMA32=y
CONFIG_AUDIT_ARCH=y
CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
CONFIG_HAVE_INTEL_TXT=y
CONFIG_X86_64_SMP=y
CONFIG_X86_HT=y
CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx 
-fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 
-fcall-saved-r11"
CONFIG_ARCH_CPU_PROBE_RELEASE=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_EXTABLE_SORT=y

CONFIG_EXPERIMENTAL=y
CONFIG_INIT_ENV_ARG_LIMIT=32
CONFIG_CROSS_COMPILE=""
CONFIG_LOCALVERSION="-ttybuf.1"
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_KERNEL_GZIP=y
CONFIG_DEFAULT_HOSTNAME="(none)"
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
CONFIG_POSIX_MQUEUE_SYSCTL=y
CONFIG_FHANDLE=y
CONFIG_AUDIT=y
CONFIG_AUDITSYSCALL=y
CONFIG_AUDIT_WATCH=y
CONFIG_AUDIT_TREE=y
CONFIG_HAVE_GENERIC_HARDIRQS=y

CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_IRQ_SHOW=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_GENERIC_IRQ_CHIP=y
CONFIG_IRQ_DOMAIN=y
CONFIG_IRQ_FORCED_THREADING=y
CONFIG_SPARSE_IRQ=y
CONFIG_CLOCKSOURCE_WATCHDOG=y
CONFIG_ARCH_CLOCKSOURCE_DATA=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
CONFIG_GENERIC_CMOS_UPDATE=y

CONFIG_TICK_ONESHOT=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y

CONFIG_TICK_CPU_ACCOUNTING=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y

CONFIG_TREE_RCU=y
CONFIG_RCU_FANOUT=64
CONFIG_RCU_FANOUT_LEAF=16
CONFIG_LOG_BUF_SHIFT=19
CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_WANTS_NUMA_GENERIC_PGPROT=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_PROC_PID_CPUSET=y
C

Re: [PATCH v2 12/44] metag: TBX source

2012-12-05 Thread Joe Perches

On Wed, 2012-12-05 at 16:08 +0000, James Hogan wrote:
> Add source files from the Thread Binary Interface (TBI) library which
> provides useful low level operations and traps/context management.
[]
>  arch/metag/tbx/tbicache.c  |  462 
> 

Could you please run checkpatch on this file?


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

Replaying what Tejun wrote:

Hello, Oleg.

> Replaying what Oleg wrote:
> 
> Hi,
> 
> Sorry I don't understand the context and I can't find this thread
> anywhere, so I am not sure I understand...
> 

Weird, lkml cc is missing.  Srivatsa?

[Now fixed. This thread has lkml CC]

>> Replaying what Tejun wrote:
>> So, we basically need percpu_rwlock.  We already have percpu_rwsem.
> 
> Yes, and with -mm patches it becomes reader-friendly. In particular
> see http://marc.info/?l=linux-mm-commits&m=135240650828875
> 
>> Oleg, it seems
>> CPU hotplug needs big-reader rwlock, ideas on how to proceed?
>>
> 
> I am going to convert get_online_cpus() to use percpu_down_read(),
> this looks simple.
> 
> We already discussed this with Paul, see
> 
>   http://marc.info/?l=linux-kernel&m=135248463226031
> 
> and the whole thread.
> 
> In short, all we need is percpu_down_write_recursive_readers() and
> afaics the only complication is lockdep, we need down_read_no_lockdep()
> which (like __up_read) doesn't do rwsem_acquire_read().
> 

So, it's a different thing.  There are two mechanisms protecting
against cpu hotplug - get_online_cpus() and preempt_disable().  The
former can be used by ones which can sleep and need to protect against
the whole up/down process (DOWN_PREPARE and so on).  The latter
protects the last step and can be used when the caller can't sleep.
Replacing get_online_cpus() w/ percpu_rwsem is great but this thread
is about replacing preempt_disable with something finer grained and
less heavy on the writer side - IOW, percpu_rwlock as opposed to
percpu_rwsem, so, I think the end result would be that CPU hotplug
will be protected by percpu_rwsem for the whole part and by
percpu_rwlock for the last commit stage.

The problem seems that we don't have percpu_rwlock yet.  It shouldn't
be too difficult to implement, right?

Thanks.

-- tejun 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

Replaying what Oleg wrote:

(add lkml)

> Replaying what Tejun wrote:

> Replacing get_online_cpus() w/ percpu_rwsem is great but this thread
> is about replacing preempt_disable with something finer grained and
> less heavy on the writer side

If only I understood why preempt_disable() is bad ;-)

OK, I guess "less heavy on the writer side" is the hint, and in the
previous email you mentioned that "stop_machine() itself is extremely
heavy".

Looks like, you are going to remove stop_machine() from cpu_down ???

> The problem seems that we don't have percpu_rwlock yet.  It shouldn't
> be too difficult to implement, right?
> 

Oh, I am not sure... unless you simply copy-and-paste the lglock code
and replace spinlock_t with rwlock_t.

We probably want something more efficient, but I bet we can't avoid
the barriers on the read side.

And somehow we should avoid the livelocks. Say, we can't simply add
the per_cpu_reader_counter, _read_lock should spin if the writer is
active. But at the same time _read_lock should be recursive.

Tejun, could you please send me mbox with this thread offlist?

[That should now be unnecessary, since the discussion can continue
on-list on this thread].

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3.7-rc8] drm: Fix possible EDID memory allocation oops

2012-12-05 Thread Tim Gardner

The result of drm_property_create_blob() is not checked for success
which could lead to a NULL pointer dereference.

I was led to this by a smatch warning:

drivers/gpu/drm/drm_crtc.c:3186 drm_mode_connector_update_edid_property() 
error: potential null dereference 'connector->edid_blob_ptr'.  
(drm_property_create_blob returns null)

drm_property_create_blob() calls kzalloc() which can return NULL.

Cc: David Airlie 
Cc: dri-de...@lists.freedesktop.org
Cc: sta...@vger.kernel.org # 3.0+
Signed-off-by: Tim Gardner 
---

This bug actually exists in the original commit 
f453ba0460742ad027ae0c4c7d61e62817b3e7ef
(2.6.29), but I only checked as far back as 3.0 for stable.

 drivers/gpu/drm/drm_crtc.c |4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index ef1b221..31872ba 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -3180,6 +3180,10 @@ int drm_mode_connector_update_edid_property(struct 
drm_connector *connector,
size = EDID_LENGTH * (1 + edid->extensions);
connector->edid_blob_ptr = drm_property_create_blob(connector->dev,
size, edid);
+   if (!connector->edid_blob_ptr) {
+   pr_err("drm: Could not allocate %d edid blob bytes.\n", size);
+   return -ENOMEM;
+   }
 
ret = drm_connector_property_set_value(connector,
   dev->mode_config.edid_property,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

Replaying what Tejun wrote:

Hello, Oleg.

> Replaying what Oleg wrote:
>> Replacing get_online_cpus() w/ percpu_rwsem is great but this thread
>> is about replacing preempt_disable with something finer grained and
>> less heavy on the writer side
> 
> If only I understood why preempt_disable() is bad ;-)
> 
> OK, I guess "less heavy on the writer side" is the hint, and in the
> previous email you mentioned that "stop_machine() itself is extremely
> heavy".
> 
> Looks like, you are going to remove stop_machine() from cpu_down ???
>

Yeah, that's what Srivatsa is trying to do.  The problem seems to be
that cpu up/down is very frequent on certain mobile platforms for
power management and as currently implemented cpu hotplug is too heavy
and latency-inducing.

>> The problem seems that we don't have percpu_rwlock yet.  It shouldn't
>> be too difficult to implement, right?
>>
> 
> Oh, I am not sure... unless you simply copy-and-paste the lglock code
> and replace spinlock_t with rwlock_t.
> 

Ah... right, so that's where brlock ended up.  So, lglock is the new
thing and brlock is a wrapper around it.

> We probably want something more efficient, but I bet we can't avoid
> the barriers on the read side.
> 
> And somehow we should avoid the livelocks. Say, we can't simply add
> the per_cpu_reader_counter, _read_lock should spin if the writer is
> active. But at the same time _read_lock should be recursive.
> 

I think we should just go with lglock.  It does involve local atomic
ops but atomic ops themselves aren't that expensive and it's not like
we can avoid memory barriers.  Also, that's the non-sleeping
counterpart of percpu_rwsem.  If it's not good enough for some reason,
we should improve it rather than introducing something else.

Thanks.

-- tejun

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/6 v9] gpio: Add userland device interface to block GPIO

2012-12-05 Thread Wolfgang Grandegger

On 12/04/2012 09:39 PM, Roland Stigge wrote:
> This patch adds a character device interface to the block GPIO system.
> 
> Signed-off-by: Roland Stigge 
> ---
>  Documentation/ABI/testing/dev-gpioblock |   34 +
>  drivers/gpio/gpiolib.c  |  208 
> +++-
>  include/linux/gpio.h|   10 +
>  3 files changed, 251 insertions(+), 1 deletion(-)
> 
> --- /dev/null
> +++ linux-2.6/Documentation/ABI/testing/dev-gpioblock
> @@ -0,0 +1,34 @@
> +What:/dev/
> +Date:Nov 2012
> +KernelVersion:   3.7
> +Contact: Roland Stigge 
> +Description: The /dev/ character device node provides userspace
> + access to GPIO blocks, named exactly as the block, e.g.
> + /dev/block0.
> +
> + Reading:
> + When reading sizeof(unsigned long) bytes from the device, the
> + current state of the block, masked by the current mask (see
> + below) can be obtained as a word. When the device is opened
> + with O_NONBLOCK, read() always returns with data immediately,
> + otherwise it blocks until data is available, see IRQ handling
> + below.
> +
> + Writing:
> + By writing sizeof(unsigned long) bytes to the device, the
> + current state of the block can be set. This operation is
> + masked by the current mask (see below).
> +
> + IRQ handling:
> + When one or more IRQs in the block are IRQ capable, you can
> + poll() on the device to check/wait for this IRQ. If no IRQ
> + is available, poll() returns -ENOSYS and userspace needs to
> + (busy) poll itself if necessary.
> +
> + Setting the mask (default: all bits set):
> + By doing an ioctl(fd, 0, &mask) with an unsigned long mask, the
> + current mask for read and write operations on this gpio block
> + can be set.
> +
> + See also Documentation/gpio.txt for an explanation of block
> + GPIO.
> --- linux-2.6.orig/drivers/gpio/gpiolib.c
> +++ linux-2.6/drivers/gpio/gpiolib.c
> @@ -11,6 +11,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #define CREATE_TRACE_POINTS
>  #include 
> @@ -2122,6 +2124,190 @@ struct gpio_block *gpio_block_find_by_na
>  }
>  EXPORT_SYMBOL_GPL(gpio_block_find_by_name);
>  
> +static struct gpio_block *gpio_block_find_by_minor(int minor)
> +{
> + struct gpio_block *i;
> +
> + list_for_each_entry(i, _block_list, list)
> + if (i->miscdev.minor == minor)
> + return i;
> + return NULL;
> +}
> +
> +static bool gpio_block_is_irq_duplicate(struct gpio_block *block, int index)
> +{
> + int irq = gpio_to_irq(block->gpio[index]);
> + int i;
> +
> + for (i = 0; i < index; i++)
> + if (gpio_to_irq(block->gpio[i]) == irq)
> + return true;
> + return false;
> +}
> +
> +static irqreturn_t gpio_block_irq_handler(int irq, void *dev)
> +{
> + struct gpio_block *block = dev;
> +
> + wake_up_interruptible(>wait_queue);
> + block->got_int = true;
> +
> + return IRQ_HANDLED;
> +}
> +
> +static int gpio_block_fop_open(struct inode *in, struct file *f)
> +{
> + int i;
> + struct gpio_block *block = gpio_block_find_by_minor(MINOR(in->i_rdev));
> + int status;
> + int irq;
> +
> + if (!block)
> + return -ENOENT;
> +
> + block->irq_controlled = false;
> + block->got_int = false;
> + init_waitqueue_head(>wait_queue);
> + f->private_data = block;
> +
> + for (i = 0; i < block->ngpio; i++) {
> + status = gpio_request(block->gpio[i], "gpioblock dev");

You could use the name of the GPIO block.

> + if (status)
> + goto err1;
> +
> + irq = gpio_to_irq(block->gpio[i]);
> + if (irq >= 0 &&
> + !test_bit(FLAG_IS_OUT, _desc[block->gpio[i]].flags) &&
> + !gpio_block_is_irq_duplicate(block, i)) {
> + status = request_irq(irq, gpio_block_irq_handler,
> +  IRQF_TRIGGER_FALLING,
> +  block->name, block);
> + if (status)
> + goto err2;
> +
> + block->irq_controlled = true;
> + }
> + }

There is no need to request IRQs if "O_NONBLOCK" is specified.

> +
> + return 0;
> +
> +err1:
> + while (i > 0) {
> + i--;
> +
> + irq = gpio_to_irq(block->gpio[i]);
> + if (irq >= 0 &&
> + !test_bit(FLAG_IS_OUT, _desc[block->gpio[i]].flags) &&
> + !gpio_block_is_irq_duplicate(block, i))
> + free_irq(irq, block);
> +err2:
> + gpio_free(block->gpio[i]);
> + }
> + return status;
>

[PATCH RFC 0/1] cpufreq/x86: Add P-state driver for sandy bridge.

2012-12-05 Thread dirk . brandewie

From: Dirk Brandewie 

This driver provides a P state driver for Sandybridge and Ivybridge
processors.
 
Motivation:
The goal of this driver is to improve the power efficiency of
Sandybridge/Ivybridge based systems.  As the investigation into how to
achieve this goal progressed it became apparent (to me) that some of the
design assumptions of the cpufreq subsystem are no longer valid and
that a micro-architecture specific P state driver would be less complex
and potentially more efficient.  As Intel continues to innovate in the
area of frequency/power control this will become more true IMHO.

General info:
The driver uses a PID controller to adjust the core frequency based on
the presented load. The driver exposes the tuning parameters for the
controller in the /sys/devices/system/cpu/cpufreq/snb directory.  The
controller code is being used in PI mode with the default tuning
parameters.

Tuning parameters:
   setpoint - load in percent that the driver will attempt to maintain on the core.
   sample_rate_ms - rate at which the driver will sample the load on the core. 
   deadband  - percent ± around the setpoint the controller will
   consider zero error.
   p_gain_pct - Proportional gain in percent. 
   i_gain_pct - Integral gain in percent. 
   d_gain_pct - Derivative gain in percent

To use the driver as root run the following shell script:
   #!/bin/sh
   for file in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
   do 
  echo snb > $file
   done

Limitations:

ATM this driver will only run on SandyBridge systems; testing on
Ivybridge systems is not complete.
 
Open Questions:

What is the correct way to integrate this driver into the system?  The
current implementation registers as a cpufreq frequency governor, this
was done to streamline testing using cpufreq to load/unload governors.

What tuning parameters should be exposed via sysfs (if any)?  ATM all
the PID parameters are exposed to enable tuning of the driver.


Performance information:

 --- Kernel build ---
The following is data collected for a bzImage kernel build.  The
commands used were:
 make -j8 clean
 sysctl -w vm.drop_caches=3
 /usr/bin/time -f "%E %c" make -j8 bzImage

Time and context switches measured with /usr/bin/time -f "%E %c"

Energy measured with package energy status MSR described in section
14.7 in the Intel® 64 and IA-32 Architectures Software Developer’s
Manual Volume 3.
   http://download.intel.com/products/processor/manual/325384.pdf

Average watts calculated with energy/time in seconds

          time      ctx sw  energy  avg watts
perf      02:24.49  116721  6660    46.09
snb       02:27.03  114940  6591    44.83
ondemand  02:26.83  190948  6683    45.51

A graph of the power usage during the kernel build for each governor
is available here:
http://git.fenrus.org/dirk/kernel.png

 --- Power benchmark ---
I used an industry-standard power benchmark suite to compare the performance
and ondemand governors against the Sandybridge governor.

  Governor| ssj_ops/watt
 -
  performance |   1855
  ondemand|   1839
  snb |   2016
 
A graph of the power usage for each governor is available here:
http://git.fenrus.org/dirk/power_benchmark.png

A graph showing the results of the cpufreq-bench tool shipped with the
kernel, collected with
   cpufreq-bench -l 6000 -s 6000 -x 2000 -y 2000 -c 0 \
   -g {ondemand | snb} -n 40 -r 40 
is available here:
http://git.fenrus.org/dirk/cpufreq-bench.png

Dirk Brandewie (1):
  cpufreq/x86: Add P-state driver for sandy bridge.

 drivers/cpufreq/Kconfig.x86   |8 +
 drivers/cpufreq/Makefile  |1 +
 drivers/cpufreq/cpufreq_snb.c |  727 +
 3 files changed, 736 insertions(+), 0 deletions(-)
 create mode 100644 drivers/cpufreq/cpufreq_snb.c

-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RFC 1/1] cpufreq/x86: Add P-state driver for sandy bridge.

2012-12-05 Thread dirk . brandewie

From: Dirk Brandewie 

Add a P-state driver for the Sandy bridge processor.

This driver provides better power efficiency than the current
governors of the Intel architecture.  The driver masquerades as a
frequency governor to the cpufreq subsystem but does not use cpufreq
to change frequency.

Issues:
  does not report current frequency via cpufreq subsystem so this
  confuses some tools.


Signed-off-by: Dirk Brandewie 
---
 drivers/cpufreq/Kconfig.x86   |8 +
 drivers/cpufreq/Makefile  |1 +
 drivers/cpufreq/cpufreq_snb.c |  727 +
 3 files changed, 736 insertions(+), 0 deletions(-)
 create mode 100644 drivers/cpufreq/cpufreq_snb.c

diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 934854a..8c8acd3 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -2,6 +2,14 @@
 # x86 CPU Frequency scaling drivers
 #
 
+config X86_SNB_CPUFREQ
+   tristate "SandyBridge frequency Governor"
+   help
+ This driver will override the CPU_FREQ subsystem when
+the system has a SandyBridge processor
+
+If in doubt, say N.
+
 config X86_PCC_CPUFREQ
tristate "Processor Clocking Control interface driver"
depends on ACPI && ACPI_PROCESSOR
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 1bc90e1..71ad49e 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_X86_SPEEDSTEP_SMI)   += 
speedstep-smi.o
 obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)   += speedstep-centrino.o
 obj-$(CONFIG_X86_P4_CLOCKMOD)  += p4-clockmod.o
 obj-$(CONFIG_X86_CPUFREQ_NFORCE2)  += cpufreq-nforce2.o
+obj-$(CONFIG_X86_SNB_CPUFREQ)  += cpufreq_snb.o
 
 
##
 # ARM SoC drivers
diff --git a/drivers/cpufreq/cpufreq_snb.c b/drivers/cpufreq/cpufreq_snb.c
new file mode 100644
index 000..0d46862
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_snb.c
@@ -0,0 +1,727 @@
+/*
+ * cpufreq_snb.c: Native P state management for Intel processors
+ *
+ * (C) Copyright 2012 Intel Corporation
+ * Author: Dirk Brandewie 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+
+#define SAMPLE_COUNT   3
+
+struct sampling_state {
+   int idle_mode;
+   int first_sample;
+};
+
+struct sample {
+   ktime_t start_time;
+   ktime_t end_time;
+   int core_pct_busy;
+   int freq_pct_busy;
+   u64 duration_us;
+   u64 idletime_us;
+   u64 aperf;
+   u64 mperf;
+};
+
+struct freqdata {
+   int current_freq;
+   int min_freq;
+   int max_freq;
+   int turbo_freq;
+};
+
+struct _pid {
+   int setpoint;
+   int32_t integral;
+   int32_t p_gain;
+   int32_t i_gain;
+   int32_t d_gain;
+   int deadband;
+   int last_err;
+};
+
+struct cpudata {
+   int cpu;
+
+   char name[64];
+
+   struct timer_list timer;
+
+   struct freq_adjust_policy *freq_policy;
+   struct freqdata clock;
+   struct sampling_state sampling_state;
+   struct _pid pid;
+   struct _pid idle_pid;
+
+   int min_freq_count;
+
+   ktime_t prev_sample;
+   u64 prev_idle_time_us;
+   u64 prev_aperf;
+   u64 prev_mperf;
+   int sample_ptr;
+   struct sample samples[SAMPLE_COUNT];
+};
+
+static unsigned int snb_usage;
+static DEFINE_MUTEX(snb_mutex);
+
+struct cpudata **all_cpu_data;
+struct freq_adjust_policy {
+   int sample_rate_ms;/* sample rate */
+   int deadband; /*adjust freq on last sample or average */
+   int setpoint; /* starting freq when we have no info */
+   int p_gain_pct;
+   int d_gain_pct;
+   int i_gain_pct;
+};
+
+struct freq_adjust_policy default_policy = {
+   .sample_rate_ms = 10,
+   .deadband = 0,
+   .setpoint = 109,
+   .p_gain_pct = 17,
+   .d_gain_pct = 0,
+   .i_gain_pct = 4,
+};
+
+#define FRAC_BITS 8
+#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
+#define fp_toint(X) ((X) >> FRAC_BITS)
+
+static inline int32_t mul_fp(int32_t x, int32_t y)
+{
+   return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
+}
+
+static inline int32_t div_fp(int32_t x, int32_t y)
+{
+   return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
+}
+
+
+static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
+   int deadband, int integral) {
+   pid->setpoint = setpoint;
+   pid->deadband  = deadband;
+   pid->integral  = int_tofp(integral);
+   pid->last_err  = setpoint - busy;
+}
+

Re: [RFC PATCH v2 02/10] CPU hotplug: Provide APIs for "full" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

Replaying what Tejun wrote:

On 12/06/2012 12:13 AM, Srivatsa S. Bhat wrote:
> Some of the atomic hotplug readers cannot tolerate CPUs going offline while
> they are in their critical section. That is, they can't get away with just
> synchronizing with the updates to the cpu_online_mask; they really need to
> synchronize with the entire CPU tear-down sequence, because they are very
> much involved in the hotplug related code paths.
> 
> Such "full" atomic hotplug readers need a way to *actually* and *truly*
> prevent CPUs from going offline while they are active.
> 

I don't think this is a good idea.  You really should just need
get/put_online_cpus() and get/put_online_cpus_atomic().  The former
the same as they are.  The latter replacing what
preempt_disable/enable() was protecting.  Let's please not go
overboard unless we know they're necessary.  I strongly suspect that
breaking up reader side from preempt_disable and making writer side a
bit lighter should be enough.  Conceptually, it really should be a
simple conversion - convert preempt_disable/enable() pairs protecting
CPU on/offlining w/ get/put_online_cpus_atomic() and wrap the
stop_machine() section with the matching write lock.

Thanks.

-- tejun 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] perf hists: Link hist entries before inserting to an output tree

2012-12-05 Thread Jiri Olsa

On Wed, Dec 05, 2012 at 03:56:42PM +0900, Namhyung Kim wrote:
> From: Namhyung Kim 
> 

SNIP

> - struct rb_node *next = rb_first(>entries);
> + struct rb_root *root;
> + struct rb_node *next;
> +
> + if (sort__need_collapse)
> + root = >entries_collapsed;
> + else
> + root = hists->entries_in;
>  
> + next = rb_first(root);
>   while (next != NULL) {
> - struct hist_entry *he = rb_entry(next, struct hist_entry, 
> rb_node);
> + struct hist_entry *he = rb_entry(next, struct hist_entry, 
> rb_node_in);
>  
> - next = rb_next(>rb_node);
> + next = rb_next(>rb_node_in);
>   if (!hist_entry__next_pair(he)) {
> - rb_erase(>rb_node, >entries);
> + rb_erase(>rb_node_in, root);
>   hist_entry__free(he);
>   }
>   }
> @@ -481,6 +459,11 @@ static void hists__process(struct hists *old, struct 
> hists *new)
>   else
>   hists__link(new, old);
>  
> + hists__output_resort(new);
> +
> + if (show_displacement)
> + hists__compute_position(new);
> +

Computing the position after hists__link screws up the position data,
because we likely have new entries in.

However, I wonder if anyone is actually using displacement info..?

jirka
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Oleg Nesterov

I'll try to read this series later,

one minor and almost offtopic nit.

On 12/06, Srivatsa S. Bhat wrote:
>
>  static int __ref take_cpu_down(void *_param)
>  {
>   struct take_cpu_down_param *param = _param;
> + unsigned long flags;
>   int err;
>
> + /*
> +  *  __cpu_disable() is the step where the CPU is removed from the
> +  *  cpu_online_mask. Protect it with the light-lock held for write.
> +  */
> + write_lock_irqsave(_hotplug_rwlock, flags);
> +
>   /* Ensure this CPU doesn't handle any more interrupts. */
>   err = __cpu_disable();
> - if (err < 0)
> + if (err < 0) {
> + write_unlock_irqrestore(_hotplug_rwlock, flags);
>   return err;
> + }
> +
> + /*
> +  * We have successfully removed the CPU from the cpu_online_mask.
> +  * So release the light-lock, so that the light-weight atomic readers
> +  * (who care only about the cpu_online_mask updates, and not really
> +  * about the actual cpu-take-down operation) can continue.
> +  *
> +  * But don't enable interrupts yet, because we still have work left to
> +  * do, to actually bring the CPU down.
> +  */
> + write_unlock(_hotplug_rwlock);
>
>   cpu_notify(CPU_DYING | param->mod, param->hcpu);
> +
> + local_irq_restore(flags);
>   return 0;

This is subjective, but imho _irqsave and the fat comment look confusing.

Currently take_cpu_down() is always called with irqs disabled, so you
do not need to play with interrupts.

10/10 does s/__stop_machine/stop_cpus/ and that patch could simply add
local_irq_disable/enable into take_cpu_down().

But again this is minor and subjective, I won't insist.

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 10/10] cpu: No more __stop_machine() in _cpu_down()

2012-12-05 Thread Oleg Nesterov

On 12/06, Srivatsa S. Bhat wrote:
>
> @@ -418,7 +418,7 @@ static int __ref _cpu_down(unsigned int cpu, int 
> tasks_frozen)
>   }
>   smpboot_park_threads(cpu);
>
> - err = __stop_machine(take_cpu_down, _param, cpumask_of(cpu));
> + err = stop_cpus(cpumask_of(cpu), take_cpu_down, _param);

stop_one_cpu(cpu) ?

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] perf hists: Link hist entries before inserting to an output tree

2012-12-05 Thread Arnaldo Carvalho de Melo

Em Wed, Dec 05, 2012 at 08:06:46PM +0100, Jiri Olsa escreveu:
> On Wed, Dec 05, 2012 at 03:56:42PM +0900, Namhyung Kim wrote:
> > From: Namhyung Kim 
> > @@ -481,6 +459,11 @@ static void hists__process(struct hists *old, struct 
> > hists *new)
> > else
> > hists__link(new, old);
> >  
> > +   hists__output_resort(new);
> > +
> > +   if (show_displacement)
> > +   hists__compute_position(new);
> > +
> 
> Computing the position after hists__link screws up the position data,
> because we likely have new entries in.
> 
> However, I wonder if anyone is actually using displacement info..?

IIRC that was used long ago in the first version of 'perf diff', that
is not the default, probably we can just ditch it to simplify things,
can you check?

- Arnaldo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch,v3] bdi: add a user-tunable cpu_list for the bdi flusher threads

2012-12-05 Thread Jens Axboe

On 2012-12-05 19:43, Jeff Moyer wrote:
> In realtime environments, it may be desirable to keep the per-bdi
> flusher threads from running on certain cpus.  This patch adds a
> cpu_list file to /sys/class/bdi/* to enable this.  The default is to tie
> the flusher threads to the same numa node as the backing device (though
> I could be convinced to make it a mask of all cpus to avoid a change in
> behaviour).

This looks fine to me now. I'll queue it up for 3.8.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 10/10] cpu: No more __stop_machine() in _cpu_down()

2012-12-05 Thread Srivatsa S. Bhat

On 12/06/2012 12:38 AM, Oleg Nesterov wrote:
> On 12/06, Srivatsa S. Bhat wrote:
>>
>> @@ -418,7 +418,7 @@ static int __ref _cpu_down(unsigned int cpu, int 
>> tasks_frozen)
>>  }
>>  smpboot_park_threads(cpu);
>>
>> -err = __stop_machine(take_cpu_down, _param, cpumask_of(cpu));
>> +err = stop_cpus(cpumask_of(cpu), take_cpu_down, _param);
> 
> stop_one_cpu(cpu) ?
> 

Even I was thinking of using that. Paul, any particular reason you chose
stop_cpus() over stop_one_cpu() in [1]?

[1]. https://lkml.org/lkml/2012/10/30/359

Regards,
Srivatsa S. Bhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

On 12/06/2012 12:37 AM, Oleg Nesterov wrote:
> I'll try to read this series later,
> 
> one minor and almost offtopic nit.
> 
> On 12/06, Srivatsa S. Bhat wrote:
>>
>>  static int __ref take_cpu_down(void *_param)
>>  {
>>  struct take_cpu_down_param *param = _param;
>> +unsigned long flags;
>>  int err;
>>
>> +/*
>> + *  __cpu_disable() is the step where the CPU is removed from the
>> + *  cpu_online_mask. Protect it with the light-lock held for write.
>> + */
>> +write_lock_irqsave(_hotplug_rwlock, flags);
>> +
>>  /* Ensure this CPU doesn't handle any more interrupts. */
>>  err = __cpu_disable();
>> -if (err < 0)
>> +if (err < 0) {
>> +write_unlock_irqrestore(_hotplug_rwlock, flags);
>>  return err;
>> +}
>> +
>> +/*
>> + * We have successfully removed the CPU from the cpu_online_mask.
>> + * So release the light-lock, so that the light-weight atomic readers
>> + * (who care only about the cpu_online_mask updates, and not really
>> + * about the actual cpu-take-down operation) can continue.
>> + *
>> + * But don't enable interrupts yet, because we still have work left to
>> + * do, to actually bring the CPU down.
>> + */
>> +write_unlock(_hotplug_rwlock);
>>
>>  cpu_notify(CPU_DYING | param->mod, param->hcpu);
>> +
>> +local_irq_restore(flags);
>>  return 0;
> 
> This is subjective, but imho _irqsave and the fat comment look confusing.
> 
> Currently take_cpu_down() is always called with irqs disabled, so you
> do not need to play with interrupts.
> 
> 10/10 does s/__stop_machine/stop_cpus/ and that patch could simply add
> local_irq_disable/enable into take_cpu_down().
> 

Hmm, we could certainly do that, but somehow I felt it would be easier to
read if we tinker and fix up the take_cpu_down() logic at one place, as a
whole, instead of breaking up into pieces in different patches. And that
also makes the last patch look really cute: it just replaces stop_machine()
with stop_cpus(), as the changelog intended.

I'll see if doing like what you suggested improves the readability, and
if yes, I'll change it. Thank you!

Regards,
Srivatsa S. Bhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4] backlight: corgi_lcd: Use gpio_set_value_cansleep() to avoid WARN_ON

2012-12-05 Thread Russell King - ARM Linux

On Wed, Dec 05, 2012 at 07:20:00PM +0100, Marko Katić wrote:
> On Wed, Dec 5, 2012 at 10:30 AM, Russell King - ARM Linux
>  wrote:
> > On Wed, Dec 05, 2012 at 09:59:07AM +0900, Jingoo Han wrote:
> >> - if (gpio_is_valid(lcd->gpio_backlight_cont))
> >> - gpio_set_value(lcd->gpio_backlight_cont, cont);
> >> + if (gpio_is_valid(lcd->gpio_backlight_cont)) {
> >> + if (gpio_cansleep(lcd->gpio_backlight_cont))
> >> + gpio_set_value_cansleep(lcd->gpio_backlight_cont, 
> >> cont);
> >> + else
> >> + gpio_set_value(lcd->gpio_backlight_cont, cont);
> >> + }
> >
> > Why not simply:
> >
> > +   if (gpio_is_valid(lcd->gpio_backlight_cont))
> > +   gpio_set_value_cansleep(lcd->gpio_backlight_cont, cont);
> 
> My first patch did exactly this but there were complaints about its
> commit message.

So that's a reason to drop the patch?  Err, forgive me for being thick
as a medieval castle wall, but what does complaints about the commit
message have to do with the contents of the patch?  Why can't you just
fix the commit message?

> And i just found out that Marek Vasut posted the exact same patch more
> than a year ago.
> 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2011-April/046955.html
> 
> It was not applied for various reasons.

Looking at that thread (which is corrupted btw, probably thanks to the
crappy python based locking in mailman) - here's a better archiver:

http://lists.arm.linux.org.uk/lurker/thread/20110402.014316.74101499.en.html

it didn't go anywhere because the discussion was distracted by the loss
of David Brownell.

Eric shares my opinion of the _cansleep() mess, but unfortunately it's
what we have and no one's come up with any better solutions to it.  (I
argued from the outset that the gpio_xxx_cansleep() should've been
gpio_xxx() and the non-cansleep() version should be called
gpio_xxx_atomic() so that by default people use the version which _can_
sleep, but have to think about it when they want to manipulate GPIOs in
non-task contexts.)

That's off-topic though.  Using just the _cansleep() version is far
better than messing around with stuff like:

if (gpio_cansleep(gpio))
gpio_xxx_cansleep(gpio);
else
gpio_xxx(gpio);

> > If you read the gpiolib code and documentation, what you will realise is
> > that the two calls are identical except for the "might_sleep_if()" in
> > gpio_set_value_cansleep().  You will also note that gpiolib itself _only_
> > calls gpio_set_value_cansleep() without checking gpio_cansleep() in
> > contexts where sleeping is possible.  So if it's good enough for gpiolib,
> > it should be good enough here.
> 
> The documentation tells which calls to use when you don't need to sleep
> and which calls to use when you might sleep. And here we have a case
> where the same call to gpio_set_value might sleep or doesn't have to,
> depending on the model.
> In this case, i'd rather use gpio_cansleep check as Andrew proposed.
> 
> I will also say that the distinction between gpio_set_value and
> gpio_set_value_cansleep is rather confusing at this point. Is it really
> necessary to have both?

No.  You can call gpio_set_value_cansleep() from task contexts for any
GPIO just fine, but you can't call it from atomic contexts (it will
complain).  It doesn't matter whether the GPIO can sleep or not.

You can call gpio_set_value() from any context without it complaining,
however, gpio_set_value() can't be used with a GPIO which sleeps.

Look, when it comes down to it, in _task_ context, where sleeps are
permissible:

gpio_set_value(gpio, xxx);
and
gpio_set_value_cansleep(gpio, xxx);

are exactly the same thing; they will both set the value of a GPIO
output, whether it be an atomic or a sleeping gpio to the requested
value.

The difference between the two becomes important if you're not in task
context, where only the non-_cansleep() versions can be used.  This is
enforced by the _cansleep() versions issuing a WARN_ON() if they're
used in non-task contexts.  And conversely, the non-_cansleep() versions
will warn (as you've found) if you use that call with a GPIO which will
sleep.

There is another solution to this mess:

void __gpio_set_value(unsigned gpio, int value)
{
struct gpio_chip*chip;

chip = gpio_to_chip(gpio);
/* Should be using gpio_set_value_cansleep() */
-   WARN_ON(chip->can_sleep);
+   might_sleep_if(chip->can_sleep);
trace_gpio_value(gpio, 0, value);
if (test_bit(FLAG_OPEN_DRAIN,  _desc[gpio].flags))
_gpio_set_open_drain_value(gpio, chip, value);
else if (test_bit(FLAG_OPEN_SOURCE,  _desc[gpio].flags))
_gpio_set_open_source_value(gpio, chip, value);
else
chip->set(chip, gpio - chip->base, value);
}
EXPORT_SYMBOL_GPL(__gpio_set_value);

With the above change (and an equivalent change

Re: [3.6 regression?] THP + migration/compaction livelock (I think)

2012-12-05 Thread Andy Lutomirski

On Sun, Nov 18, 2012 at 2:55 PM, David Rientjes  wrote:
> On Sat, 17 Nov 2012, Marc Duponcheel wrote:
>
>> # echo always >/sys/kernel/mm/transparent_hugepage/enabled
>> # while [ 1 ]
>>   do
>>sleep 10
>>date
>>echo = vmstat
>>egrep "(thp|compact)" /proc/vmstat
>>echo = khugepaged stack
>>cat /proc/501/stack
>>  done > /tmp/49361.
>> # emerge icedtea
>> (where 501 = pidof khugepaged)
>>
>> for  = base = 3.6.6
>> and  = test = 3.6.6 + diff you provided
>>
>> I attach
>>  /tmp/49361.base.gz
>> and
>>  /tmp/49361.test.gz
>>
>> Note:
>>
>>  with xxx=base, I could see
>>   PID USER  PR  NI  VIRT  RES  SHR S  %CPU %MEM TIME+ COMMAND
>>  8617 root  20   0 3620m  41m  10m S 988.3  0.5   6:19.06 javac
>> 1 root  20   0  4208  588  556 S   0.0  0.0   0:03.25 init
>>  already during configure and I needed to kill -9 javac
>>
>>  with xxx=test, I could see
>>   PID USER  PR  NI  VIRT  RES  SHR S  %CPU %MEM TIME+ COMMAND
>> 9275 root  20   0 2067m 474m  10m S 304.2  5.9   0:32.81 javac
>>  710 root   0 -20 000 S   0.3  0.0   0:01.07 kworker/0:1H
>>  later when processing >700 java files
>>
>> Also note that with xxx=test compact_blocks_moved stays 0
>>
>
> Sounds good!  Andy, have you had the opportunity to try to reproduce your
> issue with the backports that Mel listed?  I think he'll be considering
> asking for some of these to be backported for a future stable release so
> any input you can provide would certainly be helpful.

I've had an impressive amount of trouble even reproducing it on 3.6.
Apparently I haven't hit the magic combination yet.  I'll give it
another try soon.

-- 
Andy Lutomirski
AMA Capital Management, LLC
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] perf hists: Link hist entries before inserting to an output tree

2012-12-05 Thread Jiri Olsa

On Wed, Dec 05, 2012 at 08:06:46PM +0100, Jiri Olsa wrote:
> On Wed, Dec 05, 2012 at 03:56:42PM +0900, Namhyung Kim wrote:
> > From: Namhyung Kim 
> > 
> 
> SNIP
> 
> > -   struct rb_node *next = rb_first(>entries);
> > +   struct rb_root *root;
> > +   struct rb_node *next;
> > +
> > +   if (sort__need_collapse)
> > +   root = >entries_collapsed;
> > +   else
> > +   root = hists->entries_in;
> >  
> > +   next = rb_first(root);
> > while (next != NULL) {
> > -   struct hist_entry *he = rb_entry(next, struct hist_entry, 
> > rb_node);
> > +   struct hist_entry *he = rb_entry(next, struct hist_entry, 
> > rb_node_in);
> >  
> > -   next = rb_next(>rb_node);
> > +   next = rb_next(>rb_node_in);
> > if (!hist_entry__next_pair(he)) {
> > -   rb_erase(>rb_node, >entries);
> > +   rb_erase(>rb_node_in, root);
> > hist_entry__free(he);
> > }
> > }
> > @@ -481,6 +459,11 @@ static void hists__process(struct hists *old, struct 
> > hists *new)
> > else
> > hists__link(new, old);
> >  
> > +   hists__output_resort(new);
> > +
> > +   if (show_displacement)
> > +   hists__compute_position(new);
> > +
> 
> Computing the position after hists__link screws up the position data,
> because we likely have new entries in.
> 
> However, I wonder if anyone is actually using displacement info..?

hum,

the point of the displacement is to show how far the matching entry
in baseline is wrt report output -> after hists__output_resort.. that goes
the opposite way of what we try to do in here.

Anyone else in favour of removing 'Displ.' column? ;-)

jirka
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] Capabilities still can't be inherited by normal programs

2012-12-05 Thread Andy Lutomirski

On Tue, Dec 4, 2012 at 5:54 AM, Serge E. Hallyn  wrote:
> Quoting Andy Lutomirski (l...@amacapital.net):
>> >> d) If I really wanted, I could emulate execve without actually doing
>> >> execve, and capabilities would be inherited.
>> >
>> > If you could modify the executable properties of the binary that has
>> > the privilege to wield a privilege then you are either exploiting an
>> > app bug, or doing something the privileged binary has been trusted to
>> > do.
>>
>> That's not what I mean.  I would:
>>
>> fork()
>> munmap everything
>> mmap ld.so
>> set up a fake initial stack and the right fd or mapping or whatever
>> just to ld-linux.so
>>
>> That's almost execve, and privilege inheritance works.
>
> But of course that is why you only want to fill fI on programs you trust
> not to do that.  What you are arguing is that you want to give fI on
> programs you don't trust anyway, and so heck why not just give it on
> everything.
>

Huh?  I'd set fP on a program I expect to do *exactly* that (or use
actual in-kernel capability inheritance, which I would find vastly
more pleasant).  If I give a program a capability (via fP or fI & pI),
then I had better trust it not to abuse that capability.  Having it
pass that capability on to a child helper process would be just fine
with me *because it already has that capability*.

The problem with the current inheritance mechanism is that it's very
difficult to understand what it means for an fI bit or a pI bit to be
set.  Saying "set a pI bit using pam if you want to grant permission
to that user to run a particular program with fI set" is crap -- it
only works if there is exactly one binary on the system with that bit
set.  In any case, a different administrator or package might use it
for something different.

Suppose I use the (apparently) current suggested approach: I install a
fI=cap_net_raw copy of tcpdump somewhere.  Then I write a helper that
has fP=cap_net_raw and invokes that copy of tcpdump after appropriate
validation of parameters.  All is well.

Now I want to grant only a subset of users permission to run ping.  So
I modify ping so it's cap_net_raw=i (not p) and grant those users
pI=cap_net_raw.

The end result: I introduced a security hole: the users with
cap_net_raw=i can run tcpdump *without validation via the helper*.
Oops.

The fundamental problem as I see it is that fI and pI's behavior is so
odd that the significance of setting some of those bits varies and is
likely to be used, if at all, in conflicting ways.

> Anyway, implementing the features you want in a new module is encouraged,
> so long as the behavior of existing module stays the same.

I'll think about it some more and do it possibly using a sysctl.
Adding this kind of stuff in a module is asking for even worse
incomprehensibility of which capability bit means what.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v10 0/2] x86: vmclear vmcss on all cpus when doing kdump if necessary

2012-12-05 Thread Eric W. Biederman

Zhang Yanfei  writes:

> Currently, kdump just makes all the logical processors leave VMX operation by
> executing VMXOFF instruction, so any VMCSs active on the logical processors
> may be corrupted. But, sometimes, we need the VMCSs to debug guest images
> contained in the host vmcore. To prevent the corruption, we should VMCLEAR
> the VMCSs before executing the VMXOFF instruction.
>
> The patch set provides a way to VMCLEAR vmcss related to guests on all cpus
> before executing the VMXOFF when doing kdump. This is used to ensure the VMCSs
> in the vmcore are updated and non-corrupted.

Skimming through, it looks like the important things have been addressed.

Acked-by: "Eric W. Biederman" 


> Changelog from v9 to v10:
> 1. add rcu protect to the callback function
>
> Changelog from v8 to v9:
> 1. KEXEC: use a callback function instead of a notifier.
> 2. KVM-INTEL: use a new vmclear function instead of just calling 
>vmclear_local_loaded_vmcss to make sure we just do the core vmclear
>operation in kdump.
>
> Changelog from v7 to v8:
> 1. KEXEC: regression for using name crash_notifier_list
>and remove comments related to KVM
>and just call function atomic_notifier_call_chain directly.
>
> Changelog from v6 to v7:
> 1. KVM-INTEL: in hardware_disable, we needn't disable the
>vmclear, so remove it.
>
> Changelog from v5 to v6:
> 1. KEXEC: the atomic notifier list renamed:
>crash_notifier_list --> vmclear_notifier_list
> 2. KVM-INTEL: provide empty functions if CONFIG_KEXEC is
>not defined and remove unnecessary #ifdef's.
>
> Changelog from v4 to v5:
> 1. use an atomic notifier instead of function call, so
>have all the vmclear codes in vmx.c.
>
> Changelog from v3 to v4:
> 1. add a new percpu variable vmclear_skipped to skip
>vmclear in kdump in some conditions.
>
> Changelog from v2 to v3:
> 1. remove unnecessary conditions in function
>cpu_emergency_clear_loaded_vmcss as Marcelo suggested.
>
> Changelog from v1 to v2:
> 1. remove the sysctl and clear VMCSs unconditionally.
>
> Zhang Yanfei (2):
>   x86/kexec: VMCLEAR VMCSs loaded on all cpus if necessary
>   KVM-INTEL: provide the vmclear function and a bitmap to support
> VMCLEAR in kdump
>
>  arch/x86/include/asm/kexec.h |2 +
>  arch/x86/kernel/crash.c  |   32 
>  arch/x86/kvm/vmx.c   |   67 
> ++
>  3 files changed, 101 insertions(+), 0 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] nommu: Enable the strict alignment (CR_A) bit only if ARCH < v6

2012-12-05 Thread Uwe Kleine-König

Hello,

On Mon, Dec 03, 2012 at 05:46:17PM +, Will Deacon wrote:
> On Mon, Dec 03, 2012 at 05:25:53PM +, Russell King - ARM Linux wrote:
> > On Mon, Dec 03, 2012 at 05:44:11PM +0100, Armando Visconti wrote:
> > > On 11/29/2012 12:04 PM, Armando VISCONTI wrote:
> > >> This patch keeps disabled the strict alignment CP15 bit for
> > >> all armv6 and armv7 processor without the mmu. This behaviour
> > >> is now same as in the mmu case.
> > >>
> > >> Signed-off-by: Armando Visconti
> > >> ---
> > >>   arch/arm/kernel/head-nommu.S |2 +-
> > >>   1 files changed, 1 insertions(+), 1 deletions(-)
> > >>
> > >> diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
> > >> index 278cfc1..2c228a0 100644
> > >> --- a/arch/arm/kernel/head-nommu.S
> > >> +++ b/arch/arm/kernel/head-nommu.S
> > >> @@ -68,7 +68,7 @@ __after_proc_init:
> > >>   * CP15 system control register value returned in r0 from
> > >>   * the CPU init function.
> > >>   */
> > >> -#ifdef CONFIG_ALIGNMENT_TRAP
> > >> +#if defined(CONFIG_ALIGNMENT_TRAP)&&  __LINUX_ARM_ARCH__<  6
> > >>  orr r0, r0, #CR_A
> > >>   #else
> > >>  bic r0, r0, #CR_A
> > >
> > > Any feedback on this simple patch?
> > 
> > Well, it brings the nommu version into line with the mmu version, so
> > it's sensible.  Please put it in the patch system, thanks.
> 
> In which case, I'll definitely dust-off the patches to prevent unaligned
> accesses to strongly-ordered memory because that results in good ol'
> UNPREDICTABLE behaviour.
> 
> I think Uwe also requires some of the stuff there for his M3 port.
I didn't check more context than available in the patch and I also
didn't double-check the M3-docs, but AFAIK the M3 doesn't have a cp15
and I noopt this code out.

Best regards
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | http://www.pengutronix.de/  |
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 4/4] HID: i2c-hid: fix i2c_hid_get_raw_report count mismatches

2012-12-05 Thread Jean Delvare

On Wed,  5 Dec 2012 15:02:56 +0100, Benjamin Tissoires wrote:
> The previous memcpy implementation relied on the size advertized by the
> device. There were no guarantees that buf was big enough.
> 
> Some gymnastic is also required with the +2/-2 to take into account
> the first 2 bytes of the returned buffer where the total returned
> length is supplied by the device.
> 
> Signed-off-by: Benjamin Tissoires 
> ---
>  drivers/hid/i2c-hid/i2c-hid.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
> index c6630d4..ce01d59 100644
> --- a/drivers/hid/i2c-hid/i2c-hid.c
> +++ b/drivers/hid/i2c-hid/i2c-hid.c
> @@ -502,23 +502,31 @@ static int i2c_hid_get_raw_report(struct hid_device 
> *hid,
>  {
>   struct i2c_client *client = hid->driver_data;
>   struct i2c_hid *ihid = i2c_get_clientdata(client);
> + size_t ret_count, ask_count;
>   int ret;
>  
>   if (report_type == HID_OUTPUT_REPORT)
>   return -EINVAL;
>  
> - if (count > ihid->bufsize)
> - count = ihid->bufsize;
> + /* +2 bytes to include the size of the reply in the query buffer */
> + ask_count = min(count + 2, (size_t)ihid->bufsize);
>  
>   ret = i2c_hid_get_report(client,
>   report_type == HID_FEATURE_REPORT ? 0x03 : 0x01,
> - report_number, ihid->inbuf, count);
> + report_number, ihid->inbuf, ask_count);
>  
>   if (ret < 0)
>   return ret;
>  
> - count = ihid->inbuf[0] | (ihid->inbuf[1] << 8);
> + ret_count = ihid->inbuf[0] | (ihid->inbuf[1] << 8);
>  
> + if (!ret_count)

I'd make this (ret_count <= 2), as the current check would let you call
memcpy with a zero or even negative length.

Other than that, the new code looks OK and safe.

> + return 0;
> +
> + ret_count = min(ret_count, ask_count);
> +
> + /* The query buffer contains the size, dropping it in the reply */
> + count = min(count, ret_count - 2);
>   memcpy(buf, ihid->inbuf + 2, count);
>  
>   return count;


-- 
Jean Delvare
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 2/4] input: Cypress PS/2 Trackpad psmouse driver

2012-12-05 Thread Henrik Rydberg

Hi Kamal,

> From: Dudley Du 
> 
> Input/mouse driver for Cypress PS/2 Trackpad.
> 
> Original code contributed by Dudley Du (Cypress Semiconductor Corporation),
> modified by Kamal Mostafa and Kyle Fazzari.
> 
> BugLink: http://launchpad.net/bugs/978807
> 
> Signed-off-by: Dudley Du 
> Signed-off-by: Kamal Mostafa 
> Signed-off-by: Kyle Fazzari 
> Signed-off-by: Mario Limonciello 
> Signed-off-by: Tim Gardner 
> Acked-by: Herton Krzesinski 
> ---
>  drivers/input/mouse/cypress_ps2.c |  835 
> +
>  drivers/input/mouse/cypress_ps2.h |  219 ++
>  2 files changed, 1054 insertions(+)
>  create mode 100644 drivers/input/mouse/cypress_ps2.c
>  create mode 100644 drivers/input/mouse/cypress_ps2.h
> 
> diff --git a/drivers/input/mouse/cypress_ps2.c 
> b/drivers/input/mouse/cypress_ps2.c
> new file mode 100644
> index 000..fab4d18
> --- /dev/null
> +++ b/drivers/input/mouse/cypress_ps2.c
> @@ -0,0 +1,835 @@
> +/*
> + * Cypress Trackpad PS/2 mouse driver
> + *
> + * Copyright (c) 2012 Cypress Semiconductor Corporation.
> + *
> + * Author:
> + *   Dudley Du 
> + *
> + * Additional contributors include:
> + *   Kamal Mostafa 
> + *   Kyle Fazzari 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published 
> by
> + * the Free Software Foundation.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "cypress_ps2.h"
> +
> +#undef CYTP_DEBUG_VERBOSE  /* define this and DEBUG for more verbose dump */
> +
> +#undef CYTP_RELATIVE_SUPPORT  /* define to enable unused EV_REL code */

Code inside a local ifdef which is off by default is very rare in the
kernel, and will likely bitrot. If you want to preserve the
information, why not add it to the documentation, and remove the code
here?

> +
> +#define cytp_dbg(fmt, ...)  psmouse_dbg(psmouse, pr_fmt(fmt), ##__VA_ARGS__)
> +
> +
> +static int is_cypress_key(const unsigned char *param)
> +{
> + return (param[0] == CYPRESS_KEY_1) && (param[1] == CYPRESS_KEY_2);
> +}
> +
> +static void cypress_set_packet_size(struct psmouse *psmouse, unsigned int n)
> +{
> + struct cytp_data *cytp = psmouse->private;
> + psmouse->pktsize = cytp->pkt_size = n;
> +}
> +
> +static void cypress_set_abs_rel_mode(struct cytp_data *cytp,
> + unsigned int cytp_mode_bit)
> +{
> + cytp->mode = (cytp->mode & ~CYTP_BIT_ABS_REL_MASK) | cytp_mode_bit;
> +}
> +
> +static const unsigned char cytp_rate[] = {10, 20, 40, 60, 100, 200};
> +static const unsigned char cytp_resolution[] = {0x00, 0x01, 0x02, 0x03};
> +
> +static int cypress_ps2_sendbyte(struct psmouse *psmouse, int value)
> +{
> + struct ps2dev *ps2dev = >ps2dev;
> +
> + if (ps2_sendbyte(ps2dev, value & 0xff, CYTP_CMD_TIMEOUT) < 0) {
> + cytp_dbg("send command 0x%02x failed, resp 0x%02x\n",
> +  value & 0xff, ps2dev->nak);
> + if (ps2dev->nak == CYTP_PS2_RETRY)
> + return CYTP_PS2_RETRY;
> + else
> + return CYTP_PS2_ERROR;
> + }
> +
> + cytp_dbg("send command 0x%02x success, resp 0xfa\n", value & 0xff);
> +
> + return 0;
> +}
> +
> +static int cypress_ps2_ext_cmd(struct psmouse *psmouse, unsigned short cmd,
> +unsigned char data)
> +{
> + struct ps2dev *ps2dev = >ps2dev;
> + int tries = CYTP_PS2_CMD_TRIES;
> + int rc;
> +
> + ps2_begin_command(ps2dev);
> +
> + do {
> + /*
> +  * send extension command 0xE8 or 0xF3,
> +  * if send extension command failed,
> +  * try to send recovery command to make
> +  * trackpad device return to ready wait command state.
> +  * It alwasy success based on this recovery commands.
> +  */

This still reads the same, please change the wording of the last sentence.

> + rc = cypress_ps2_sendbyte(psmouse, cmd & 0xff);
> + if (rc == CYTP_PS2_RETRY) {
> + rc = cypress_ps2_sendbyte(psmouse, 0x00);
> + if (rc == CYTP_PS2_RETRY)
> + rc = cypress_ps2_sendbyte(psmouse, 0x0a);
> + }
> + if (rc == CYTP_PS2_ERROR)
> + continue;
> +
> + rc = cypress_ps2_sendbyte(psmouse, data);
> + if (rc == CYTP_PS2_RETRY)
> + rc = cypress_ps2_sendbyte(psmouse, data);
> + if (rc == CYTP_PS2_ERROR)
> + continue;
> + else
> + break;
> + } while (--tries > 0);
> +
> + ps2_end_command(ps2dev);
> +
> + return rc;
> +}
> +
> +static int cypress_ps2_read_cmd_status(struct psmouse *psmouse,
> +unsigned char cmd,
> +

Re: Use PCI ROMs from EFI boot services

2012-12-05 Thread Bjorn Helgaas

On Mon, Dec 3, 2012 at 1:02 PM, Seth Forshee  wrote:
> On Thu, Oct 25, 2012 at 11:35:57AM -0600, Bjorn Helgaas wrote:
>> On Thu, Aug 23, 2012 at 10:36 AM, Matthew Garrett  wrote:
>> > V3 just fixes all the casting issues and incorporates David's change in
>> > search ordering.
>>
>> I think there's still a section mismatch issue with these patches, so
>> I haven't merged them yet.
>>
>> I rebased my pci/mjg-pci-roms-from-efi branch to v3.7-rc2, and if we
>> get this issue fixed I'll put it in -next as v3.8 material.
>
> I still don't see this series in -next, so I take it the section
> mismatch was never fixed? How about the following?

That's right; nobody stepped up to fix the section mismatch.  I'm
happy to fold in your fix, especially if Matthew acks it.

David, Eric, what about the kexec question?  It looks to me like this
wouldn't make things worse than they are today.  If I understand
correctly, today we don't use ROM data from EFI on either an initial
boot or a kexec.  After this patch, we could use EFI ROM data on the
initial boot, but not after a kexec.  So it's worse in the sense that
the kexec case doesn't match the initial boot, but at least it's not
something that used to work and is now broken.

> From ece31852159a6b2cf9a059031638354e9817a6a6 Mon Sep 17 00:00:00 2001
> From: Seth Forshee 
> Date: Mon, 3 Dec 2012 13:55:50 -0600
> Subject: [PATCH] x86: Don't discard boot_params
>
> boot_params is now used at runtime on EFI systems to stash option ROMs
> that aren't available after exiting boot services, so it can no longer
> be marked __initdata.
>
> Signed-off-by: Seth Forshee 
> ---
>  arch/x86/kernel/setup.c |4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 468e98d..6e13035 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid)
>  }
>  #endif
>
> -#ifndef CONFIG_DEBUG_BOOT_PARAMS
> -struct boot_params __initdata boot_params;
> -#else
>  struct boot_params boot_params;
> -#endif
>
>  /*
>   * Machine setup..
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 4/4] input: Cypress PS/2 Trackpad simulated multitouch (disabled)

2012-12-05 Thread Henrik Rydberg

Hi Kamal,

> This feature is disabled by default; enable with CYPRESS_SIMULATED_MT.
> 
> Instead of SEMI_MT, present a full mt interface with simulated contact
> positions for >=3 fingers.  Enables e.g. multi-finger tap and drag for
> old userspace applications which only count the contact positions.
> 
> Signed-off-by: Kamal Mostafa 
> ---
>  drivers/input/mouse/cypress_ps2.c |   18 ++
>  drivers/input/mouse/cypress_ps2.h |   16 ++--
>  2 files changed, 32 insertions(+), 2 deletions(-)

No thanks. Millions of devices are supported via semi-mt, there is no
reason why this one should be different. If there are userland issues,
something broke recently. You may want to check with Chase, for
instance, for the full history of commodity trackpad MT support.

Thanks.
Henrik
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 01/10] CPU hotplug: Provide APIs for "light" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

> Replaying what Tejun wrote:
> 
> Hello, Oleg.
> 
>> Replaying what Oleg wrote:
>>> Replacing get_online_cpus() w/ percpu_rwsem is great but this thread
>>> is about replacing preempt_disable with something finer grained and
>>> less heavy on the writer side
>>
>> If only I understood why preempt_disable() is bad ;-)
>>
>> OK, I guess "less heavy on the writer side" is the hint, and in the
>> previous email you mentioned that "stop_machine() itself is extremely
>> heavy".
>>
>> Looks like, you are going to remove stop_machine() from cpu_down ???
>>
> 
> Yeah, that's what Srivatsa is trying to do.  The problem seems to be
> that cpu up/down is very frequent on certain mobile platforms for
> power management and as currently implemented cpu hotplug is too heavy
> and latency-inducing.
>   
>>> The problem seems that we don't have percpu_rwlock yet.  It shouldn't
>>> be too difficult to implement, right?
>>>
>>
>> Oh, I am not sure... unless you simply copy-and-paste the lglock code
>> and replace spinlock_t with rwlock_t.
>>
> 
> Ah... right, so that's where brlock ended up.  So, lglock is the new
> thing and brlock is a wrapper around it.
> 
>> We probably want something more efficient, but I bet we can't avoid
>> the barriers on the read side.
>>
>> And somehow we should avoid the livelocks. Say, we can't simply add
>> the per_cpu_reader_counter, _read_lock should spin if the writer is
>> active. But at the same time _read_lock should be recursive.
>>
> 
> I think we should just go with lglock.  It does involve local atomic
> ops but atomic ops themselves aren't that expensive and it's not like
> we can avoid memory barriers.  Also, that's the non-sleeping
> counterpart of percpu_rwsem.  If it's not good enough for some reason,
> we should improve it rather than introducing something else.
> 

While working on the v2 yesterday, I had actually used rwlocks for
the light readers and atomic ops for the full-readers. (Later I changed
both to rwlocks while posting this v2). Anyway, the atomic ops version
looked something like shown below.

I'll take a look at lglocks and see if that helps in our case.

Regards,
Srivatsa S. Bhat


---

 include/linux/cpu.h |4 ++
 kernel/cpu.c|   98 +++
 2 files changed, 102 insertions(+)


diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index c64b6ed..5011c7d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -177,6 +177,8 @@ extern void get_online_cpus(void);
 extern void put_online_cpus(void);
 extern void get_online_cpus_stable_atomic(void);
 extern void put_online_cpus_stable_atomic(void);
+extern void get_online_cpus_atomic(void);
+extern void put_online_cpus_atomic(void);
 #define hotcpu_notifier(fn, pri)   cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)   register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
@@ -202,6 +204,8 @@ static inline void cpu_hotplug_driver_unlock(void)
 #define put_online_cpus()  do { } while (0)
 #define get_online_cpus_stable_atomic()do { } while (0)
 #define put_online_cpus_stable_atomic()do { } while (0)
+#define get_online_cpus_atomic()   do { } while (0)
+#define put_online_cpus_atomic()   do { } while (0)
 #define hotcpu_notifier(fn, pri)   do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)   ({ (void)(nb); 0; })
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8c9eecc..76b07f7 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "smpboot.h"
 
@@ -104,6 +105,58 @@ void put_online_cpus_stable_atomic(void)
 }
 EXPORT_SYMBOL_GPL(put_online_cpus_stable_atomic);
 
+static DEFINE_PER_CPU(atomic_t, atomic_reader_refcount);
+
+#define writer_active(v)   ((v) < 0)
+#define reader_active(v)   ((v) > 0)
+
+/*
+ * Invoked by hotplug reader, to prevent CPUs from going offline.
+ * Increments its per-cpu 'atomic_reader_refcount' to mark itself as being
+ * active.
+ *
+ * If 'atomic_reader_refcount' is negative, it means that a CPU offline
+ * operation is in progress (hotplug writer). Wait for it to complete
+ * and then mark your presence (increment the count) and return.
+ *
+ * You can call this recursively, because it doesn't hold any locks.
+ *
+ * Returns with preemption disabled.
+ */
+void get_online_cpus_atomic(void)
+{
+   int c, old;
+
+   preempt_disable();
+   read_lock(_rwlock);
+
+   for (;;) {
+   c = atomic_read(&__get_cpu_var(atomic_reader_refcount));
+   if (unlikely(writer_active(c))) {
+   cpu_relax();
+   continue;
+   }
+
+   old = atomic_cmpxchg(&__get_cpu_var(atomic_reader_refcount),
+c, c + 1);
+
+   if (likely(old == c))
+   break;
+
+   c

Re: [tpmdd-devel] [PATCH 1/1] TPM: STMicroelectronics ST33 I2C KERNEL 3.x.x

2012-12-05 Thread Peter Hüwe

Hi Kent,
> 
>   Heh, duh, well of course it is. I've now staged everything I'm
> planning on pushing at:
> 
> git://github.com/shpedoikal/linux.git tpmdd-12-05-12
> 
> Please test and let me know if I missed anything.
> 
> Thanks,
> Kent
> 
> > Kent
> > 

(I'm still writing this on behalf of myself ;)


While I really appreciate you helping Mathias out here, I'm not so sure 
whether an offlist discussion of a driver submission is a really good idea. 
I did not see any v2 / improvements on list and now it's committed (?!)

There's no need to argue here, I'm fine with this,
but I'd highly appreciate if at least the result is published again in the 
future on the mailing list, for proper review. 

If the mailfilter got the mails I apologize.


Can you perhaps post the message to the list, so a proper review is possible?

I thought Mathias had changed the naming of the files? I don't see that in 
your commit.


The version you committed to that branch still has some of the items of my 
first review. (e.g.
 * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.)

-> The (possible) GPL v3 clause has to go away for the kernel, but I'm not a 
lawyer.



Thanks,
Peter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] Capabilities still can't be inherited by normal programs

2012-12-05 Thread Markku Savela

On 12/05/2012 09:32 PM, Andy Lutomirski wrote:

>Anyway, implementing the features you want in a new module is encouraged,
>so long as the behavior of existing module stays the same.

I'll think about it some more and do it possibly using a sysctl.
Adding this kind of stuff in a module is asking for even worse
incomprehensibility of which capability bit means what.

For what it's worth, and just for information. This module approach
has been attempted, sort of: I did implement capabilities inheritance
in Nokia N9 (Aegis). The capabilities started to inherit when task
entered "aegis mode" (a bit in secure bits).

The experience was "interesting". There are many "simplified" articles
about running root with less than full capabilities, and we did that.
However, it also caused a lot of headache, because many people got
hit by this "root is no more omnipotent" thing and complained. It was
a pain to manage and find the correct required capabilities for each task,
and often the end result was to grant all (or at least too much).

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] x86,AMD: Power driver support for AMD's family 16h processors

2012-12-05 Thread Guenter Roeck

On Wed, Dec 05, 2012 at 06:12:42AM -0500, Boris Ostrovsky wrote:
> Add family 16h PCI ID to AMD's power driver to allow it report
> power consumption on these processors.
> 
> Signed-off-by: Boris Ostrovsky 

Applied.

Thanks,
Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Use PCI ROMs from EFI boot services

2012-12-05 Thread Matthew Garrett

On Wed, Dec 05, 2012 at 01:09:25PM -0700, Bjorn Helgaas wrote:

> That's right; nobody stepped up to fix the section mismatch.  I'm
> happy to fold in your fix, especially if Matthew acks it.

Yes, sorry, I've been way behind on pretty much everything for the past 
few months. Please do add my Ack.

> David, Eric, what about the kexec question?  It looks to me like this
> wouldn't make things worse than they are today.  If I understand
> correctly, today we don't use ROM data from EFI on either an initial
> boot or a kexec.  After this patch, we could use EFI ROM data on the
> initial boot, but not after a kexec.  So it's worse in the sense that
> the kexec case doesn't match the initial boot, but at least it's not
> something that used to work and is now broken.

I think I'd agree here - it's not ideal, but it's no more broken than 
the current situation.

-- 
Matthew Garrett | mj...@srcf.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] tmpfs: fix shared mempolicy leak

2012-12-05 Thread Tommi Rantala

2012/12/5 Mel Gorman :
> On Tue, Dec 04, 2012 at 11:24:30PM -0800, Hugh Dickins wrote:
>> From: Mel Gorman 
>>
>> Commit 00442ad04a5e ("mempolicy: fix a memory corruption by refcount
>> imbalance in alloc_pages_vma()") changed get_vma_policy() to raise the
>> refcount on a shmem shared mempolicy; whereas shmem_alloc_page() went
>> on expecting alloc_page_vma() to drop the refcount it had acquired.
>> This deserves a rework: but for now fix the leak in shmem_alloc_page().
>
> Thanks Hugh for turning gibber into a patch!
>
> Signed-off-by: Mel Gorman 
>
> Tommi, just in case, can you confirm this fixes the problem for you please?

Confirmed! No more complaints from kmemleak.

Thanks,
Tommi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v2 02/10] CPU hotplug: Provide APIs for "full" atomic readers to prevent CPU offline

2012-12-05 Thread Srivatsa S. Bhat

> Replaying what Tejun wrote:
> 
> On 12/06/2012 12:13 AM, Srivatsa S. Bhat wrote:
>> Some of the atomic hotplug readers cannot tolerate CPUs going offline while
>> they are in their critical section. That is, they can't get away with just
>> synchronizing with the updates to the cpu_online_mask; they really need to
>> synchronize with the entire CPU tear-down sequence, because they are very
>> much involved in the hotplug related code paths.
>>
>> Such "full" atomic hotplug readers need a way to *actually* and *truly*
>> prevent CPUs from going offline while they are active.
>>
> 
> I don't think this is a good idea.  You really should just need
> get/put_online_cpus() and get/put_online_cpus_atomic().  The former
> the same as they are.  The latter replacing what
> preempt_disable/enable() was protecting.  Let's please not go
> overboard unless we know they're necessary.  I strongly suspect that
> breaking up reader side from preempt_disable and making writer side a
> bit lighter should be enough.  Conceptually, it really should be a
> simple conversion - convert preempt_disable/enable() pairs protecting
> CPU on/offlining w/ get/put_cpu_online_atomic() and wrap the
> stop_machine() section with the matching write lock.
> 

Yes, that _sounds_ sufficient, but IMHO it won't be, in practice. The
*number* of call-sites that you need to convert from preempt_disable/enable
to get/put_online_cpus_atomic() won't be too many, however the *frequency*
of usage of those call-sites can potentially be very high.

For example, the IPI path (smp_call_function_*) needs to use the new APIs
instead of preempt_disable(); and this is quite a hot path. So if we replace
preempt_disable/enable() with a synchronization mechanism that spins
the reader *throughout* the CPU offline operation, and provide no light-weight
alternative API, then even such very hot readers will have to bear the wrath.

And IPIs and interrupts are the work-generators in a system. Since they
can be hotplug readers, if we spin them like this, we effectively end up
recreating the stop_machine() "effect", without even using stop_machine().

This is what I meant in my yesterday's reply too:
https://lkml.org/lkml/2012/12/4/349

That's why we need a light-weight variant IMHO, so that we can use them
at least where feasible, like the IPI path (smp_call_function_*) for example.
That'll help us avoid the "stop_machine effect", hoping that most readers
are of the light-type. As I mentioned in the cover-letter, most readers
_are_ of the light-type (eg: 5 patches in this series deal with light
readers, only 1 patch deals with a heavy/full reader). I don't see why
we should unnecessarily slow down every reader just because a minority of
readers actually need full synchronization with CPU offline.

Regards,
Srivatsa S. Bhat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC v2 6/8] gpu: drm: tegra: Remove redundant host1x

2012-12-05 Thread Thierry Reding

On Wed, Dec 05, 2012 at 05:34:30PM +0100, Daniel Vetter wrote:
> On Wed, Dec 5, 2012 at 2:28 PM, Thierry Reding
>  wrote:
> >> Imo that's worse, since now drm manages even more of the driver->hw
> >> binding process. In my dream world the drm driver just registers a
> >> normal driver at module load time directly with whatever bus it's
> >> interested in, and then, from the bus' ->probe callback, sets up
> >> the entire driver, calling down into drm to set up interfaces to
> >> userspace (dev node, sysfs, and whatever is required to implement the
> >> ioctls) and uses the various helper libraries provided. So host1x
> >> providing a virtual device that tegradrm can match sounds much better
> >> (if that helps in decoupling from host1x).
> >
> > Okay, now I see where you're going. You want to replace the various
> > drm_*_init() functions with something more fine-grained that does the
> > initialization manually in each driver. Is that it? The obvious
> > disadvantage is that a lot of code would have to be duplicated, though
> > that can presumably be factored out into further helpers if necessary.
> >
> > On that note, I just noticed that drm_platform_init() actually binds a
> > single platform_device to the drm_driver, which isn't going to work very
> > well in case there are two devices that want to use the same driver. It
> > would be nice to get rid of that limitation as well while at it.
> 
> Yeah, it's the lack of generality that irks me, and writing driver
> init code is one giant sequence of setup function calls anyway -
> sprinkling 1-2 more doesn't really matter, but helps a lot if it
> results in the driver being in full control (e.g. if you need to
> reorder due to some special requirement, that's much easier to do then
> than with the current hoop-jumping). But like I've said, a bit of a
> bigger fish to fry, just wanted to point you into that direction ...

I have quite a number of things to finish up myself and this sounds like
quite a bit of work.

> >> Or simply call the tegradrm setup from host1x directly, creating a
> >> dependency on the tegradrm module. When trying to unload host1x it can
> >> then also call down into tegradrm to tear down the drm device.
> >> Afterwards you should be able to unload tegradrm without fuss. And if
> >> the hard dependency is an issue for other host1x clients this
> >> setup/teardown could be wrapped into an #ifdef CONFIG_TEGRADRM.
> >
> > This is what I originally proposed. However, since tegra-drm will need
> > to call functions provided by host1x we have a cyclic dependency.
> > Wouldn't that prevent either module from being unloaded?
> 
> You could pass down a host1x interface struct with a vtable to
> tegradrm (plus some static inline helpers to make those not a pain to
> call).

That sounds very interesting. It's also in line with what Terje proposed
earlier, making the host1x into a helper library, only the registration
part would remain with host1x. So in this kind of setup, the host1x
driver would initialize tegra-drm with something like:

int tegra_drm_init(struct device *parent, const struct host1x_ops *ops)
{
struct platform_device *pdev = to_platform_device(parent);
int err;

err = drm_platform_init(&tegra_drm_driver, pdev);
...
}

The DRM driver's .load() callback would of course have to be passed the
ops pointer. Either that or indeed some kind of custom setup function is
needed instead of calling drm_platform_init().

Maybe I should go and give such an implementation a shot, see where it
ends up.

> The other possibility (and I'm not proud at all of that code)
> which we've used in the intel-ips driver is to use symbol_get at
> runtime - but there the requirement was explicitly that intel-ips
> needs to work on server systems without the drm/i915 driver loaded,
> but still always have the support for interacting with it compiled in
> (to make distros happy). It's all rather awkward though ...

Hehe, indeed. Adding a dummy platform device suddenly doesn't sound that
bad. =)

Thierry


pgpJ3KFoWyoJS.pgp
Description: PGP signature

[PATCH 4/6] staging/rtl8192u: don't init globals to 0 or NULL

2012-12-05 Thread Sebastian Hahn

Fix a couple of instances where checkpatch complained about
initializing globals with 0.

Signed-off-by: Sebastian Hahn 
---
 drivers/staging/rtl8192u/ieee80211/ieee80211_module.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
index c673adc..76c56e5 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
@@ -220,7 +220,7 @@ void free_ieee80211(struct net_device *dev)
 
 #ifdef CONFIG_IEEE80211_DEBUG
 
-u32 ieee80211_debug_level = 0;
+u32 ieee80211_debug_level;
 static int debug = \
//  IEEE80211_DL_INFO   |
//  IEEE80211_DL_WX |
@@ -241,7 +241,7 @@ static int debug = \
//IEEE80211_DL_DATA |
IEEE80211_DL_ERR  //awayls open this flags to 
show error out
;
-struct proc_dir_entry *ieee80211_proc = NULL;
+struct proc_dir_entry *ieee80211_proc;
 
 static int show_debug_level(char *page, char **start, off_t offset,
int count, int *eof, void *data)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 5/6] staging/rtl8192u: use same indent for switch and case

2012-12-05 Thread Sebastian Hahn

Fix the checkpatch error "switch and case should be at the same indent"

Signed-off-by: Sebastian Hahn 
---
 .../staging/rtl8192u/ieee80211/ieee80211_softmac.c |  272 
 drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c  |   32 +-
 drivers/staging/rtl8192u/ieee80211/ieee80211_wx.c  |2 +-
 drivers/staging/rtl8192u/r8192U_core.c |  338 ++--
 drivers/staging/rtl8192u/r8192U_wx.c   |   12 +-
 drivers/staging/rtl8192u/r819xU_firmware.c |   96 +++---
 drivers/staging/rtl8192u/r819xU_phy.c  |   78 ++---
 7 files changed, 415 insertions(+), 415 deletions(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
index e83c310..7062f9d 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
@@ -1948,166 +1948,166 @@ ieee80211_rx_frame_softmac(struct ieee80211_device 
*ieee, struct sk_buff *skb,
 
switch (WLAN_FC_GET_STYPE(header->frame_ctl)) {
 
-   case IEEE80211_STYPE_ASSOC_RESP:
-   case IEEE80211_STYPE_REASSOC_RESP:
-
-   IEEE80211_DEBUG_MGMT("received [RE]ASSOCIATION RESPONSE 
(%d)\n",
-   WLAN_FC_GET_STYPE(header->frame_ctl));
-   if ((ieee->softmac_features & IEEE_SOFTMAC_ASSOCIATE) &&
-   ieee->state == 
IEEE80211_ASSOCIATING_AUTHENTICATED &&
-   ieee->iw_mode == IW_MODE_INFRA){
-   struct ieee80211_network network_resp;
-   struct ieee80211_network *network = 
&network_resp;
-
-   if (0 == (errcode=assoc_parse(ieee,skb, ))){
-   ieee->state=IEEE80211_LINKED;
-   ieee->assoc_id = aid;
-   ieee->softmac_stats.rx_ass_ok++;
-   /* station support qos */
-   /* Let the register setting defaultly 
with Legacy station */
-   if(ieee->qos_support) {
-   assoc_resp = (struct 
ieee80211_assoc_response_frame*)skb->data;
-   memset(network, 0, 
sizeof(*network));
-   if 
(ieee80211_parse_info_param(ieee,assoc_resp->info_element,\
-   
rx_stats->len - sizeof(*assoc_resp),\
-   
network,rx_stats)){
-   return 1;
-   }
-   else
-   {   //filling the 
PeerHTCap. //maybe not necessary as we can get its info from current_network.
-   
memcpy(ieee->pHTInfo->PeerHTCapBuf, network->bssht.bdHTCapBuf, 
network->bssht.bdHTCapLen);
-   
memcpy(ieee->pHTInfo->PeerHTInfoBuf, network->bssht.bdHTInfoBuf, 
network->bssht.bdHTInfoLen);
-   }
-   if (ieee->handle_assoc_response 
!= NULL)
-   
ieee->handle_assoc_response(ieee->dev, (struct 
ieee80211_assoc_response_frame*)header, network);
+   case IEEE80211_STYPE_ASSOC_RESP:
+   case IEEE80211_STYPE_REASSOC_RESP:
+
+   IEEE80211_DEBUG_MGMT("received [RE]ASSOCIATION RESPONSE (%d)\n",
+   WLAN_FC_GET_STYPE(header->frame_ctl));
+   if ((ieee->softmac_features & IEEE_SOFTMAC_ASSOCIATE) &&
+   ieee->state == IEEE80211_ASSOCIATING_AUTHENTICATED &&
+   ieee->iw_mode == IW_MODE_INFRA){
+   struct ieee80211_network network_resp;
+   struct ieee80211_network *network = &network_resp;
+
+   if (0 == (errcode=assoc_parse(ieee,skb, ))){
+   ieee->state=IEEE80211_LINKED;
+   ieee->assoc_id = aid;
+   ieee->softmac_stats.rx_ass_ok++;
+   /* station support qos */
+   /* Let the register setting defaultly with 
Legacy station */
+   if(ieee->qos_support) {
+   assoc_resp = (struct 
ieee80211_assoc_response_frame*)skb->data;
+   memset(network, 0, sizeof(*network));
+   if

[PATCH 0/6] staging:rtl8192u: begin cleanup of some checkpatch errors

2012-12-05 Thread Sebastian Hahn

Jennifer and I have started making the rtl8192u driver conform with the
CodingStyle rules a little better. We started out with a cleanfile run,
then did corrections for individual issues, grouped into patches by
specific issue fixed.

Note that this is in no way a complete cleanup, and since we contained
logical changes to single commits none of the commits pass checkpatch
themselves, as there's much more work to be done here.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] staging/rtl8192u: indent with tabs, not spaces

2012-12-05 Thread Sebastian Hahn

Converted staging/rtl8192u to use tabs instead of spaces for
indentation to fix the checkpatch error "code indent should use tabs
where possible".

Signed-off-by: Sebastian Hahn 
---
 drivers/staging/rtl8192u/ieee80211/ieee80211.h |   14 +-
 drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c  |  370 ++--
 .../staging/rtl8192u/ieee80211/ieee80211_softmac.c |4 +-
 drivers/staging/rtl8192u/ieee80211/ieee80211_tx.c  |   14 +-
 drivers/staging/rtl8192u/ieee80211/rtl_crypto.h|   84 ++---
 drivers/staging/rtl8192u/r8180_93cx6.c |2 +-
 drivers/staging/rtl8192u/r8180_93cx6.c.bak |  146 
 drivers/staging/rtl8192u/r8180_pm.h|2 +-
 drivers/staging/rtl8192u/r8192U.h  |  198 +--
 drivers/staging/rtl8192u/r8192U_core.c |   42 +--
 10 files changed, 511 insertions(+), 365 deletions(-)
 create mode 100644 drivers/staging/rtl8192u/r8180_93cx6.c.bak

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h 
b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
index b85e3e3..210898c 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
@@ -2088,10 +2088,10 @@ struct ieee80211_device {
 struct delayed_work start_ibss_wq;
struct work_struct wx_sync_scan_wq;
struct workqueue_struct *wq;
-// Qos related. Added by Annie, 2005-11-01.
-//STA_QOS  StaQos;
+   // Qos related. Added by Annie, 2005-11-01.
+   //STA_QOS  StaQos;
 
-//u32 STA_EDCA_PARAM[4];
+   //u32 STA_EDCA_PARAM[4];
//CHANNEL_ACCESS_SETTING ChannelAccessSetting;
 
 
@@ -2107,11 +2107,11 @@ struct ieee80211_device {
   struct net_device *dev);
 
int (*reset_port)(struct net_device *dev);
-int (*is_queue_full) (struct net_device * dev, int pri);
+   int (*is_queue_full) (struct net_device * dev, int pri);
 
-int (*handle_management) (struct net_device * dev,
-  struct ieee80211_network * network, u16 
type);
-int (*is_qos_active) (struct net_device *dev, struct sk_buff *skb);
+   int (*handle_management) (struct net_device * dev,
+ struct ieee80211_network * network, u16 type);
+   int (*is_qos_active) (struct net_device *dev, struct sk_buff *skb);
 
/* Softmac-generated frames (management) are TXed via this
 * callback if the flag IEEE_SOFTMAC_SINGLE_QUEUE is
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
index 8e58647..ee7ce5f 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
@@ -52,7 +52,7 @@ static inline void ieee80211_monitor_rx(struct 
ieee80211_device *ieee,
u16 fc = le16_to_cpu(hdr->frame_ctl);
 
skb->dev = ieee->dev;
-skb_reset_mac_header(skb);
+   skb_reset_mac_header(skb);
 
skb_pull(skb, ieee80211_get_hdrlen(fc));
skb->pkt_type = PACKET_OTHERHOST;
@@ -218,16 +218,16 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, 
struct sk_buff *skb,
 * this is not mandatory but seems that the probe
 * response parser uses it
 */
-struct ieee80211_hdr_3addr * hdr = (struct ieee80211_hdr_3addr 
*)skb->data;
+   struct ieee80211_hdr_3addr * hdr = (struct ieee80211_hdr_3addr 
*)skb->data;
 
rx_stats->len = skb->len;
ieee80211_rx_mgt(ieee,(struct ieee80211_hdr_4addr *)skb->data,rx_stats);
-//if ((ieee->state == IEEE80211_LINKED) && (memcmp(hdr->addr3, 
ieee->current_network.bssid, ETH_ALEN)))
-if ((memcmp(hdr->addr1, ieee->dev->dev_addr, ETH_ALEN)))//use ADDR1 to 
perform address matching for Management frames
-{
-dev_kfree_skb_any(skb);
-return 0;
-}
+   //if ((ieee->state == IEEE80211_LINKED) && (memcmp(hdr->addr3, 
ieee->current_network.bssid, ETH_ALEN)))
+   if ((memcmp(hdr->addr1, ieee->dev->dev_addr, ETH_ALEN)))//use ADDR1 to 
perform address matching for Management frames
+   {
+   dev_kfree_skb_any(skb);
+   return 0;
+   }
 
ieee80211_rx_frame_softmac(ieee, skb, rx_stats, type, stype);
 
@@ -773,7 +773,7 @@ void RxReorderIndicatePacket( struct ieee80211_device *ieee,
 }
 
 u8 parse_subframe(struct sk_buff *skb,
-  struct ieee80211_rx_stats *rx_stats,
+ struct ieee80211_rx_stats *rx_stats,
  struct ieee80211_rxb *rxb,u8* src,u8* dst)
 {
struct ieee80211_hdr_3addr  *hdr = (struct ieee80211_hdr_3addr* 
)skb->data;
@@ -1154,8 +1154,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct 
sk_buff *skb,
type, stype, skb->len);
goto rx_dropped;
}
-if (memcmp(bssid, ieee->current_network.bssid, ETH_ALEN))
-

[PATCH 2/6] staging/rtl8192u: put { on same line as struct

2012-12-05 Thread Sebastian Hahn

From: Jennifer Naumann 

This fixes the checkpatch error "open brace '{' following struct go on the
same line" in staging/rtl8192u

Signed-off-by: Jennifer Naumann 

---
 drivers/staging/rtl8192u/ieee80211/ieee80211.h   |   12 ++---
 drivers/staging/rtl8192u/ieee80211/rtl819x_BA.h  |3 +-
 drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h |   51 --
 drivers/staging/rtl8192u/r8192U.h|   27 
 drivers/staging/rtl8192u/r8192U_core.c   |3 +-
 drivers/staging/rtl8192u/r8192U_dm.h |9 ++--
 drivers/staging/rtl8192u/r8192U_wx.c |3 +-
 drivers/staging/rtl8192u/r819xU_cmdpkt.h |   18 +++-
 8 files changed, 42 insertions(+), 84 deletions(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h 
b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
index bb0e4fe..b85e3e3 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
@@ -1655,8 +1655,7 @@ typedef struct tx_pending_t{
struct ieee80211_txb *txb;
 }tx_pending_t;
 
-typedef struct _bandwidth_autoswitch
-{
+typedef struct _bandwidth_autoswitch {
long threshold_20Mhzto40Mhz;
longthreshold_40Mhzto20Mhz;
bool bforced_tx20Mhz;
@@ -1668,8 +1667,7 @@ typedef struct _bandwidth_autoswitch
 
 #define REORDER_WIN_SIZE   128
 #define REORDER_ENTRY_NUM  128
-typedef struct _RX_REORDER_ENTRY
-{
+typedef struct _RX_REORDER_ENTRY {
struct list_headList;
u16 SeqNum;
struct ieee80211_rxb* prxb;
@@ -1709,15 +1707,13 @@ typedef struct _IbssParms{
 #define MAX_NUM_RATES  264 // Max num of support rates element: 8,  Max num of 
ext. support rate: 255. 061122, by rcnjko.
 
 // RF state.
-typedefenum _RT_RF_POWER_STATE
-{
+typedefenum _RT_RF_POWER_STATE {
eRfOn,
eRfSleep,
eRfOff
 }RT_RF_POWER_STATE;
 
-typedef struct _RT_POWER_SAVE_CONTROL
-{
+typedef struct _RT_POWER_SAVE_CONTROL {
 
//
// Inactive Power Save(IPS) : Disable RF when disconnected
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_BA.h 
b/drivers/staging/rtl8192u/ieee80211/rtl819x_BA.h
index ffd1c0c..2c398ca 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_BA.h
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_BA.h
@@ -28,8 +28,7 @@ struct ieee80211_ADDBA_Req{
 //Is this need?I put here just to make it easier to define structure BA_RECORD 
//WB
 typedef union _SEQUENCE_CONTROL{
u16 ShortData;
-   struct
-   {
+   struct {
u16 FragNum:4;
u16 SeqNum:12;
}field;
diff --git a/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h 
b/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h
index 2625cde..2348ccd 100644
--- a/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h
+++ b/drivers/staging/rtl8192u/ieee80211/rtl819x_Qos.h
@@ -81,8 +81,7 @@ typedef   union _QOS_CTRL_FIELD{
u16 shortData;
 
// WMM spec
-   struct
-   {
+   struct {
u8  UP:3;
u8  usRsvd1:1;
u8  EOSP:1;
@@ -92,8 +91,7 @@ typedef   union _QOS_CTRL_FIELD{
}WMM;
 
// 802.11e: QoS data type frame sent by non-AP QSTAs.
-   struct
-   {
+   struct {
u8  TID:4;
u8  bIsQsize:1;// 0: BIT[8:15] is TXOP Duration 
Requested, 1: BIT[8:15] is Queue Size.
u8  AckPolicy:2;
@@ -102,8 +100,7 @@ typedef union _QOS_CTRL_FIELD{
}BySta;
 
// 802.11e: QoS data, QoS Null, and QoS Data+CF-Ack frames sent by HC.
-   struct
-   {
+   struct {
u8  TID:4;
u8  EOSP:1;
u8  AckPolicy:2;
@@ -112,8 +109,7 @@ typedef union _QOS_CTRL_FIELD{
}ByHc_Data;
 
// 802.11e: QoS (+) CF-Poll frames sent by HC.
-   struct
-   {
+   struct {
u8  TID:4;
u8  EOSP:1;
u8  AckPolicy:2;
@@ -133,14 +129,12 @@ typedef   union _QOS_CTRL_FIELD{
 typedefunion _QOS_INFO_FIELD{
u8  charData;
 
-   struct
-   {
+   struct {
u8  ucParameterSetCount:4;
u8  ucReserved:4;
}WMM;
 
-   struct
-   {
+   struct {
//Ref WMM_Specification_1-1.pdf, 2006-06-13 Isaiah
u8  ucAC_VO_UAPSD:1;
u8  ucAC_VI_UAPSD:1;
@@ -152,16 +146,14 @@ typedef   union _QOS_INFO_FIELD{
 
}ByWmmPsSta;
 
-   struct
-   {
+   struct {
//Ref WMM_Specification_1-1.pdf, 2006-06-13 Isaiah
u8  ucParameterSetCount:4;
u8  ucReserved:3;
u8

[PATCH 6/6] staging/rtl8192u: do not init statics to 0

2012-12-05 Thread Sebastian Hahn

Fix the checkpatch error "do not initialize statics to 0 or NULL"

Signed-off-by: Sebastian Hahn 
---
 .../staging/rtl8192u/ieee80211/ieee80211_softmac.c |2 +-
 .../rtl8192u/ieee80211/ieee80211_softmac_wx.c  |2 +-
 drivers/staging/rtl8192u/r8192U_core.c |   14 +++
 drivers/staging/rtl8192u/r8192U_dm.c   |   40 ++--
 4 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
index 7062f9d..454f889 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c
@@ -498,7 +498,7 @@ void ieee80211_softmac_scan_wq(struct work_struct *work)
 {
struct delayed_work *dwork = container_of(work, struct delayed_work, 
work);
struct ieee80211_device *ieee = container_of(dwork, struct 
ieee80211_device, softmac_scan_wq);
-   static short watchdog = 0;
+   static short watchdog;
u8 channel_map[MAX_CHANNEL_NUMBER+1];
memcpy(channel_map, GET_DOT11D_INFO(ieee)->channel_map, 
MAX_CHANNEL_NUMBER+1);
if(!ieee->ieee_up)
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac_wx.c 
b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac_wx.c
index 421da8a..45422db 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac_wx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac_wx.c
@@ -302,7 +302,7 @@ void ieee80211_wx_sync_scan_wq(struct work_struct *work)
HT_EXTCHNL_OFFSET chan_offset=0;
HT_CHANNEL_WIDTH bandwidth=0;
int b40M = 0;
-   static int count = 0;
+   static int count;
chan = ieee->current_network.channel;
netif_carrier_off(ieee->dev);
 
diff --git a/drivers/staging/rtl8192u/r8192U_core.c 
b/drivers/staging/rtl8192u/r8192U_core.c
index f59bd92..01f82d3 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -3613,7 +3613,7 @@ HalRxCheckStuck819xUsb(struct net_device *dev)
u16 RegRxCounter = read_nic_word(dev, 0x130);
struct r8192_priv *priv = ieee80211_priv(dev);
bool bStuck = FALSE;
-   static u8   rx_chk_cnt = 0;
+   static u8   rx_chk_cnt;
RT_TRACE(COMP_RESET,"%s(): RegRxCounter is %d,RxCounter is 
%d\n",__FUNCTION__,RegRxCounter,priv->RxCounter);
// If rssi is small, we should check rx for long time because of bad rx.
// or maybe it will continuous silent reset every 2 seconds.
@@ -4071,7 +4071,7 @@ externvoidrtl819x_watchdog_wqcallback(struct 
work_struct *work)
struct net_device *dev = priv->ieee80211->dev;
struct ieee80211_device* ieee = priv->ieee80211;
RESET_TYPE  ResetType = RESET_TYPE_NORESET;
-   static u8   check_reset_cnt=0;
+   static u8   check_reset_cnt;
bool bBusyTraffic = false;
 
if(!priv->up)
@@ -4554,12 +4554,12 @@ void rtl8192_process_phyinfo(struct r8192_priv * 
priv,u8* buffer, struct ieee802
u8  rfpath;
u32 nspatial_stream, tmp_val;
//u8i;
-   static u32 slide_rssi_index=0, slide_rssi_statistics=0;
-   static u32 slide_evm_index=0, slide_evm_statistics=0;
-   static u32 last_rssi=0, last_evm=0;
+   static u32 slide_rssi_index, slide_rssi_statistics;
+   static u32 slide_evm_index, slide_evm_statistics;
+   static u32 last_rssi, last_evm;
 
-   static u32 slide_beacon_adc_pwdb_index=0, 
slide_beacon_adc_pwdb_statistics=0;
-   static u32 last_beacon_adc_pwdb=0;
+   static u32 slide_beacon_adc_pwdb_index, 
slide_beacon_adc_pwdb_statistics;
+   static u32 last_beacon_adc_pwdb;
 
struct ieee80211_hdr_3addr *hdr;
u16 sc ;
diff --git a/drivers/staging/rtl8192u/r8192U_dm.c 
b/drivers/staging/rtl8192u/r8192U_dm.c
index 7e2918f..ea46717 100644
--- a/drivers/staging/rtl8192u/r8192U_dm.c
+++ b/drivers/staging/rtl8192u/r8192U_dm.c
@@ -201,8 +201,8 @@ extern void deinit_hal_dm(struct net_device *dev)
 void dm_CheckRxAggregation(struct net_device *dev) {
struct r8192_priv *priv = ieee80211_priv((struct net_device *)dev);
PRT_HIGH_THROUGHPUT pHTInfo = priv->ieee80211->pHTInfo;
-   static unsigned long lastTxOkCnt = 0;
-   static unsigned long lastRxOkCnt = 0;
+   static unsigned long lastTxOkCnt;
+   static unsigned long lastRxOkCnt;
unsigned long   curTxOkCnt = 0;
unsigned long   curRxOkCnt = 0;
 
@@ -359,7 +359,7 @@ static void dm_check_rate_adaptive(struct net_device * dev)
u32 currentRATR, targetRATR 
= 0;
u32 LowRSSIThreshForRA = 0, 
HighRSSIThreshForRA = 0;
boolbshort_gi_enabled = 
false;
-   static u8

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Stephen Rothwell

Hi Alan,

On Wed, 5 Dec 2012 15:47:49 + Alan Cox  wrote:
>
> And yes btw we should turn this option on in -next, and get these sort of
> things out of the tree for good. More importantly it'll mean anyone
> adding another one gets a whine on the spot.

While I appreciate your confidence, I don't notice quite a few new
warnings (because there are so many of them already :-().  Is there some
reason to not turn this on in our "normal" builds?  Does it produce many
false positives?  What compiler version is required?

I also currently don't carry patches that only ever appear in linux-next
(well, not intentionally anyway).  I assume it would require a patch to
the Makefile(s) to turn this on.
-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

pgpsX5aFB3Gtv.pgp
Description: PGP signature

Re: [RFC PATCH v2 02/10] CPU hotplug: Provide APIs for "full" atomic readers to prevent CPU offline

2012-12-05 Thread Tejun Heo

Hello,

On Thu, Dec 06, 2012 at 02:01:35AM +0530, Srivatsa S. Bhat wrote:
> Yes, that _sounds_ sufficient, but IMHO it won't be, in practice. The
> *number* of call-sites that you need to convert from preempt_disable/enable
> to get/put_online_cpus_atomic() won't be too many, however the *frequency*
> of usage of those call-sites can potentially be very high.

I don't think that will be the case and, even if it is, doing it this
way would make it difficult to tell.  The right thing to do is
replacing stop_machine with finer grained percpu locking first.
Refining it further should happen iff that isn't enough and there
isn't a simpler solution.  So, let's please do the simple conversion
first.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [tpmdd-devel] [PATCH 1/1] TPM: STMicroelectronics ST33 I2C KERNEL 3.x.x

2012-12-05 Thread Kent Yoder

On Wed, Dec 05, 2012 at 09:20:47PM +0100, Peter Hüwe wrote:
> Hi Kent,
> > 
> >   Heh, duh, well of course it is. I've now staged everything I'm
> > planning on pushing at:
> > 
> > git://github.com/shpedoikal/linux.git tpmdd-12-05-12
> > 
> > Please test and let me know if I missed anything.
> > 
> > Thanks,
> > Kent
> > 
> > > Kent
> > > 
> 
> (I'm still writing this on behalf of myself ;)
> 
> 
> While I really appreciate you helping Mathias out here, I'm not so sure 
> whether an offlist discussion of a driver submission is a really good idea. 
> I did not see any v2 / improvements on list and now it's committed (?!)

  This is already at least v3 of the driver IIRC. And it's not fully
committed at all, it's just in a staging tree. I can blow it away at any
time if we find problems.

> There's no need to argue here, I'm fine with this,
> but I'd highly appreciate if at least the result is published again in the 
> future on the mailing list, for proper review. 

  Not a problem. Usually I'd attach any updates I planned to commit for
public review, then if anything wasn't made public before I issue my
pull request to send them to security-next, I'd append the full diff to
the pull request at that time. Either way, all changes will hit a list
at some point in time.

> If the mailfilter got the mails I apologize.
> 
> 
> Can you perhaps post the message to the list, so a proper review is possible?
> 
> I thought Mathias had changed the naming of the files? I don't see that in
> your commit.

  Good catch, this is the kind of review I was asking for. :-)

> 
> The version you committed to that branch still has some of the items of my 
> first review. (e.g.
>  * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> * the Free Software Foundation; either version 2 of the License, or
> * (at your option) any later version.)
> 
> -> The (possible) GPL v3 clause has to go away for the kernel, but I'm not a 
> lawyer.

  A GPLv3 clause would say "v3 or any later version". This should be
fine.

Kent

> 
> 
> Thanks,
> Peter
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] Capabilities still can't be inherited by normal programs

2012-12-05 Thread Serge Hallyn

Quoting Andy Lutomirski (l...@amacapital.net):
> On Tue, Dec 4, 2012 at 5:54 AM, Serge E. Hallyn  wrote:
> > Quoting Andy Lutomirski (l...@amacapital.net):
> >> >> d) If I really wanted, I could emulate execve without actually doing
> >> >> execve, and capabilities would be inherited.
> >> >
> >> > If you could modify the executable properties of the binary that has
> >> > the privilege to wield a privilege then you are either exploiting an
> >> > app bug, or doing something the privileged binary has been trusted to
> >> > do.
> >>
> >> That's not what I mean.  I would:
> >>
> >> fork()
> >> munmap everything
> >> mmap ld.so
> >> set up a fake initial stack and the right fd or mapping or whatever
> >> just to ld-linux.so
> >>
> >> That's almost execve, and privilege inheritance works.
> >
> > But of course that is why you only want to fill fI on programs you trust
> > not to do that.  What you are arguing is that you want to give fI on
> > programs you don't trust anyway, and so heck why not just give it on
> > everything.
> >
> 
> Huh?  I'd set fP on a program I expect to do *exactly* that (or use
> actual in-kernel capability inheritance, which I would find vastly
> more pleasant).  If I give a program a capability (via fP or fI & pI),
> then I had better trust it not to abuse that capability.  Having it
> pass that capability on to a child helper process would be just fine
> with me *because it already has that capability*.
> 
> The problem with the current inheritance mechanism is that it's very
> difficult to understand what it means for an fI bit or a pI bit to be
> set.  Saying "set a pI bit using pam if you want to grant permission
> to that user to run a particular program with fI set" is crap -- it
> only works if there is exactly one binary on the system with that bit
> set.  In any case, a different administrator or package might use it
> for something different.
> 
> Suppose I use the (apparently) current suggested approach: I install a
> fI=cap_net_raw copy of tcpdump somewhere.  Then I write a helper that
> has fP=cap_net_raw and invokes that copy of tcpdump after appropriate
> validation of parameters.  All is well.

Since you're writing a special helper, you can surely have it validate
the userid and make it so the calling user doesn't have to have
cap_net_raw in pI?

> Now I want to grant only a subset of users permission to run ping.  So
> I modify ping so it's cap_net_raw=i (not p) and grant those users
> pI=cap_net_raw.
> 
> The end result: I introduced a security hole: the users with
> cap_net_raw=i can run tcpdump *without validation via the helper*.
> Oops.

-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Use PCI ROMs from EFI boot services

2012-12-05 Thread David Woodhouse

On Wed, 2012-12-05 at 13:09 -0700, Bjorn Helgaas wrote:
> 
> David, Eric, what about the kexec question?  It looks to me like this
> wouldn't make things worse than they are today.  If I understand
> correctly, today we don't use ROM data from EFI on either an initial
> boot or a kexec.  After this patch, we could use EFI ROM data on the
> initial boot, but not after a kexec.  So it's worse in the sense that
> the kexec case doesn't match the initial boot, but at least it's not
> something that used to work and is now broken.

Yeah, kexec under EFI doesn't work too well. I have a firmware running
in qemu locally which will let you call SetVirtualAddressMap more than
once, which is a step towards fixing it sanely. It got preempted, but
I'll take another look at it shortly.

-- 
David Woodhouse                            Open Source Technology Centre
david.woodho...@intel.com  Intel Corporation





smime.p7s
Description: S/MIME cryptographic signature

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Borislav Petkov

On Thu, Dec 06, 2012 at 07:57:21AM +1100, Stephen Rothwell wrote:
> On Wed, 5 Dec 2012 15:47:49 + Alan Cox  wrote:
> > And yes btw we should turn this option on in -next, and get these sort of
> > things out of the tree for good. More importantly it'll mean anyone
> > adding another one gets a whine on the spot.
> 
> While I appreciate your confidence, I don't notice quite a few new
> warnings (because there are so many of them already :-().  Is there some
> reason to not turn this on in our "normal" builds?  Does it produce many
> false positives?

Yes, it produces a huge number of warnings which need weeding out (some
of them are false positives and some of them are simply unfixable due to
design decisions in the kernel, etc, etc):

$ make W=123 drivers/pnp/pnpacpi/core.o 2> w.log
make[1]: Nothing to be done for `all'.
  CHK include/generated/uapi/linux/version.h
  CHK include/generated/utsrelease.h
make[1]: Nothing to be done for `relocs'.
  CALL    scripts/checksyscalls.sh
  CC  drivers/pnp/pnpacpi/core.o
$ wc w.log
  2305  11202 168011 w.log

This is 2305 lines only for one compilation unit.

So if one enables all additional warning levels (this is what "W=123"
does) your build logs will be huge.

> What compiler version is required?

Works on all compilers by checking for supported -W options - see
scripts/Makefile.build.

> I also currently don't carry patches that only ever appear in
> linux-next (well, not intentionally anyway). I assume it would require
> a patch to the Makefile(s) to turn this on.

See above.

So ideally someone would build with "W=123", track all
new warnings appearing with each new patch in linux-next, and nag the
patch author to fix them before they hit mainline. This would require a
moderate level of scripting and experimenting though. The advantage is
that with something like that we'll be able to use all the -W code checking
methods implemented in gcc on our code and let the compiler possibly catch
more stuff.

We simply need someone not lazy enough to write that tracking and
nagging bit :).

Thanks.

-- 
Regards/Gruss,
Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [tpmdd-devel] [PATCH 1/1] TPM: STMicroelectronics ST33 I2C KERNEL 3.x.x

2012-12-05 Thread Peter Hüwe

Hi Kent, Matthias,

Am Mittwoch, 5. Dezember 2012, 19:07:20 schrieb Kent Yoder:
>   Heh, duh, well of course it is. I've now staged everything I'm
> planning on pushing at:
> 
> git://github.com/shpedoikal/linux.git tpmdd-12-05-12
> 
> Please test and let me know if I missed anything.


I just checked out your commit from github.

Here's a part of the review:

Smatch still complains a bit, sparse is fine.:

 make -C /data/data-old/linux-2.6/ M=`pwd` C=1 CHECK=smatch modules
make: Entering directory `/data/data-old/linux-2.6'
  CHECK   /data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.c
/data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.c:535 
tpm_stm_i2c_recv() warn: variable dereferenced before check 'chip' (see line 
531)
/data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.c:748 
tpm_st33_i2c_probe() warn: variable dereferenced before check 'platform_data' 
(see line 659)
/data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.c:848 
tpm_st33_i2c_pm_resume() warn: should this be a bitwise op?
/data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.c:848 
tpm_st33_i2c_pm_resume() warn: should this be a bitwise op?
  CC [M]  /data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.o
  Building modules, stage 2.
  MODPOST 6 modules
  LD [M]  /data/data-old/linux-2.6/drivers/char/tpm/tpm_stm_st33_i2c.ko
make: Leaving directory `/data/data-old/linux-2.6'

Please check.


(maybe also fix the checkpatch -strict stuff as well? Would be nice ;)





drivers/char/tpm/tpm_stm_st33_i2c.h 
- Does the driver really need a separate header file in drivers/char/tpm/? To
me it seems everything can be included in the C file.

> struct st_tpm_hash {
>   int size;
>   u8 *data;
> };
is unused - please remove.

> #define MINOR_NUM_I2C 224
Please remove, it's unused and if you really need it use the one from tpm.h

include/linux/i2c/tpm_stm_st33_i2c.h
I'm not sure if this is needed publicly? Or does only your driver need this?

>struct st33zp24_platform_data {
Telling from the name I have no idea what this device is.





drivers/char/tpm/tpm_stm_st33_i2c.c

>enum stm33zp24_int_flags {
>   TPM_GLOBAL_INT_ENABLE = 0x80,
>   TPM_INTF_CMD_READY_INT = 0x080,
>   TPM_INTF_FIFO_AVALAIBLE_INT = 0x040,
>   TPM_INTF_WAKE_UP_READY_INT = 0x020,
>   TPM_INTF_LOCALITY_CHANGE_INT = 0x004,
>   TPM_INTF_STS_VALID_INT = 0x002,
>   TPM_INTF_DATA_AVAIL_INT = 0x001,
>};

Why the leading zeros? please remove.


> static int tpm_st33_i2c_pm_suspend(struct i2c_client *client, pm_message_t 
> mesg)
>...
> static int tpm_st33_i2c_pm_resume(struct i2c_client *client)
>,,,
>static struct i2c_driver tpm_st33_i2c_driver = {
>   .driver = {
>  .owner = THIS_MODULE,
>  .name = TPM_ST33_I2C,
>  },
>   .probe = tpm_st33_i2c_probe,
>   .remove = tpm_st33_i2c_remove,
>   .resume = tpm_st33_i2c_pm_resume,
>   .suspend = tpm_st33_i2c_pm_suspend,
>   .id_table = tpm_st33_i2c_id
>};

Please convert resume/suspend  to .driver.pm 

It's pretty easy.
See this post  for details
http://sourceforge.net/mailarchive/message.php?msg_id=29516784
Rafael spent quite a lot of effort to convert almost every driver back 
then, so we should 'fix' new ones.



> /*
>  * tpm_st33_i2c_init initialize driver
>  * @return: 0 if successful, else non zero value.
>  */
> static int __init tpm_st33_i2c_init(void)
> {
>   return i2c_add_driver(&tpm_st33_i2c_driver);
> }
> 
> /*
>  * tpm_st33_i2c_exit The kernel calls this function during unloading the
>  * module or during shut down process
>  */
> static void __exit tpm_st33_i2c_exit(void)
> {
>   i2c_del_driver(&tpm_st33_i2c_driver);
> }
> 
> module_init(tpm_st33_i2c_init);
> module_exit(tpm_st33_i2c_exit);

Hooray for oneliners ;)
+ module_i2c_driver(tpm_st33_i2c_driver);



Keep on hacking ;)

Thanks,
PeterH


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Alan Cox

> While I appreciate your confidence, I don't notice quite a few new
> warnings (because there are so many of them already :-().  Is there some
> reason to not turn this on in our "normal" builds?  Does it produce many
> false positives?  What compiler version is required?

I've not seen any false positives from it yet. Unlike some of the
variable related ones it seems pretty solid.

> I also currently don't carry patches that only ever appear in linux-next
> (well, not intentionally anyway).  I assume it would require a patch to
> the Makefile(s) to turn this on.

Yes. I guess it belongs to the build scripts maintainers ?

Alan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Alan Cox

On Wed, 5 Dec 2012 22:12:45 +0100
Borislav Petkov  wrote:

> On Thu, Dec 06, 2012 at 07:57:21AM +1100, Stephen Rothwell wrote:
> > On Wed, 5 Dec 2012 15:47:49 + Alan Cox  wrote:
> > > And yes btw we should turn this option on in -next, and get these sort of
> > > things out of the tree for good. More importantly it'll mean anyone
> > > adding another one gets a whine on the spot.
> > 
> > While I appreciate your confidence, I don't notice quite a few new
> > warnings (because there are so many of them already :-().  Is there some
> > reason to not turn this on in our "normal" builds?  Does it produce many
> > false positives?
> 
> Yes, it produces a huge number of warnings which need weeding out (some
> of them are false positives and some of them are simply unfixable due to
> design decisions in the kernel, etc, etc):
> 
> $ make W=123 drivers/pnp/pnpacpi/core.o 2> w.log

I was just talking about the always true/always false stuff !
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [tpmdd-devel] [PATCH 1/1] TPM: STMicroelectronics ST33 I2C KERNEL 3.x.x

2012-12-05 Thread Peter Hüwe

Hi Kent,

Am Mittwoch, 5. Dezember 2012, 22:00:48 schrieb Kent Yoder:
>   This is already at least v3 of the driver IIRC. And its not fully
> committed at all, its just in a staging tree. I can blow it away at any
> time if we find problems.
> 
>   Not a problem. usually I'd attach any updates I planned to commit for
> public review, then if anything wasn't made public before I issue my
> pull request to send them to security-next, I'd append the full diff to
> the pull request at that time. Either way, all changes will hit a list
> at some point in time.
Ok - great ;) Maybe I got the 'planning on pushing' wrong.


> > I thought the Mathias has changed the naming of the files? I don't see
> > that in your commit.
> 
>   Good catch, this is the kind of review I was asking for. :-)

Hehe. Glad to hear.
You know I care for the tpm subsystem ;)

> 
> > The version you committed to that branch still has some of the items of
> > my first review. (e.g.
> > 
> >  * This program is free software; you can redistribute it and/or modify
> > 
> > * it under the terms of the GNU General Public License as published by
> > * the Free Software Foundation; either version 2 of the License, or
> > * (at your option) any later version.)
> > 
> > -> The (possible) GPL v3 clause has to go away for the kernel, but I'm
> > not a lawyer.
> 
>   A GPLv3 clause would say "v3 or any later version". This should be
> fine.

Hmm, okay. 
I just googled for it on lkml, there are some files and drivers.
So I guess it's okay - sorry for the noise.
But I'm not a lawyer, personally I'd go for GPLv2 only but that's my personal 
opinion. 


Thanks,
Peter


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Andrew Morton

On Wed, 5 Dec 2012 16:31:21 +0100
Borislav Petkov  wrote:

> On Wed, Dec 05, 2012 at 03:27:56PM +, Alan Cox wrote:
> > On Wed, 5 Dec 2012 15:29:35 +0100
> > Borislav Petkov  wrote:
> > 
> > > On Wed, Dec 05, 2012 at 08:09:01AM +0100, Andreas Mohr wrote:
> > > > Hi,
> > > > 
> > > > drivers/pnp/pnpacpi/core.c: In function 'ispnpidacpi':
> > > > drivers/pnp/pnpacpi/core.c:65:2: warning: logical 'or' of collectively
> > > > exhaustive tests is always true [-Wlogical-op]
> > > > drivers/pnp/pnpacpi/core.c:66:2: warning: logical 'or' of collectively
> > > > exhaustive tests is always true [-Wlogical-op]
> > > > drivers/pnp/pnpacpi/core.c:67:2: warning: logical 'or' of collectively
> > > > exhaustive tests is always true [-Wlogical-op]
> > > > 
> > > > 
> > > > That's already the second less enticing -Wlogical-op issue
> > > > which was discovered by accident during less than two days
> > 
> > No it's not. It's been reported in bugzilla. I sent patches ages ago.
> > They were ignored. Coverity has had it tagged for years (and a ton more
> > of them you've not noticed yet)
> > 
> > http://article.gmane.org/gmane.linux.acpi.devel/56753/match=test_alpha
> > 
> > This isn't discovered, this is in the "If you stick your fingers in your
> > ears and hum you can't hear the screaming" category.
> 
> Hilarious!
> 
> Andrew, would you please pick up Alan's patch? It clearly fixes an
> ancient bug in the pnpacpi code.
> 

Bjorn had a review comment which appears to remain unaddressed:

: The original is definitely broken.
: 
: I think the corrected test allows PNP IDs containing '@', which
: doesn't appear legal per sec 6.1.5 of the ACPI 5.0 spec.  Should this
: be
: 
: +   if (!('A' <= (c) && (c) <= 'Z')) \
: 
: instead?

Also, the original patch is missing a signed-off-by.  Here's what I
have queued:

From: Alan Cox 
Subject: pnpacpi: fix incorrect TEST_ALPHA() test

TEST_ALPHA() is broken and always returns 0.

[a...@linux-foundation.org: return false for '@' as well, per Bjorn]
Signed-off-by: Alan Cox 
Cc: Bjorn Helgaas 
Cc: Borislav Petkov 
Cc: Andreas Mohr 
Cc: Li Shaohua 
Signed-off-by: Andrew Morton 
---

 drivers/pnp/pnpacpi/core.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff -puN drivers/pnp/pnpacpi/core.c~pnpacpi-fix-incorrect-test_alpha-test 
drivers/pnp/pnpacpi/core.c
--- a/drivers/pnp/pnpacpi/core.c~pnpacpi-fix-incorrect-test_alpha-test
+++ a/drivers/pnp/pnpacpi/core.c
@@ -58,7 +58,7 @@ static inline int __init is_exclusive_de
if (!(('0' <= (c) && (c) <= '9') || ('A' <= (c) && (c) <= 'F'))) \
return 0
 #define TEST_ALPHA(c) \
-   if (!('@' <= (c) || (c) <= 'Z')) \
+   if (!('A' <= (c) && (c) <= 'Z')) \
return 0
 static int __init ispnpidacpi(const char *id)
 {
_

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM: MMU: optimize for set_spte

2012-12-05 Thread Marcelo Tosatti

On Tue, Dec 04, 2012 at 07:17:11AM +0800, Xiao Guangrong wrote:
> There are two cases we need to adjust page size in set_spte:
> 1): the one is other vcpu creates new sp in the window between mapping_level()
> and acquiring mmu-lock.
> 2): the another case is the new sp is created by itself (page-fault path) when
> guest uses the target gfn as its page table.
> 
> In current code, set_spte drop the spte and emulate the access for these case,
> it works not good:
> - for the case 1, it may destroy the mapping established by other vcpu, and
>   do expensive instruction emulation.
> - for the case 2, it may emulate the access even if the guest is accessing
>   the page which not used as page table. There is a example, 0~2M is used as
>   huge page in guest, in this huge page, only page 3 used as page table, then
>   guest read/writes on other pages can cause instruction emulation.
> 
> Both of these cases can be fixed by allowing guest to retry the access, it
> will refault, then we can establish the mapping by using small page
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  arch/x86/kvm/mmu.c |   16 
>  1 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b875a9e..01d7c2a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>   || (!vcpu->arch.mmu.direct_map && write_fault
>   && !is_write_protection(vcpu) && !user_fault)) {
> 
> + /*
> +  * There are two cases:
> +  * - the one is other vcpu creates new sp in the window
> +  *   between mapping_level() and acquiring mmu-lock.
> +  * - the another case is the new sp is created by itself
> +  *   (page-fault path) when guest uses the target gfn as
> +  *   its page table.
> +  * Both of these cases can be fixed by allowing guest to
> +  * retry the access, it will refault, then we can establish
> +  * the mapping by using small page.
> +  */
>   if (level > PT_PAGE_TABLE_LEVEL &&
> - has_wrprotected_page(vcpu->kvm, gfn, level)) {
> - ret = 1;
> - drop_spte(vcpu->kvm, sptep);
> + has_wrprotected_page(vcpu->kvm, gfn, level))
>   goto done;
> - }
> 
>   spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
> 
> -- 
> 1.7.7.6

ACK.

Does it fix your testcase?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH 4/6] kvm: Move private memory slots to start of memslots array

2012-12-05 Thread Marcelo Tosatti

On Mon, Dec 03, 2012 at 04:39:30PM -0700, Alex Williamson wrote:
> In order to make the memslots array grow on demand, move the private
> slots to the lower indexes of the array.  The private slots are
> assumed likely to be in use, so if we didn't do this we'd end up
> allocating the full memslots array all the time.

You're changing private slot ids. I fail to see why that is relevant
to on-demand growing.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH 3/6] kvm: Merge id_to_index into memslots

2012-12-05 Thread Marcelo Tosatti

On Mon, Dec 03, 2012 at 04:39:24PM -0700, Alex Williamson wrote:
> This allows us to resize this structure and therefore the number of
> memslots as part of the RCU update.

Why is this necessary? "struct memslots" is updated, message above
conflicts with that.

If there is a reason, "id_to_index" becomes confusing.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH 5/6] kvm: Re-introduce memslots->nmemslots

2012-12-05 Thread Marcelo Tosatti

On Mon, Dec 03, 2012 at 04:39:36PM -0700, Alex Williamson wrote:
> struct kvm_memory_slot is currently 52 bytes (LP64), not counting the
> arch data.  On x86 this means the memslot array to support a tiny 32+3
> entries (user+private) is over 2k.  We'd like to support more slots
> so that we can support more assigned devices, but it doesn't make
> sense to penalize everyone by using a statically allocated array.
> This allows us to start introducing a grow-able array.
> 
> Signed-off-by: Alex Williamson 
> ---
>  arch/ia64/kvm/kvm-ia64.c |2 +-
>  arch/powerpc/kvm/book3s_hv.c |2 +-
>  arch/x86/kvm/vmx.c   |1 +
>  arch/x86/kvm/x86.c   |4 +++-
>  include/linux/kvm_host.h |9 ++---
>  virt/kvm/kvm_main.c  |   10 ++
>  6 files changed, 18 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 012e5dd..96401b5 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1836,7 +1836,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>  
>   memslot = id_to_memslot(kvm->memslots, log->slot);
>   r = -ENOENT;
> - if (!memslot->dirty_bitmap)
> + if (!memslots || !memslot->dirty_bitmap)
>   goto out;
>  
>   kvm_ia64_sync_dirty_log(kvm, memslot);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 56067db..0417190 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1267,7 +1267,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
> kvm_dirty_log *log)
>  
>   memslot = id_to_memslot(kvm->memslots, log->slot);
>   r = -ENOENT;
> - if (!memslot->dirty_bitmap)
> + if (!memslot || !memslot->dirty_bitmap)
>   goto out;
>  
>   n = kvm_dirty_bitmap_bytes(memslot);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2bb9157..07fdd90 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2751,6 +2751,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
>  
>   slots = kvm_memslots(kvm);
>   slot = id_to_memslot(slots, KVM_PRIVATE_MEM_SLOTS);
> + BUG_ON(!slot);
>   base_gfn = slot->base_gfn + slot->npages - 3;
>  
>   return base_gfn << PAGE_SHIFT;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8765485..53fe9b2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3139,9 +3139,11 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
> kvm_dirty_log *log)
>   goto out;
>  
>   memslot = id_to_memslot(kvm->memslots, log->slot);
> + r = -ENOENT;
> + if (!memslot)
> + goto out;
>  
>   dirty_bitmap = memslot->dirty_bitmap;
> - r = -ENOENT;
>   if (!dirty_bitmap)
>   goto out;
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7b3d5c4..1955a4e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -313,6 +313,7 @@ struct kvm_irq_routing_table {};
>   * to get the memslot by its id.
>   */
>  struct kvm_memslots {
> + int nmemslots;
>   u64 generation;
>   struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
>  };
> @@ -397,7 +398,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm 
> *kvm, int i)
>  
>  #define kvm_for_each_memslot(memslot, slots) \
>   for (memslot = &slots->memslots[0]; \
> -   memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
> +   memslot < slots->memslots + slots->nmemslots && memslot->npages;\
>   memslot++)
>  
>  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
> @@ -424,10 +425,12 @@ static inline struct kvm_memslots *kvm_memslots(struct 
> kvm *kvm)
>  static inline struct kvm_memory_slot *
>  id_to_memslot(struct kvm_memslots *slots, int id)
>  {
> - int index = slots->memslots[id].id_to_index;
>   struct kvm_memory_slot *slot;
>  
> - slot = &slots->memslots[index];
> + if (id >= slots->nmemslots)
> + return NULL;
> +
> + slot = &slots->memslots[slots->memslots[id].id_to_index];
>  
>   WARN_ON(slot->id != id);
>   return slot;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 3ce2664..ebd3960 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -444,7 +444,9 @@ static void kvm_init_memslots_id(struct kvm *kvm)
>   int i;
>   struct kvm_memslots *slots = kvm->memslots;
>  
> - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
> + slots->nmemslots = KVM_MEM_SLOTS_NUM;
> +
> + for (i = 0; i < kvm->memslots->nmemslots; i++)
>   slots->memslots[i].id_to_index = slots->memslots[i].id = i;
>  }
>  
> @@ -658,10 +660,10 @@ static void sort_memslots(struct kvm_memslots *slots)
>  {
>   int i;
>  
> - sort(slots->memslots, KVM_MEM_SLOTS_NUM,
> + sort(slots->memslots, slots->nmemslots,
> sizeof(struct kvm_memory_slot), cmp_memslot,

Re: [RFC PATCH 0/6] kvm: Growable memory slot array

2012-12-05 Thread Marcelo Tosatti

On Mon, Dec 03, 2012 at 04:39:05PM -0700, Alex Williamson wrote:
> Memory slots are currently a fixed resource with a relatively small
> limit.  When using PCI device assignment in a qemu guest it's fairly
> easy to exhaust the number of available slots.  I posted patches
> exploring growing the number of memory slots a while ago, but it was
> prior to caching memory slot array misses and therefore had potentially
> poor performance.  Now that we do that, Avi seemed receptive to
> increasing the memory slot array to arbitrary lengths.  I think we
> still don't want to impose unnecessary kernel memory consumptions on
> guests not making use of this, so I present again a growable memory
> slot array.
> 
> A couple notes/questions; in the previous version we had a
> kvm_arch_flush_shadow() call when we increased the number of slots.
> I'm not sure if this is still necessary.  I had also made the x86
> specific slot_bitmap dynamically grow as well and switch between a
> direct bitmap and indirect pointer to a bitmap.  That may have
> contributed to needing the flush.  

I don't remember. Do you recall what was the argument back then?
(there must have been some).

> I haven't done that yet here
> because it seems like an unnecessary complication if we have a max
> on the order of 512 or 1024 entries.  A bit per slot isn't a lot of
> overhead.  If we want to go more, maybe we should make it switch.
> That leads to the final question, we need an upper bound since this
> does allow consumption of extra kernel memory, what should it be?  A
> PCI bus filled with assigned devices can theoretically use up to 2048
> slots (32 devices * 8 functions * (6 BARs + ROM + possibly split
> MSI-X BAR)).  For this RFC, I don't change the max, just make it
> grow up to 32 user slots.  Untested on anything but x86 so far.
> Thanks,

Not sure. Some reasonable number based on current usage expectations?
(can be increased later if necessary).

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] scatterlist: add a warning if sg_virt() is used on highmem pages

2012-12-05 Thread Andrew Morton

On Wed,  5 Dec 2012 12:28:18 +0100
Sebastian Andrzej Siewior  wrote:

> sg_virt() on highmem pages won't work. This WARN_ON() should catch some
> that still try.
> 
> Signed-off-by: Sebastian Andrzej Siewior 
> ---
>  include/linux/scatterlist.h |3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
> index 4bd6c06..4d4adab 100644
> --- a/include/linux/scatterlist.h
> +++ b/include/linux/scatterlist.h
> @@ -198,6 +198,9 @@ static inline dma_addr_t sg_phys(struct scatterlist *sg)
>   **/
>  static inline void *sg_virt(struct scatterlist *sg)
>  {
> +#ifdef CONFIG_DEBUG_SG
> + WARN_ON(PageHighMem(sg_page(sg)));
> +#endif
>   return page_address(sg_page(sg)) + sg->offset;
>  }

Why won't it work?  page_address() will search the kmap table and will
search for the page.  If the caller had previously kmapped that page,
all is well.  If the caller has failed to kmap the page, sg_virt() will
return a nearly-null pointer and presumably someone will later go oops.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Borislav Petkov

On Wed, Dec 05, 2012 at 09:41:14PM +, Alan Cox wrote:
> I was just talking about the always true/always false stuff !

That's -Wlogical-op and not on by default. You can enable it with -W=2.

-- 
Regards/Gruss,
Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] Capabilities still can't be inherited by normal programs

2012-12-05 Thread Andy Lutomirski

On Wed, Dec 5, 2012 at 1:05 PM, Serge Hallyn  wrote:
> Quoting Andy Lutomirski (l...@amacapital.net):
>> On Tue, Dec 4, 2012 at 5:54 AM, Serge E. Hallyn  wrote:
>> > Quoting Andy Lutomirski (l...@amacapital.net):
>> >> >> d) If I really wanted, I could emulate execve without actually doing
>> >> >> execve, and capabilities would be inherited.
>> >> >
>> >> > If you could modify the executable properties of the binary that has
>> >> > the privilege to wield a privilege then you are either exploiting an
>> >> > app bug, or doing something the privileged binary has been trusted to
>> >> > do.
>> >>
>> >> That's not what I mean.  I would:
>> >>
>> >> fork()
>> >> munmap everything
>> >> mmap ld.so
>> >> set up a fake initial stack and the right fd or mapping or whatever
>> >> just to ld-linux.so
>> >>
>> >> That's almost execve, and privilege inheritance works.
>> >
>> > But of course that is why you only want to fill fI on programs you trust
>> > not to do that.  What you are arguing is that you want to give fI on
>> > programs you don't trust anyway, and so heck why not just give it on
>> > everything.
>> >
>>
>> Huh?  I'd set fP on a program I expect to do *exactly* that (or use
>> actual in-kernel capability inheritance, which I would find vastly
>> more pleasant).  If I give a program a capability (via fP or fI & pI),
>> then I had better trust it not to abuse that capability.  Having it
>> pass that capability on to a child helper process would be just fine
>> with me *because it already has that capability*.
>>
>> The problem with the current inheritance mechanism is that it's very
>> difficult to understand what it means for an fI bit or a pI bit to be
>> set.  Saying "set a pI bit using pam if you want to grant permission
>> to that user to run a particular program with fI set" is crap -- it
>> only works if there is exactly one binary on the system with that bit
>> set.  In any case, a different administrator or package might use it
>> for something different.
>>
>> Suppose I use the (apparently) current suggested approach: I install a
>> fI=cap_net_raw copy of tcpdump somewhere.  Then I write a helper that
>> has fP=cap_net_raw and invokes that copy of tcpdump after appropriate
>> validation of parameters.  All is well.
>
> Since you're writing a special helper, you can surely have it validate
> the userid and make it so the calling user doesn't have to have
> cap_net_raw in pI?

I can and did.

The mere presence of a cap_net_raw+i tcpdump binary is more or less
equivalent to saying that users with cap_net_raw in pI can capture
packets.  I've just prevented pI=cap_net_raw from meaning anything
less than "can capture packets".  So I think we should bite the bullet
and just let programs opt in (via some appropriately careful
mechanism) to real capability inheritance.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/3] HWPOISON, hugetlbfs: small bug fixes

2012-12-05 Thread Naoya Horiguchi

Hi,

I found some small bugs in memory error handling on hugepages while
testing a recent kernel, so I wrote patches for them.
Could I have your reviews or comments on them?

Thanks,
Naoya
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/3] HWPOISON, hugetlbfs: fix "bad pmd" warning in unmapping hwpoisoned hugepage

2012-12-05 Thread Naoya Horiguchi

When a process which used a hwpoisoned hugepage tries to exit() or
munmap(), the kernel can print out "bad pmd" message because page
table walker in free_pgtables() encounters 'hwpoisoned entry' on pmd.

This is because currently we fail to clear the hwpoisoned entry in
__unmap_hugepage_range(), so this patch simply does it.

Signed-off-by: Naoya Horiguchi 
---
 mm/hugetlb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git v3.7-rc8.orig/mm/hugetlb.c v3.7-rc8/mm/hugetlb.c
index e61a749..fe7c2a7 100644
--- v3.7-rc8.orig/mm/hugetlb.c
+++ v3.7-rc8/mm/hugetlb.c
@@ -2387,8 +2387,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, 
struct vm_area_struct *vma,
/*
 * HWPoisoned hugepage is already unmapped and dropped reference
 */
-   if (unlikely(is_hugetlb_entry_hwpoisoned(pte)))
+   if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
+   pte_clear(mm, address, ptep);
continue;
+   }
 
page = pte_page(pte);
/*
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/3] HWPOISON, hugetlbfs: fix RSS-counter warning

2012-12-05 Thread Naoya Horiguchi

Memory error handling on hugepages can break a RSS counter, which emits
a message like "Bad rss-counter state mm:88040abecac0 idx:1 val:-1".
This is because PageAnon returns true for hugepage (this behavior is
necessary for reverse mapping to work on hugetlbfs).

Signed-off-by: Naoya Horiguchi 
---
 mm/rmap.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git v3.7-rc8.orig/mm/rmap.c v3.7-rc8/mm/rmap.c
index 2ee1ef0..df54ef0 100644
--- v3.7-rc8.orig/mm/rmap.c
+++ v3.7-rc8/mm/rmap.c
@@ -1235,7 +1235,9 @@ int try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
update_hiwater_rss(mm);
 
if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-   if (PageAnon(page))
+   if (PageHuge(page))
+   ;
+   else if (PageAnon(page))
dec_mm_counter(mm, MM_ANONPAGES);
else
dec_mm_counter(mm, MM_FILEPAGES);
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] dynamic_debug: Fix vpr_ logging styles

2012-12-05 Thread Jason Baron

From: Joe Perches 

vpr_info_dq should be a function and vpr_info should have
a do {} while (0)

Add missing newlines to pr_s.

Miscellaneous neatening too.
braces, coalescing formats, alignments, etc...

Signed-off-by: Joe Perches 
Signed-off-by: Jason Baron 
---
 lib/dynamic_debug.c |  118 +++
 1 files changed, 62 insertions(+), 56 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index e7f7d99..c0869f1 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -59,7 +59,7 @@ struct ddebug_iter {
 
 static DEFINE_MUTEX(ddebug_lock);
 static LIST_HEAD(ddebug_tables);
-static int verbose = 0;
+static int verbose;
 module_param(verbose, int, 0644);
 
 /* Return the last part of a pathname */
@@ -107,24 +107,32 @@ static char *ddebug_describe_flags(struct _ddebug *dp, 
char *buf,
return buf;
 }
 
-#define vpr_info(fmt, ...) \
-   if (verbose) do { pr_info(fmt, ##__VA_ARGS__); } while (0)
-
-#define vpr_info_dq(q, msg)\
+#define vpr_info(fmt, ...) \
 do {   \
-   /* trim last char off format print */   \
-   vpr_info("%s: func=\"%s\" file=\"%s\" " \
-   "module=\"%s\" format=\"%.*s\" "\
-   "lineno=%u-%u", \
-   msg,\
-   q->function ? q->function : "", \
-   q->filename ? q->filename : "", \
-   q->module ? q->module : "", \
-   (int)(q->format ? strlen(q->format) - 1 : 0),   \
-   q->format ? q->format : "", \
-   q->first_lineno, q->last_lineno);   \
+   if (verbose)\
+   pr_info(fmt, ##__VA_ARGS__);\
 } while (0)
 
+static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
+{
+   /* trim any trailing newlines */
+   int fmtlen = 0;
+
+   if (query->format) {
+   fmtlen = strlen(query->format);
+   while (fmtlen && query->format[fmtlen - 1] == '\n')
+   fmtlen--;
+   }
+
+   vpr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" 
lineno=%u-%u\n",
+msg,
+query->function ? query->function : "",
+query->filename ? query->filename : "",
+query->module ? query->module : "",
+fmtlen, query->format ? query->format : "",
+query->first_lineno, query->last_lineno);
+}
+
 /*
  * Search the tables for _ddebug's which match the given `query' and
  * apply the `flags' and `mask' to them.  Returns number of matching
@@ -148,7 +156,7 @@ static int ddebug_change(const struct ddebug_query *query,
if (query->module && strcmp(query->module, dt->mod_name))
continue;
 
-   for (i = 0 ; i < dt->num_ddebugs ; i++) {
+   for (i = 0; i < dt->num_ddebugs; i++) {
struct _ddebug *dp = >ddebugs[i];
 
/* match against the source filename */
@@ -183,10 +191,10 @@ static int ddebug_change(const struct ddebug_query *query,
continue;
dp->flags = newflags;
vpr_info("changed %s:%d [%s]%s =%s\n",
-   trim_prefix(dp->filename), dp->lineno,
-   dt->mod_name, dp->function,
-   ddebug_describe_flags(dp, flagbuf,
-   sizeof(flagbuf)));
+trim_prefix(dp->filename), dp->lineno,
+dt->mod_name, dp->function,
+ddebug_describe_flags(dp, flagbuf,
+  sizeof(flagbuf)));
}
}
mutex_unlock(_lock);
@@ -220,12 +228,12 @@ static int ddebug_tokenize(char *buf, char *words[], int 
maxwords)
/* find `end' of word, whitespace separated or quoted */
if (*buf == '"' || *buf == '\'') {
int quote = *buf++;
-   for (end = buf ; *end && *end != quote ; end++)
+   for (end = buf; *end && *end != quote; end++)
;
if (!*end)
return -EINVAL; /* unclosed quote */
} else {
-   for (end = buf ; *end && !isspace(*end) ; end++)
+   for (end = buf; *end && !isspace(*end); end++)
;
BUG_ON(end == buf);

[PATCH 3/3] dynamic_debug: add pr_errs before -EINVALs

2012-12-05 Thread Jason Baron

From: Jim Cromie 

Ma noted that dynamic-debug is silent about many query errors, so add
pr_err()s to explain those errors, and tweak a few others.  Also parse
flags 1st, so that match-spec errs are slightly clearer.

CC: Jianpeng Ma 
CC: Joe Perches 
CC: Greg KH 
Signed-off-by: Jim Cromie 
Signed-off-by: Jason Baron 
---
 lib/dynamic_debug.c |   47 +++
 1 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index c0869f1..da820f2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -230,8 +230,10 @@ static int ddebug_tokenize(char *buf, char *words[], int 
maxwords)
int quote = *buf++;
for (end = buf; *end && *end != quote; end++)
;
-   if (!*end)
+   if (!*end) {
+   pr_err("unclosed quote: %s\n", buf);
return -EINVAL; /* unclosed quote */
+   }
} else {
for (end = buf; *end && !isspace(*end); end++)
;
@@ -239,8 +241,10 @@ static int ddebug_tokenize(char *buf, char *words[], int 
maxwords)
}
 
/* `buf' is start of word, `end' is one past its end */
-   if (nwords == maxwords)
+   if (nwords == maxwords) {
+   pr_err("too many words, legal max <=%d\n", maxwords);
return -EINVAL; /* ran out of words[] before bytes */
+   }
if (*end)
*end++ = '\0';  /* terminate the word */
words[nwords++] = buf;
@@ -272,7 +276,11 @@ static inline int parse_lineno(const char *str, unsigned 
int *val)
return 0;
}
*val = simple_strtoul(str, , 10);
-   return end == NULL || end == str || *end != '\0' ? -EINVAL : 0;
+   if (end == NULL || end == str || *end != '\0') {
+   pr_err("bad line-number: %s\n", str);
+   return -EINVAL;
+   }
+   return 0;
 }
 
 /*
@@ -352,8 +360,10 @@ static int ddebug_parse_query(char *words[], int nwords,
int rc;
 
/* check we have an even number of words */
-   if (nwords % 2 != 0)
+   if (nwords % 2 != 0) {
+   pr_err("expecting pairs of match-spec \n");
return -EINVAL;
+   }
memset(query, 0, sizeof(*query));
 
if (modname)
@@ -374,18 +384,22 @@ static int ddebug_parse_query(char *words[], int nwords,
char *first = words[i+1];
char *last = strchr(first, '-');
if (query->first_lineno || query->last_lineno) {
-   pr_err("match-spec:line given 2 times\n");
+   pr_err("match-spec: line used 2x\n");
return -EINVAL;
}
if (last)
*last++ = '\0';
-   if (parse_lineno(first, >first_lineno) < 0)
+   if (parse_lineno(first, >first_lineno) < 0) {
+   pr_err("line-number is <0\n");
return -EINVAL;
+   }
if (last) {
/* range - */
if (parse_lineno(last, >last_lineno)
< query->first_lineno) {
-   pr_err("last-line < 1st-line\n");
+   pr_err("last-line:%d < 1st-line:%d\n",
+   query->last_lineno,
+   query->first_lineno);
return -EINVAL;
}
} else {
@@ -421,6 +435,7 @@ static int ddebug_parse_flags(const char *str, unsigned int 
*flagsp,
op = *str++;
break;
default:
+   pr_err("bad flag-op %c, at start of %s\n", *str, str);
return -EINVAL;
}
vpr_info("op='%c'\n", op);
@@ -432,8 +447,10 @@ static int ddebug_parse_flags(const char *str, unsigned 
int *flagsp,
break;
}
}
-   if (i < 0)
+   if (i < 0) {
+   pr_err("unknown flag '%c' in \"%s\"\n", *str, str);
return -EINVAL;
+   }
}
vpr_info("flags=0x%x\n", flags);
 
@@ -465,13 +482,19 @@ static int ddebug_exec_query(char *query_string, const 
char *modname)
char *words[MAXWORDS];
 
nwords = ddebug_tokenize(query_string, words, MAXWORDS);
-   if (nwords <= 0)
+   if (nwords <= 0) {
+

Re: [YASB] Re: Linux 3.7-rc7

2012-12-05 Thread Andreas Mohr

Hi,

got through all steps after all, and the ghost vanished.

Perhaps I made some silly mistake (marked that original master HEAD
as "bad" despite not actually having run that but rather some local
seemingly innocuous modifications - that will teach me for sure...).

Or perhaps it was something about the weird (to put it mildly) gcc ICEs
that I got when doing next bisection build on *some* of those kernels...
(i.e., some earlier effects from that, thus causing the problem)
A memtest run would be in order...
(fortunately I'm one generation post magnetic core memory on this "box" :-))

Just saw the -rc8 announce, thus I wanted to clarify status now,
but I'm still only 98% solid about this bisection result here.
Probably best to go straight ahead to -rc8.

Andreas Mohr
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/3] dynamic_debug: dynamic hex dump

2012-12-05 Thread Jason Baron

From: Vladimir Kondratiev 

Introduce print_hex_dump_debug() that can be dynamically controlled, similar to
pr_debug.

Also, make print_hex_dump_bytes() dynamically controlled

Implement only the 'p' flag (_DPRINTK_FLAGS_PRINT) to keep it simple, since a
hex dump prints multiple lines and a long prefix would impact readability.
To provide line/file etc. information, use pr_debug or similar
before/after print_hex_dump_debug()

Signed-off-by: Vladimir Kondratiev 
Signed-off-by: Jason Baron 
---
 Documentation/dynamic-debug-howto.txt |   15 +--
 include/linux/dynamic_debug.h |   11 +++
 include/linux/printk.h|   17 +
 lib/hexdump.c |4 +++-
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/Documentation/dynamic-debug-howto.txt 
b/Documentation/dynamic-debug-howto.txt
index 6e16849..72322c6 100644
--- a/Documentation/dynamic-debug-howto.txt
+++ b/Documentation/dynamic-debug-howto.txt
@@ -6,8 +6,16 @@ This document describes how to use the dynamic debug (dyndbg) 
feature.
 
 Dynamic debug is designed to allow you to dynamically enable/disable
 kernel code to obtain additional kernel information.  Currently, if
-CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() calls can
-be dynamically enabled per-callsite.
+CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() and
+print_hex_dump_debug()/print_hex_dump_bytes() calls can be dynamically
+enabled per-callsite.
+
+If CONFIG_DYNAMIC_DEBUG is not set, print_hex_dump_debug() is just
+shortcut for print_hex_dump(KERN_DEBUG).
+
+For print_hex_dump_debug()/print_hex_dump_bytes(), format string is
+its 'prefix_str' argument, if it is constant string; or "hexdump"
+in case 'prefix_str' is build dynamically.
 
 Dynamic debug has even more useful features:
 
@@ -202,6 +210,9 @@ The flags are:
   tInclude thread ID in messages not generated from interrupt context
   _No flags are set. (Or'd with others on input)
 
+For print_hex_dump_debug() and print_hex_dump_bytes(), only 'p' flag
+have meaning, other flags ignored.
+
 For display, the flags are preceded by '='
 (mnemonic: what the flags are currently equal to).
 
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 6dd4787..2fe93b2 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -95,6 +95,17 @@ do { 
\
 ##__VA_ARGS__);\
 } while (0)
 
+#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
+groupsize, buf, len, ascii)\
+do {   \
+   DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,   \
+   __builtin_constant_p(prefix_str) ? prefix_str : "hexdump");\
+   if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT))  \
+   print_hex_dump(KERN_DEBUG, prefix_str,  \
+  prefix_type, rowsize, groupsize, \
+  buf, len, ascii);\
+} while (0)
+
 #else
 
 #include 
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e..02c95cf 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -321,8 +321,13 @@ extern void hex_dump_to_buffer(const void *buf, size_t len,
 extern void print_hex_dump(const char *level, const char *prefix_str,
   int prefix_type, int rowsize, int groupsize,
   const void *buf, size_t len, bool ascii);
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)\
+   dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true)
+#else
 extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 const void *buf, size_t len);
+#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
 #else
 static inline void print_hex_dump(const char *level, const char *prefix_str,
  int prefix_type, int rowsize, int groupsize,
@@ -336,4 +341,16 @@ static inline void print_hex_dump_bytes(const char 
*prefix_str, int prefix_type,
 
 #endif
 
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \
+groupsize, buf, len, ascii)\
+   dynamic_hex_dump(prefix_str, prefix_type, rowsize,  \
+groupsize, buf, len, ascii)
+#else
+#define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \
+groupsize, buf, len, ascii)\
+   print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize,\
+  groupsize, buf, len, ascii)
+#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
+
 #endif
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 6540d65..3f0494c 100644
--- a/lib/hexdump.c
+++

[PATCH 0/3] dynamic_debug: Add print_hex_dump_bytes/debug support and cleanups

2012-12-05 Thread Jason Baron

Hi Greg,

Here's a collection of the latest dynamic debug patches that I have
pending. 

Thanks,

-Jason

Jim Cromie (1):
  dynamic_debug: add pr_errs before -EINVALs

Joe Perches (1):
  dynamic_debug: Fix vpr_ logging styles

Vladimir Kondratiev (1):
  dynamic_debug: dynamic hex dump

 Documentation/dynamic-debug-howto.txt |   15 +++-
 include/linux/dynamic_debug.h |   11 ++
 include/linux/printk.h|   17 
 lib/dynamic_debug.c   |  165 +++--
 lib/hexdump.c |4 +-
 5 files changed, 141 insertions(+), 71 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] HWPOISON, hugetlbfs: fix warning on freeing hwpoisoned hugepage

2012-12-05 Thread Naoya Horiguchi

This patch fixes the warning from __list_del_entry() which is triggered
when a process tries to do free_huge_page() for a hwpoisoned hugepage.

Originally, page->lru of hugetlbfs head page was dangling when the
hugepage was in use. This behavior was changed by commit 0edaecfab218d7
("hugetlb: add a list for tracking in-use HugeTLB pages"), where hugepages
in use are linked to hugepage_activelist. HWpoisoned hugepages should not
be charged to any process, so we introduce another list to link hwpoisoned
hugepages.

Signed-off-by: Naoya Horiguchi 
---
 include/linux/hugetlb.h | 3 +++
 mm/hugetlb.c| 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git v3.7-rc8.orig/include/linux/hugetlb.h 
v3.7-rc8/include/linux/hugetlb.h
index 2251648..13858ba 100644
--- v3.7-rc8.orig/include/linux/hugetlb.h
+++ v3.7-rc8/include/linux/hugetlb.h
@@ -230,6 +230,9 @@ struct hstate {
unsigned long nr_overcommit_huge_pages;
struct list_head hugepage_activelist;
struct list_head hugepage_freelists[MAX_NUMNODES];
+#ifdef CONFIG_MEMORY_FAILURE
+   struct list_head hugepage_hwpoisonedlist;
+#endif
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
diff --git v3.7-rc8.orig/mm/hugetlb.c v3.7-rc8/mm/hugetlb.c
index 59a0059..e61a749 100644
--- v3.7-rc8.orig/mm/hugetlb.c
+++ v3.7-rc8/mm/hugetlb.c
@@ -1939,6 +1939,7 @@ void __init hugetlb_add_hstate(unsigned order)
for (i = 0; i < MAX_NUMNODES; ++i)
INIT_LIST_HEAD(>hugepage_freelists[i]);
INIT_LIST_HEAD(>hugepage_activelist);
+   INIT_LIST_HEAD(>hugepage_hwpoisonedlist);
h->next_nid_to_alloc = first_node(node_states[N_HIGH_MEMORY]);
h->next_nid_to_free = first_node(node_states[N_HIGH_MEMORY]);
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
@@ -3170,7 +3171,7 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
 
spin_lock(_lock);
if (is_hugepage_on_freelist(hpage)) {
-   list_del(>lru);
+   list_move(>lru, >hugepage_hwpoisonedlist);
set_page_refcounted(hpage);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Look Ma, da kernel is b0rken

2012-12-05 Thread Borislav Petkov

On Wed, Dec 05, 2012 at 01:38:53PM -0800, Andrew Morton wrote:
> Also, the original patch is missing a signed-off-by. Here's what I
> have queued:

Thanks, looks good.



-- 
Regards/Gruss,
Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4] backlight: corgi_lcd: Use gpio_set_value_cansleep() to avoid WARN_ON

2012-12-05 Thread Grant Likely

On Wed, Dec 5, 2012 at 7:21 PM, Russell King - ARM Linux
 wrote:
> As I say above, IMHO it would've been much better to rename these functions
> to be the other way around but David was always very dismissive of any
> comments I had against any code he'd written.

FWIW, I'll gladly take a patch to rename them now if someone does the legwork.

g.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 21/46] x86, mm: setup page table in top-down

2012-12-05 Thread Konrad Rzeszutek Wilk

On Wed, Nov 28, 2012 at 12:16:16PM -0800, Yinghai Lu wrote:
> On Wed, Nov 28, 2012 at 9:50 AM, Konrad Rzeszutek Wilk
>  wrote:
> >>  /*
> >> - * Iterate through E820 memory map and create direct mappings for only 
> >> E820_RAM
> >> - * regions. We cannot simply create direct mappings for all pfns from
> >> - * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory 
> >> holes in
> >> - * high addresses that cannot be marked as UC by fixed/variable range 
> >> MTRRs.
> >> - * Depending on the alignment of E820 ranges, this may possibly result in 
> >> using
> >> - * smaller size (i.e. 4K instead of 2M or 1G) page tables.
> >> + * would have hole in the middle or ends, and only ram parts will be 
> >> mapped.
> >
> >
> > > What? What is the 'would' referring to? Why remove a good comment that
> > > explains the function? Why not just modify it a bit please?
> >
> 
> ==> update to
> 
> /*
>  * We need to iterate through E820 memory map and create direct mappings
>  * for only E820_RAM and E820_KERN_RESERVED regions. We cannot simply
>  * create direct mappings for all pfns from [0 to max_low_pfn) and
>  * [4GB to max_pfn) because of possible memory holes in high addresses
>  * that cannot be marked as UC by fixed/variable range MTRRs.
>  * Depending on the alignment of E820 ranges, this may possibly result
>  * in using smaller size (i.e. 4K instead of 2M or 1G) page tables.
>  *
>  * init_mem_mapping call init_range_memory_mapping with big range.
>  * That range would have hole in the middle or ends, and only ram parts
>  * will be mapped in init_range_memory_mapping.
>  */
> 
> 
> 
> >> - max_pfn_mapped = 0; /* will get exact value next */
> >>   /* the ISA range is always mapped regardless of memory holes */
> >>   init_memory_mapping(0, ISA_END_ADDRESS);
> >> - init_range_memory_mapping(ISA_END_ADDRESS, end);
> >> +
> >> + /* xen has big range in reserved near end of ram, skip it at first */
> >
> > I am not seeing the logic for doing it? The loop is quite generic
> > in doing it in reverse order, and the memblock_find_in_range
> > gets a nice PMD_SIZE region from the end of the memory.
> >
> > If the memory at the end is reserved, then it looks like it won't
> > be even considered in the loop, but it does get included in the fallback:
> >
> > if (real_end < end)
> > init_range_memory_mapping(real_end, end);
> 
> that reserved range is in memblock.reserved and it is not in e820.
> 
> so memblock.memory will have that range too. then if we use all of
> first 2M to map
> 
> those reserved range, we would not have enough mapped pages to be used
> as new page tables.

You should include that nice explanation as part of the comment. It is
rather subtle (or would be for me in 6 months when I would look at this
code).

> 
> >
> >
> >
> >> + addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE,
> >> +  PAGE_SIZE);
> >> + real_end = addr + PMD_SIZE;
> >> +
> >> + /* step_size need to be small so pgt_buf from BRK could cover it */
> >> + step_size = PMD_SIZE;
> >> + max_pfn_mapped = 0; /* will get exact value next */
> >> + min_pfn_mapped = real_end >> PAGE_SHIFT;
> >> + last_start = start = real_end;
> >
> > > Every time I look at this loop, I keep on forgetting that it goes in reverse.
> > I am not sure if it is just me, but it might be useful for other
> > folks who are going to look at this in a year or so to have
> > a little hint:
> >
> > N.B. We start from the top (end of memory) and go to the bottom. The
> > memblock_find_in_range gets us a block of RAM from the end
> > of RAM.
> 
> > put that in the comments.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] tmpfs: fix shared mempolicy leak

2012-12-05 Thread Hugh Dickins

On Wed, 5 Dec 2012, Tommi Rantala wrote:
> 2012/12/5 Mel Gorman :
> > On Tue, Dec 04, 2012 at 11:24:30PM -0800, Hugh Dickins wrote:
> >> From: Mel Gorman 
> >>
> >> Commit 00442ad04a5e ("mempolicy: fix a memory corruption by refcount
> >> imbalance in alloc_pages_vma()") changed get_vma_policy() to raise the
> >> refcount on a shmem shared mempolicy; whereas shmem_alloc_page() went
> >> on expecting alloc_page_vma() to drop the refcount it had acquired.
> >> This deserves a rework: but for now fix the leak in shmem_alloc_page().
> >
> > Thanks Hugh for turning gibber into a patch!
> >
> > Signed-off-by: Mel Gorman 
> >
> > Tommi, just in case, can you confirm this fixes the problem for you please?
> 
> Confirmed! No more complaints from kmemleak.

Great, thanks.  I'll update the tags and send straight to Linus now.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] tmpfs: fix shared mempolicy leak

2012-12-05 Thread Hugh Dickins

From: Mel Gorman 

This fixes a regression in 3.7-rc, which has since gone into stable.

Commit 00442ad04a5e ("mempolicy: fix a memory corruption by refcount
imbalance in alloc_pages_vma()") changed get_vma_policy() to raise the
refcount on a shmem shared mempolicy; whereas shmem_alloc_page() went
on expecting alloc_page_vma() to drop the refcount it had acquired.
This deserves a rework: but for now fix the leak in shmem_alloc_page().

Hugh: shmem_swapin() did not need a fix, but surely it's clearer to use
the same refcounting there as in shmem_alloc_page(), delete its onstack
mempolicy, and the strange mpol_cond_copy() and __mpol_cond_copy() -
those were invented to let swapin_readahead() make an unknown number of
calls to alloc_pages_vma() with one mempolicy; but since 00442ad04a5e,
alloc_pages_vma() has kept refcount in balance, so now no problem.

Reported-and-tested-by: Tommi Rantala 
Signed-off-by: Mel Gorman 
Signed-off-by: Hugh Dickins 
Cc: sta...@vger.kernel.org
---

 include/linux/mempolicy.h |   16 
 mm/mempolicy.c|   22 --
 mm/shmem.c|   26 --
 3 files changed, 16 insertions(+), 48 deletions(-)

--- 3.7-rc8/include/linux/mempolicy.h   2012-10-14 16:16:57.637308925 -0700
+++ linux/include/linux/mempolicy.h 2012-12-04 22:38:21.812178829 -0800
@@ -82,16 +82,6 @@ static inline void mpol_cond_put(struct
__mpol_put(pol);
 }
 
-extern struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
- struct mempolicy *frompol);
-static inline struct mempolicy *mpol_cond_copy(struct mempolicy *tompol,
-   struct mempolicy *frompol)
-{
-   if (!frompol)
-   return frompol;
-   return __mpol_cond_copy(tompol, frompol);
-}
-
 extern struct mempolicy *__mpol_dup(struct mempolicy *pol);
 static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
 {
@@ -215,12 +205,6 @@ static inline void mpol_cond_put(struct
 {
 }
 
-static inline struct mempolicy *mpol_cond_copy(struct mempolicy *to,
-   struct mempolicy *from)
-{
-   return from;
-}
-
 static inline void mpol_get(struct mempolicy *pol)
 {
 }
--- 3078/mm/mempolicy.c 2012-10-20 20:56:24.675917367 -0700
+++ 3078X/mm/mempolicy.c2012-12-04 22:33:31.516171929 -0800
@@ -2037,28 +2037,6 @@ struct mempolicy *__mpol_dup(struct memp
return new;
 }
 
-/*
- * If *frompol needs [has] an extra ref, copy *frompol to *tompol ,
- * eliminate the * MPOL_F_* flags that require conditional ref and
- * [NOTE!!!] drop the extra ref.  Not safe to reference *frompol directly
- * after return.  Use the returned value.
- *
- * Allows use of a mempolicy for, e.g., multiple allocations with a single
- * policy lookup, even if the policy needs/has extra ref on lookup.
- * shmem_readahead needs this.
- */
-struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
-   struct mempolicy *frompol)
-{
-   if (!mpol_needs_cond_ref(frompol))
-   return frompol;
-
-   *tompol = *frompol;
-   tompol->flags &= ~MPOL_F_SHARED;/* copy doesn't need unref */
-   __mpol_put(frompol);
-   return tompol;
-}
-
 /* Slow path of a mempolicy comparison */
 bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
--- 3078/mm/shmem.c 2012-11-16 19:26:56.388459961 -0800
+++ 3078X/mm/shmem.c2012-12-04 22:32:35.328170594 -0800
@@ -910,25 +910,29 @@ static struct mempolicy *shmem_get_sbmpo
 static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
 {
-   struct mempolicy mpol, *spol;
struct vm_area_struct pvma;
-
-   spol = mpol_cond_copy(&mpol,
-   mpol_shared_policy_lookup(&info->policy, index));
+   struct page *page;
 
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
/* Bias interleave by inode number to distribute better across nodes */
pvma.vm_pgoff = index + info->vfs_inode.i_ino;
pvma.vm_ops = NULL;
-   pvma.vm_policy = spol;
-   return swapin_readahead(swap, gfp, &pvma, 0);
+   pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+
+   page = swapin_readahead(swap, gfp, &pvma, 0);
+
+   /* Drop reference taken by mpol_shared_policy_lookup() */
+   mpol_cond_put(pvma.vm_policy);
+
+   return page;
 }
 
 static struct page *shmem_alloc_page(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
 {
struct vm_area_struct pvma;
+   struct page *page;
 
/* Create a pseudo vma that just contains the policy */
pvma.vm_start = 0;
@@ -937,10 +941,12 @@ static struct page *shmem_alloc_page(gfp
pvma.vm_ops = NULL;
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
 
-

RE: [PATCH 3/3] HWPOISON, hugetlbfs: fix RSS-counter warning

2012-12-05 Thread Luck, Tony

if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
-   if (PageAnon(page))
+   if (PageHuge(page))
+   ;
+   else if (PageAnon(page))
dec_mm_counter(mm, MM_ANONPAGES);
else
dec_mm_counter(mm, MM_FILEPAGES);

This style minimizes the "diff" ... but wouldn't it be nicer to say:

if (!PageHuge(page)) {
old code in here
}

-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/3] dynamic_debug: Add print_hex_dump_bytes/debug support and cleanups

2012-12-05 Thread Joe Perches

On Wed, 2012-12-05 at 16:48 -0500, Jason Baron wrote:
> Here's a collection of the latest dynamic debug patches that I have
> pending. 

Any update on the jump table support?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 6/6 v8] cpufreq, highbank: add support for highbank cpufreq

2012-12-05 Thread Mark Langsdorf

On 12/05/2012 12:49 PM, Mike Turquette wrote:
> On Wed, Dec 5, 2012 at 8:48 AM, Mark Langsdorf
>  wrote:
>> diff --git a/drivers/cpufreq/highbank-cpufreq.c 
>> b/drivers/cpufreq/highbank-cpufreq.c
>> new file mode 100644
>> index 0000000..1f28fa6
>> --- /dev/null
>> +++ b/drivers/cpufreq/highbank-cpufreq.c
>> @@ -0,0 +1,102 @@
> 
> Looks pretty good to me.  Some tedious nitpicks and discussion below.
> 
> 
>> +static int hb_voltage_change(unsigned int freq)
>> +{
>> +   int i;
>> +   u32 msg[7];
>> +
>> +   msg[0] = HB_CPUFREQ_CHANGE_NOTE;
>> +   msg[1] = freq / 100;
>> +   for (i = 2; i < 7; i++)
>> +   msg[i] = 0;
>> +
>> +   return pl320_ipc_transmit(msg);
>> +}
>> +
>> +static int hb_cpufreq_clk_notify(struct notifier_block *nb,
>> +   unsigned long action, void *hclk)
>> +{
>> +   struct clk_notifier_data *clk_data = hclk;
>> +   int i = 0;
>> +
>> +   if (action == PRE_RATE_CHANGE) {
>> +   if (clk_data->new_rate > clk_data->old_rate)
>> +   while (hb_voltage_change(clk_data->new_rate))
>> +   if (i++ > 15)
> 
> There are a few magic numbers here.  How about something like:
> 
> #define HB_VOLT_CHANGE_MAX_TRIES 15
> 
> Maybe do the same for the i2c message length?

Fixed.

>> +   return NOTIFY_STOP;
> 
> How about NOTIFY_BAD?  It more clearly signals that an error has occurred.

> Same as above.  It is true that the clock framework does nothing with
> post-rate change notifier aborts but that might change in the future.

Changed and added.

>> +   }
>> +
>> +   return NOTIFY_DONE;
>> +}
>> +
>> +static struct notifier_block hb_cpufreq_clk_nb = {
>> +   .notifier_call = hb_cpufreq_clk_notify,
>> +};
>> +
> 
> Do you have any plans to convert your voltage change routine over to
> the regulator framework?  Likewise do you plan to use the OPP library
> in the future?  I can understand if you do not do that since your
> regulator/dvfs programming model makes things very simple for you.

I looked at treating the ECME as a voltage regulator, but it was a very
bad fit. The ECME has a certain amount of intelligence built into it and
corporate plans are to treat voltage control as a black box.

The current solution is actually nicely generic from my perspective. The
clk notifiers guarantee we can make the voltage changes at the right
time regardless of the underlying cpufreq driver implementation. I don't
think we need more until we get into cpufreq QoS issues, and even then
I'd want to stick with something like the current structure.

--Mark Langsdorf
Calxeda, Inc.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/6 v8] gpio: Add block GPIO

2012-12-05 Thread Roland Stigge

Hi Wolfgang,

On 05/12/12 19:44, Wolfgang Grandegger wrote:
>> * There is probably an explicit interrupt configuration necessary (via
>> struct gpio_block, and devicetree, respectively) since there are
>> constellations where gpio_to_irq() isn't working. E.g., in contrast to
>> controllers which are aware of their IRQs and providing to_irq(), there
>> is typically independent wiring from GPIO expander chips' interrupt line
>> to individual IRQ inputs on SoCs/CPUs. Or should all this be solved via
>> devicetree and drivers (which should support IRQ config where possible)?
> 
> Yes, I think it's up to the device tree or platform code to properly setup
> the interrupt... like for defining the GPIO block.

OK, sounds reasonable. Luckily, in reality it already works fine in this
regard with many current drivers.

>> * For the same reason, the IRQ flags are currently IRQF_TRIGGER_FALLING,
>> which isn't flexible. Instead, either preset by board setup/firmware, or
>> via interrupts config in devicetree (optional property of a GPIO block?)
> 
> Yes, and it did fail on my setup.

OK, will replace the flags with 0 (and need to fix my own board setup ;-) ).

>> * Some GPIOs' IRQs are not suitable for GPI input change detection. E.g.
>> on LPC32xx, I can configure the IRQ which is controlled directly by the
>> GPI's values as FALLING, RISING, HIGH /exclusive/ or LOW. I.e., this way
>> it's not possible to detect both 0->1 and 1->0 changes without
>> reconfiguring the GPIO controller inbetween. Other controllers provide a
>> dedicated interrupt on all values changes.
> 
> Hm.

For now, we are expecting IRQs to fire on "changes". Otherwise, the user
needs to handle the issue manually, using busy polling, manual
reconfiguration of the GPIO controller etc.

>> * Would IRQF_SHARED be appropriate to enable opening IRQ enabled GPIO
>> blocks multiple times?
> 
> Sounds reasonable for me. Some more comments in the patch mails...

OK, will do in the next update.

Thanks for your feedback,

Roland
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH 1/3] HWPOISON, hugetlbfs: fix warning on freeing hwpoisoned hugepage

2012-12-05 Thread Luck, Tony

> This patch fixes the warning from __list_del_entry() which is triggered
> when a process tries to do free_huge_page() for a hwpoisoned hugepage.

Ultimately it would be nice to avoid poisoning huge pages. Generally we know the
location of the poison to a cache line granularity (but sometimes only to a 4K
granularity) ... and it is rather inefficient to take an entire 2M page out of 
service.
With 1G pages things would be even worse!!

It also makes life harder for applications that would like to catch the SIGBUS
and try to take their own recovery actions. Losing more data than they really
need to will make it less likely that they can do something to work around the
loss.

Has anyone looked at how hard it might be to have the code in memory-failure.c
break up a huge page and only poison the 4K that needs to be taken out of 
service?

-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/3] HWPOISON, hugetlbfs: fix RSS-counter warning

2012-12-05 Thread Naoya Horiguchi

Hi Tony,

On Wed, Dec 05, 2012 at 10:04:50PM +0000, Luck, Tony wrote:
>   if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> - if (PageAnon(page))
> + if (PageHuge(page))
> + ;
> + else if (PageAnon(page))
>   dec_mm_counter(mm, MM_ANONPAGES);
>   else
>   dec_mm_counter(mm, MM_FILEPAGES);
> 
> This style minimizes the "diff" ... but wouldn't it be nicer to say:
> 
>   if (!PageHuge(page)) {
>   old code in here
>   }
> 

I think this needs more lines in diff because old code should be
indented without any logical change.

Thanks,
Naoya
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] Additional compiler optimization options

2012-12-05 Thread John

Came across this and wanted to show it to others more knowledgeable for 
feedback.  Is this value-added or not?  Upstream URL 
is: https://gitorious.org/rummage/random_patches

Please cc me on replies as I am not a regular subscriber to lkml.  Thank you.

--- linux-3.6/arch/x86/include/asm/module.h2012-04-29 18:43:58.239336240 -0700
+++ linux-3.6.mod/arch/x86/include/asm/module.h2012-04-29 18:43:41.609545221 
-0700
@@ -17,6 +17,14 @@
 #define MODULE_PROC_FAMILY "586MMX "
 #elif defined CONFIG_MCORE2
 #define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MCOREI7
+#define MODULE_PROC_FAMILY "COREI7 "
+#elif defined CONFIG_MCOREI7AVX
+#define MODULE_PROC_FAMILY "COREI7AVX "
+#elif defined CONFIG_MCOREAVXI
+#define MODULE_PROC_FAMILY "COREAVXI "
+#elif defined CONFIG_MCOREAVX2
+#define MODULE_PROC_FAMILY "COREAVX2 "
 #elif defined CONFIG_MATOM
 #define MODULE_PROC_FAMILY "ATOM "
 #elif defined CONFIG_M686
@@ -35,6 +43,14 @@
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MBARCELONA
+#define MODULE_PROC_FAMILY "BARCELONA "
+#elif defined CONFIG_MBOBCAT
+#define MODULE_PROC_FAMILY "BOBCAT "
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
 #elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
--- linux-3.6/arch/x86/Kconfig.cpu2012-04-29 18:43:58.249336198 -0700
+++ linux-3.6.mod/arch/x86/Kconfig.cpu2012-04-29 18:40:46.091751798 -0700
@@ -147,7 +147,7 @@
 
 
 config MK6
-bool "K6/K6-II/K6-III"
+bool "AMD K6/K6-II/K6-III"
 depends on X86_32
 ---help---
   Select this for an AMD K6-family processor.  Enables use of
@@ -155,7 +155,7 @@
   flags to GCC.
 
 config MK7
-bool "Athlon/Duron/K7"
+bool "AMD Athlon/Duron/K7"
 depends on X86_32
 ---help---
   Select this for an AMD Athlon K7-family processor.  Enables use of
@@ -163,12 +163,40 @@
   flags to GCC.
 
 config MK8
-bool "Opteron/Athlon64/Hammer/K8"
+bool "AMD Opteron/Athlon64/Hammer/K8"
 ---help---
   Select this for an AMD Opteron or Athlon64 Hammer-family processor.
   Enables use of some extended instructions, and passes appropriate
   optimization flags to GCC.
 
+config MBARCELONA
+bool "AMD Barcelona"
+---help---
+  Select this for AMD Barcelona and newer processors.
+
+  Enables -march=barcelona
+
+config MBOBCAT
+bool "AMD Bobcat"
+---help---
+  Select this for AMD Bobcat processors.
+
+  Enables -march=btver1
+
+config MBULLDOZER
+bool "AMD Bulldozer"
+---help---
+  Select this for AMD Bulldozer processors.
+
+  Enables -march=bdver1
+
+config MPILEDRIVER
+bool "AMD Piledriver"
+---help---
+  Select this for AMD Piledriver processors.
+
+  Enables -march=bdver2
+
 config MCRUSOE
 bool "Crusoe"
 depends on X86_32
@@ -260,7 +288,7 @@
   in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
 
 config MCORE2
-bool "Core 2/newer Xeon"
+bool "Intel Core 2"
 ---help---
 
   Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
@@ -268,6 +296,41 @@
   family in /proc/cpuinfo. Newer ones have 6 and older ones 15
   (not a typo)
 
+  Enables -march=core2
+
+config MCOREI7
+bool "Intel Core i7"
+---help---
+
+  Select this for the Intel Nehalem platform. Intel Nehalem processors
+  include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
+
+  Enables -march=corei7
+
+config MCOREI7AVX
+bool "Intel Core 2nd Gen AVX"
+---help---
+
+  Select this for 2nd Gen Core processors including Sandy Bridge.
+
+  Enables -march=corei7-avx
+
+config MCOREAVXI
+bool "Intel Core 3rd Gen AVX"
+---help---
+
+  Select this for 3rd Gen Core processors including Ivy Bridge.
+
+  Enables -march=corei7-avx
+
+config MCOREAVX2
+bool "Intel Core AVX-2"
+---help---
+
+  Select this for AVX-2 enabled processors including Haswell.
+
+  Enables -march=corei7-avx-2
+
 config MATOM
 bool "Intel Atom"
 ---help---
@@ -312,7 +375,7 @@
 config X86_L1_CACHE_SHIFT
 int
 default "7" if MPENTIUM4 || MPSC
-default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || 
X86_GENERIC || GENERIC_CPU
+default "6" if MK7 || MK8 || MBARCELONA || MBOBCAT || MBULLDOZER || 
MPILEDRIVER || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || 
MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
 default "4" if MELAN || M486 || M386 || MGEODEGX1
 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII 
|| MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || 
MVIAC3_2 || MGEODE_LX
 
@@ -363,11 +426,11 @@
 
 config X86_INTEL_USERCOPY
 def_bool y
-depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || 
X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
+depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || 
X86_GENERIC || MK8 || MK7 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || 
MCOREI7AVX || MCOREAVXI || MCOREAVX2
 
 config X86_USE_PPRO_CHECKSUM
 def_bool y

Re: [RFC] Capabilities still can't be inherited by normal programs

2012-12-05 Thread Serge Hallyn

Quoting Andy Lutomirski (l...@amacapital.net):
> On Wed, Dec 5, 2012 at 1:05 PM, Serge Hallyn  
> wrote:
> > Quoting Andy Lutomirski (l...@amacapital.net):
> >> On Tue, Dec 4, 2012 at 5:54 AM, Serge E. Hallyn  wrote:
> >> > Quoting Andy Lutomirski (l...@amacapital.net):
> >> >> >> d) If I really wanted, I could emulate execve without actually doing
> >> >> >> execve, and capabilities would be inherited.
> >> >> >
> >> >> > If you could modify the executable properties of the binary that has
> >> >> > the privilege to wield a privilege then you are either exploiting an
> >> >> > app bug, or doing something the privileged binary has been trusted to
> >> >> > do.
> >> >>
> >> >> That's not what I mean.  I would:
> >> >>
> >> >> fork()
> >> >> munmap everything
> >> >> mmap ld.so
> >> >> set up a fake initial stack and the right fd or mapping or whatever
> >> >> just to ld-linux.so
> >> >>
> >> >> That's almost execve, and privilege inheritance works.
> >> >
> >> > But of course that is why you only want to fill fI on programs you trust
> >> > not to do that.  What you are arguing is that you want to give fI on
> >> > programs you don't trust anyway, and so heck why not just give it on
> >> > everything.
> >> >
> >>
> >> Huh?  I'd set fP on a program I expect to do *exactly* that (or use
> >> actual in-kernel capability inheritance, which I would find vastly
> >> more pleasant).  If I give a program a capability (via fP or fI & pI),
> >> then I had better trust it not to abuse that capability.  Having it
> >> pass that capability on to a child helper process would be just fine
> >> with me *because it already has that capability*.
> >>
> >> The problem with the current inheritance mechanism is that it's very
> >> difficult to understand what it means for an fI bit or a pI bit to be
> >> set.  Saying "set a pI bit using pam if you want to grant permission
> >> to that user to run a particular program with fI set" is crap -- it
> >> only works if there is exactly one binary on the system with that bit
> >> set.  In any case, a different administrator or package might use it
> >> for something different.
> >>
> >> Suppose I use the (apparently) current suggested approach: I install a
> >> fI=cap_net_raw copy of tcpdump somewhere.  Then I write a helper that
> >> has fP=cap_net_raw and invokes that copy of tcpdump after appropriate
> >> validation of parameters.  All is well.
> >
> > Since you're writing a special helper, you can surely have it validate
> > the userid and make it so the calling user doesn't have to have
> > cap_net_raw in pI?
> 
> I can and did.

Oh, oops, I mis-understood what you meant was the problem.

Yup, that is a real limitation.

Yes, with the posix file caps you will be disappointed unless you see
pI=X as "this user may run any program which is Inh-trusted with X" and
fI=X as "this program may be run with X by any user Inh-trusted with X".

It almost makes me want to say that there should be an execve-analogue
to prctl(PR_SET_KEEPCAPS), which says caps will remain unchanged for one
execve.  Or perhaps an intermediate securebits state between
!SECBIT_NOROOT and SECBIT_NOROOT, which automatically transitions after
the first execve to SECBIT_NOROOT.

> The mere presence of a cap_net_raw+i tcpdump binary is more or less
> equivalent to saying that users with cap_net_raw in pI can capture
> packets.  I've just prevented pI=cap_net_raw from meaning anything
> less than "can capture packets".  So I think we should bite the bullet
> and just let programs opt in (via some appropriately careful
> mechanism) to real capability inheritance.

By real you mean more precise.  I think it'd be very interesting to get
together with Markku and learn more from the N9 experiment!

Markku, are there any post-mortem analysis papers we can read for
starters?  Andy would not be trying to restrict root in general, so
the ramification you cited may not necessarily be relevant.

-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Use PCI ROMs from EFI boot services

2012-12-05 Thread Bjorn Helgaas

On Wed, Dec 5, 2012 at 1:22 PM, Matthew Garrett  wrote:
> On Wed, Dec 05, 2012 at 01:09:25PM -0700, Bjorn Helgaas wrote:
>
>> That's right; nobody stepped up to fix the section mismatch.  I'm
>> happy to fold in your fix, especially if Matthew acks it.
>
> Yes, sorry, I've been way behind on pretty much everything for the past
> few months. Please do add my Ack.
>
>> David, Eric, what about the kexec question?  It looks to me like this
>> wouldn't make things worse than they are today.  If I understand
>> correctly, today we don't use ROM data from EFI on either an initial
>> boot or a kexec.  After this patch, we could use EFI ROM data on the
>> initial boot, but not after a kexec.  So it's worse in the sense that
>> the kexec case doesn't match the initial boot, but at least it's not
>> something that used to work and is now broken.
>
> I think I'd agree here - it's not ideal, but it's no more broken than
> the current situation.

OK, I applied Seth's fix, added his Tested-by, and put this series in
my -next branch.  I plan to merge it during the v3.8 merge window next
week.

Thanks!

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/6 v9] gpio: Add userland device interface to block GPIO

2012-12-05 Thread Roland Stigge

Hi Wolfgang,

On 05/12/12 20:01, Wolfgang Grandegger wrote:
>> +for (i = 0; i < block->ngpio; i++) {
>> +status = gpio_request(block->gpio[i], "gpioblock dev");
> 
> You could use the name of the GPIO block.

OK.

>> +if (status)
>> +goto err1;
>> +
>> +irq = gpio_to_irq(block->gpio[i]);
>> +if (irq >= 0 &&
>> +!test_bit(FLAG_IS_OUT, &gpio_desc[block->gpio[i]].flags) &&
>> +!gpio_block_is_irq_duplicate(block, i)) {
>> +status = request_irq(irq, gpio_block_irq_handler,
>> + IRQF_TRIGGER_FALLING,
>> + block->name, block);
>> +if (status)
>> +goto err2;
>> +
>> +block->irq_controlled = true;
>> +}
>> +}
> 
> There is no need to request IRQs if "O_NONBLOCK" is specified.

Sure? Regarding this, I found: "The poll() function shall not be
affected by the O_NONBLOCK flag." [1]

> I observed that the read returns once immediately (without blocking)
> after reboot. I did not look into that yet.

Didn't happen to me. Can you tell how this can be reproduced?

Thanks,

Roland


[1] http://pubs.opengroup.org/onlinepubs/009695399/functions/poll.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

PATCH reduce impact of FIFREEZE on userland processes

2012-12-05 Thread Alun


This patch is against kernel version 3.7-rc7.

The FIFREEZE ioctl blocks userland writes, then calls sync_filesystem.
If there is a large amount of dirty data, this sync can take a
substantial time to complete, with corresponding loss of responsiveness
to any userland processes wishing to write.

This patch simply adds an extra call to sync_filesystem prior to
blocking writes, so that (hopefully) the majority of outstanding dirty
data has been flushed before we impact on userland.

I'm a complete kernel newbie and have only done some pretty minimal
testing on my own machine, but with the patch in place the impact of
running "fsfreeze -f" immediately followed by "fsfreeze -u" on a
moderately loaded filesystem (as measured by time taken for a write()
to complete) was reduced from 2.5 to 0.2 seconds. Hopefully there's no
subtlety in how all this works, and that adding the extra call has no
scary implications...

Signed-off-by: Alun Jones 

--- linux-3.7-rc7/fs/super.c.orig   2012-11-29 17:35:37.000000000 +0000
+++ linux-3.7-rc7/fs/super.c    2012-12-05 20:56:38.730631855 +0000
@@ -1314,6 +1314,11 @@ int freeze_super(struct super_block *sb)
return 0;
}
 
+   /* Sync before we block writes to reduce the amount of
+* work that has to be done afterwards.
+*/
+   sync_filesystem(sb);
+
/* From now on, no new normal writers can start */
sb->s_writers.frozen = SB_FREEZE_WRITE;
smp_wmb();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Additional compiler optimization options

2012-12-05 Thread Borislav Petkov

On Wed, Dec 05, 2012 at 02:09:15PM -0800, John wrote:
> Came across this and wanted to show it to others
> more knowledgeable for feedback.  Is this value-added or not?
>  Upstream URL is: https://gitorious.org/rummage/random_patches

This only makes sense if you can show a noticeable performance
improvement using one of the those -march options with at least one
benchmark from those which we have now in the kernel.

And even that use case is limited because the majority of the kernel
builds out there are distro kernels built with -mtune=generic so it
would only make difference for people building their own kernels, who
are a small minority, relatively.

Thanks.

-- 
Regards/Gruss,
Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 06/44] of/vendor-prefixes: add Imagination Technologies

2012-12-05 Thread Grant Likely

On Wed, 5 Dec 2012 16:08:24 +0000, James Hogan  wrote:
> The "powervr" prefix which is currently described as "Imagination
> Technologies" isn't really appropriate for non-PowerVR hardware, so
> deprecate it, changing the description of "powervr" to "PowerVR
> (deprecated, use img)", and add a separate "img" prefix for "Imagination
> Technologies Ltd.".
> 
> Signed-off-by: James Hogan 
> Cc: Grant Likely 
> Cc: Rob Herring 
> Cc: Rob Landley 
> Cc: devicetree-disc...@lists.ozlabs.org

Applied, thanks.

g.

> ---
>  .../devicetree/bindings/vendor-prefixes.txt|3 ++-
>  1 files changed, 2 insertions(+), 1 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt 
> b/Documentation/devicetree/bindings/vendor-prefixes.txt
> index 9de2b9f..aab2993 100644
> --- a/Documentation/devicetree/bindings/vendor-prefixes.txt
> +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
> @@ -25,6 +25,7 @@ gef GE Fanuc Intelligent Platforms Embedded Systems, Inc.
>  hp   Hewlett Packard
>  ibm  International Business Machines (IBM)
>  idt  Integrated Device Technologies, Inc.
> +img  Imagination Technologies Ltd.
>  intercontrol Inter Control Group
>  linuxLinux-specific binding
>  marvell  Marvell Technology Group Ltd.
> @@ -35,7 +36,7 @@ nintendoNintendo
>  nvidia   NVIDIA
>  nxp  NXP Semiconductors
>  picochip Picochip Ltd
> -powervr  Imagination Technologies
> +powervr  PowerVR (deprecated, use img)
>  qcom Qualcomm, Inc.
>  ramtron  Ramtron International
>  realtek Realtek Semiconductor Corp.
> -- 
> 1.7.7.6
> 
> 

-- 
Grant Likely, B.Sc, P.Eng.
Secret Lab Technologies, Ltd.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH v3 0/3] acpi: Introduce prepare_remove device operation

2012-12-05 Thread Toshi Kani

On Wed, 2012-12-05 at 20:10 +0800, Hanjun Guo wrote:
> On 2012/12/5 7:23, Toshi Kani wrote:
> > On Tue, 2012-12-04 at 17:16 +0800, Hanjun Guo wrote:
> >> On 2012/12/4 8:10, Toshi Kani wrote:
> >>> On Mon, 2012-12-03 at 12:25 +0800, Hanjun Guo wrote:
> >>>> On 2012/11/30 6:27, Toshi Kani wrote:
> >>>
> >>> If I read the code right, the framework calls ACPI drivers differently
> >>> at boot-time and hot-add as follows.  That is, the new entry points are
> >>> called at hot-add only, but .add() is called at both cases.  This
> >>> requires .add() to work differently.
> >>
> >> Hi Toshi,
> >> Thanks for your comments!
> >>
> >>>
> >>> Boot: .add()
> >>
> >> Actually, at boot time: .add(), .start()
> > 
> > Right.
> > 
> >>> Hot-Add : .add(), .pre_configure(), configure(), etc.
> >>
> >> Yes, we did it as you said in the framework. We use .pre_configure(), 
> >> configure(),
> >> and post_configure() instead of .start() for better error handling and 
> >> recovery.
> > 
> > I think we should have hot-plug interfaces at the module level, not at
> > the ACPI-internal level.  In this way, the interfaces can be
> > platform-neutral and allow any modules to register, which makes it more
> > consistent with the boot-up sequence.  It can also allow ordering of the
> > sequence among the registered modules.  Right now, we initiate all
> > procedures from ACPI during hot-plug, which I think is inflexible and
> > steps into other module's role.
> > 
> > I am also concerned about the slot handling, which is the core piece of
> > the infrastructure and only allows hot-plug operations on ACPI objects
> > where slot objects are previously created by checking _EJ0.  The
> > infrastructure should allow hot-plug operations on any objects, and it
> > should not be dependent on the slot design.
> > 
> > I have some rough idea, and it may be easier to review / explain if I
> > make some code changes.  So, let me prototype it, and send it you all if
> > that works out.  Hopefully, it won't take too long.
> 
> Great! If any thing I can do, please let me know it.

Cool.  Yes, if the prototype turns out to be a good one, we can work
together to improve it. :)
 
Thanks,
-Toshi

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

< 4 5 6 7 8 9 10 11 >

801 - 900 of 1044 matches

Mail list logo