date:20141208

[PATCH 1/7] CXL: Change contexts_lock to a mutex to fix sleep while atomic bug

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

We had a known sleep while atomic bug if a CXL device was forcefully
unbound while it was in use. This could occur as a result of EEH, or
manually induced with something like this while the device was in use:

echo 0000:01:00.0 > /sys/bus/pci/drivers/cxl-pci/unbind

The issue was that in this code path we iterated over each context and
forcefully detached it with the contexts_lock spin lock held, however
the detach also needed to take the spa_mutex, and call schedule.

This patch changes the contexts_lock to a mutex so that we are not in
atomic context while doing the detach, thereby avoiding the sleep while
atomic.

Also delete the related TODO comment, which suggested an alternate
solution which turned out to not be workable.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/context.c | 15 ---
 drivers/misc/cxl/cxl.h |  2 +-
 drivers/misc/cxl/native.c  |  7 ---
 drivers/misc/cxl/pci.c |  2 +-
 drivers/misc/cxl/sysfs.c   | 10 +-
 5 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index cca4721..4aa31a3 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -82,12 +82,12 @@ int cxl_context_init(struct cxl_context *ctx, struct 
cxl_afu *afu, bool master)
 * Allocating IDR! We better make sure everything's setup that
 * dereferences from it.
 */
+   mutex_lock(afu-contexts_lock);
idr_preload(GFP_KERNEL);
-   spin_lock(afu-contexts_lock);
i = idr_alloc(ctx-afu-contexts_idr, ctx, 0,
  ctx-afu-num_procs, GFP_NOWAIT);
-   spin_unlock(afu-contexts_lock);
idr_preload_end();
+   mutex_unlock(afu-contexts_lock);
if (i  0)
return i;
 
@@ -168,21 +168,22 @@ void cxl_context_detach_all(struct cxl_afu *afu)
struct cxl_context *ctx;
int tmp;
 
-   rcu_read_lock();
-   idr_for_each_entry(afu-contexts_idr, ctx, tmp)
+   mutex_lock(afu-contexts_lock);
+   idr_for_each_entry(afu-contexts_idr, ctx, tmp) {
/*
 * Anything done in here needs to be setup before the IDR is
 * created and torn down after the IDR removed
 */
__detach_context(ctx);
-   rcu_read_unlock();
+   }
+   mutex_unlock(afu-contexts_lock);
 }
 
 void cxl_context_free(struct cxl_context *ctx)
 {
-   spin_lock(ctx-afu-contexts_lock);
+   mutex_lock(ctx-afu-contexts_lock);
idr_remove(ctx-afu-contexts_idr, ctx-pe);
-   spin_unlock(ctx-afu-contexts_lock);
+   mutex_unlock(ctx-afu-contexts_lock);
synchronize_rcu();
 
free_page((u64)ctx-sstp);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b5b6bda..7c05239 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -351,7 +351,7 @@ struct cxl_afu {
struct device *chardev_s, *chardev_m, *chardev_d;
struct idr contexts_idr;
struct dentry *debugfs;
-   spinlock_t contexts_lock;
+   struct mutex contexts_lock;
struct mutex spa_mutex;
spinlock_t afu_cntl_lock;
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 9a5a442..1001cf4 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -610,13 +610,6 @@ static inline int detach_process_native_dedicated(struct 
cxl_context *ctx)
return 0;
 }
 
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) .  Terminate
- *  remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
 {
if (!ctx-pe_inserted)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 10c98ab..0f2cc9f 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, 
int slice)
afu-dev.release = cxl_release_afu;
afu-slice = slice;
idr_init(afu-contexts_idr);
-   spin_lock_init(afu-contexts_lock);
+   mutex_init(afu-contexts_lock);
spin_lock_init(afu-afu_cntl_lock);
mutex_init(afu-spa_mutex);
 
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index ce7ec06..461bdbd 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct device *device,
int rc;
 
/* Not safe to reset if it is currently in use */
-   spin_lock(afu-contexts_lock);
+   mutex_lock(afu-contexts_lock);
if (!idr_is_empty(afu-contexts_idr)) {
rc = -EBUSY;
goto err;
@@ -132,7 +132,7 @@ static ssize_t

[PATCH 2/7] CXL: Add timeout to process element commands

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

In the event that something goes wrong in the hardware and it is unable
to complete a process element command we would end up polling forever,
effectively making the associated process unkillable.

This patch adds a timeout to the process element command code path, so
that we will give up if the hardware does not respond in a reasonable
time.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/native.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 1001cf4..f2b37b4 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -277,6 +277,7 @@ static int do_process_element_cmd(struct cxl_context *ctx,
  u64 cmd, u64 pe_state)
 {
u64 state;
+   unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
WARN_ON(!ctx-afu-enabled);
 
@@ -286,6 +287,10 @@ static int do_process_element_cmd(struct cxl_context *ctx,
smp_mb();
cxl_p1n_write(ctx-afu, CXL_PSL_LLCMD_An, cmd | ctx-pe);
while (1) {
+   if (time_after_eq(jiffies, timeout)) {
+   dev_warn(ctx-afu-dev, WARNING: Process Element 
Command timed out!\n);
+   return -EBUSY;
+   }
state = be64_to_cpup(ctx-afu-sw_command_status);
if (state == ~0ULL) {
pr_err(cxl: Error adding process element to AFU\n);
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 4/7] CXL: Early return from cxl_handle_fault for a shut down context

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

If a context is being detached and we get a translation fault for it
there is little point getting its mm and handling the fault, so just
respond with an address error and return earlier.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/fault.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index c99e896..3e8c06a 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -176,6 +176,12 @@ void cxl_handle_fault(struct work_struct *fault_work)
return;
}
 
+   /* Early return if the context is being / has been detached */
+   if (ctx-status == CLOSED) {
+   cxl_ack_ae(ctx);
+   return;
+   }
+
pr_devel(CXL BOTTOM HALF handling fault for afu pe: %i. 
DSISR: %#llx DAR: %#llx\n, ctx-pe, dsisr, dar);
 
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 5/7] CXL: Disable AFU debug flag

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

Upon inspection of the implementation specific registers, it was
discovered that the high bit of the implementation specific RXCTL
register was enabled, which enables the DEADB00F debug feature.

The debug feature causes MMIO reads to a disabled AFU to respond with
0xDEADB00F instead of all Fs. In general this should not be visible as
the kernel will only allow MMIO access to enabled AFUs, but there may be
some circumstances where an AFU may become disabled while it is in use.
One such case would be an AFU designed to only be used in the dedicated
process mode and to disable itself after it has completed its work
(however even in that case the effects of this debug flag would be
limited as the userspace application must have completed any required
MMIO accesses before the AFU disables itself with or without the flag).

This patch removes the debug flag and replaces the magic value
programmed into this register with a preprocessor define so it is
clearer what the rest of this initialisation does.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/cxl.h | 7 +++
 drivers/misc/cxl/pci.c | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 7c05239..c1f8aa6 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -287,6 +287,13 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0};
 #define CXL_PE_SOFTWARE_STATE_S (1ul  (31 - 30)) /* Suspend */
 #define CXL_PE_SOFTWARE_STATE_T (1ul  (31 - 31)) /* Terminate */
 
+/** CXL_PSL_RXCTL_An (Implementation Specific) **
+ * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to
+ * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x
+ * of the hang pulse frequency.
+ */
+#define CXL_PSL_RXCTL_AFUHP_4S  0x7000ULL
+
 /* SPA-sw_command_status */
 #define CXL_SPA_SW_CMD_MASK 0xULL
 #define CXL_SPA_SW_CMD_TERMINATE0x0001ULL
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 0f2cc9f..2ccd0a9 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -348,7 +348,7 @@ static int init_implementation_afu_regs(struct cxl_afu *afu)
cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF00FEFEFEFEULL);
/* for debugging with trace arrays */
cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0xULL);
-   cxl_p1n_write(afu, CXL_PSL_RXCTL_A, 0xF000ULL);
+   cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S);
 
return 0;
 }
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 6/7] CXL: Disable SPAP register when freeing SPA

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

When we deactivate the AFU directed mode we free the scheduled process
area, but did not clear the register in the hardware that has a pointer
to it.

This should be fine since we will have already cleared out every context
and we won't do anything that would cause the hardware to access it
until after we have allocated a new one, but just to be safe this patch
clears out the register when we free the page.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/native.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index f2b37b4..0f24fa5 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -185,6 +185,7 @@ static int alloc_spa(struct cxl_afu *afu)
 
 static void release_spa(struct cxl_afu *afu)
 {
+   cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
free_pages((unsigned long) afu-spa, afu-spa_order);
 }
 
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/7] CXL: Fix leaking interrupts if attach process fails

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

In this particular error path we have already allocated the AFU
interrupts, but have not yet set the status to STARTED. The detach
context code will only attempt to release the interrupts if the context
is in state STARTED, so in this case the interrupts would remain
allocated.

This patch releases the AFU interrupts immediately if the attach call
fails to prevent them leaking.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 378b099..2e067a5 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -181,8 +181,10 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
ctx-pid = get_pid(get_task_pid(current, PIDTYPE_PID));
 
if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
-amr)))
+amr))) {
+   afu_release_irqs(ctx);
goto out;
+   }
 
ctx-status = STARTED;
rc = 0;
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 7/7] CXL: Unmap MMIO regions when detaching a context

2014-12-08 Thread Ian Munsie

From: Ian Munsie imun...@au1.ibm.com

If we need to force detach a context (e.g. due to EEH or simply force
unbinding the driver) we should prevent the userspace contexts from
being able to access the Problem State Area MMIO region further, which
they may have mapped with mmap().

This patch unmaps any mapped MMIO regions when detaching a userspace
context.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
---
 drivers/misc/cxl/context.c | 11 ++-
 drivers/misc/cxl/cxl.h |  7 ++-
 drivers/misc/cxl/file.c|  6 +-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 4aa31a3..51fd6b5 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -34,7 +34,8 @@ struct cxl_context *cxl_context_alloc(void)
 /*
  * Initialises a CXL context.
  */
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+struct address_space *mapping)
 {
int i;
 
@@ -42,6 +43,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu 
*afu, bool master)
ctx-afu = afu;
ctx-master = master;
ctx-pid = NULL; /* Set in start work ioctl */
+   mutex_init(ctx-mapping_lock);
+   ctx-mapping = mapping;
 
/*
 * Allocate the segment table before we put it in the IDR so that we
@@ -147,6 +150,12 @@ static void __detach_context(struct cxl_context *ctx)
afu_release_irqs(ctx);
flush_work(ctx-fault_work); /* Only needed for dedicated process */
wake_up_all(ctx-wq);
+
+   /* Release Problem State Area mapping */
+   mutex_lock(ctx-mapping_lock);
+   if (ctx-mapping)
+   unmap_mapping_range(ctx-mapping, 0, 0, 1);
+   mutex_unlock(ctx-mapping_lock);
 }
 
 /*
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index c1f8aa6..0df0438 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -405,6 +405,10 @@ struct cxl_context {
phys_addr_t psn_phys;
u64 psn_size;
 
+   /* Used to unmap any mmaps when force detaching */
+   struct address_space *mapping;
+   struct mutex mapping_lock;
+
spinlock_t sste_lock; /* Protects segment table entries */
struct cxl_sste *sstp;
u64 sstp0, sstp1;
@@ -606,7 +610,8 @@ int cxl_alloc_sst(struct cxl_context *ctx);
 void init_cxl_native(void);
 
 struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool 
master);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
 
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 2e067a5..b09be44 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -77,7 +77,7 @@ static int __afu_open(struct inode *inode, struct file *file, 
bool master)
goto err_put_afu;
}
 
-   if ((rc = cxl_context_init(ctx, afu, master)))
+   if ((rc = cxl_context_init(ctx, afu, master, inode-i_mapping)))
goto err_put_afu;
 
pr_devel(afu_open pe: %i\n, ctx-pe);
@@ -113,6 +113,10 @@ static int afu_release(struct inode *inode, struct file 
*file)
 __func__, ctx-pe);
cxl_context_detach(ctx);
 
+   mutex_lock(ctx-mapping_lock);
+   ctx-mapping = NULL;
+   mutex_unlock(ctx-mapping_lock);
+
put_device(ctx-afu-dev);
 
/*
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2 1/4] powerpc: drop the ability to tweak SMT mode at boot time

2014-12-08 Thread Greg Kurz

On Fri, 5 Dec 2014 12:52:45 -0600
Scott Wood scottw...@freescale.com wrote:

 On Fri, 2014-12-05 at 16:14 +0100, Greg Kurz wrote:
  The smt-enabled kernel parameter basically leaves unwanted cpus executing
  in firmware or wherever they happen to be. The very same applies to the
  ibm,smt-enabled DT property which is no more used by anything known. These
  are hacks that shouldn't be used in a production environment.
  
  Quoting mpe, there are better ways for firmware to disable SMT.
 

Hi Scott,

 Those better ways don't apply to Freescale chips, where the OS enables
 (or not) SMT without any interaction with firmware.  I don't care about
 the ibm,smt-enabled property, but can we please keep the smt-enabled
 boot option?
 

Fair enough for the firmware side, what about CPU hot(un)plug then ?

  It also has an evil side effect on the split-core feature for powernv. The
  code needs all the cpus to participate to the split mode update: it relies
  on smp_send_reschedule() to get offline ones to do so. This doesn't work 
  with
  cpus that haven't come up... The consequence is a kernel hang on powernv 
  when
  trying to limit the number of hw threads at boot time (e.g. smt-enabled to
  anything but 8 on POWER8).
 
 In that case could you disable the option only on that hardware?
 

The fact it breaks only powernv doesn't mean it is a powernv only issue.
The smt-enabled feature is a hack because it leaves some cpus in an undefined
state from a kernel POV. Moreover it drags about 80 lines of code and sits
entirely in common ppc64 code. So I would reverse the question: why not
move the smt-enabled code to freescale only?

  This patch simply removes both the smt-enabled kernel parameter and the
  ibm,smt-enabled property for all platforms. The new default is to start
  all hw threads. That leaves /sys the only supported API to change SMT
  settings.
 
 How would you use /sys for this?  Are you talking about CPU hotplug?
 

Yes. This is the safer way to offline cpus.

 -Scott
 

Cheers.

--
Greg

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] kthread: kthread_bind fails to enforce CPU affinity (fixes kernel BUG at kernel/smpboot.c:134!)

2014-12-08 Thread Ingo Molnar


* Anton Blanchard an...@samba.org wrote:

 I have a busy ppc64le KVM box where guests sometimes hit the 
 infamous kernel BUG at kernel/smpboot.c:134! issue during 
 boot:
 
 BUG_ON(td-cpu != smp_processor_id());
 
 Basically a per CPU hotplug thread scheduled on the wrong CPU. The oops
 output confirms it:
 
 CPU: 0
 Comm: watchdog/130
 
 The issue is in kthread_bind where we set the cpus_allowed 
 mask, but do not touch task_thread_info(p)-cpu. The scheduler 
 assumes the previously scheduled CPU is in the cpus_allowed 
 mask, but in this case we are moving a thread to another CPU so 
 it is not.
 
 We used to call set_task_cpu which sets 
 task_thread_info(p)-cpu (in fact kthread_bind still has a 
 comment suggesting this). That was removed in e2912009fb7b 
 (sched: Ensure set_task_cpu() is never called on blocked 
 tasks).
 
 Since we cannot call set_task_cpu (the task is in a sleeping 
 state), just do an explicit set of task_thread_info(p)-cpu.

So we cannot call set_task_cpu() because in the normal life time
of a task the ->cpu value gets set on wakeup. So if a task is
blocked right now, and its affinity changes, it ought to get a
correct ->cpu selected on wakeup. The affinity mask and the
current value of ->cpu getting out of sync is thus 'normal'.

(Check for example how set_cpus_allowed_ptr() works: we first set
the new allowed mask, then we migrate the task away if
necessary.)

In the kthread_bind() case this is explicitly assumed: it only 
calls do_set_cpus_allowed().

But obviously the bug triggers in kernel/smpboot.c, and that
assert shows a real bug - and your patch makes the assert go
away, so the question is, how did the kthread get woken up and
put on a runqueue without its ->cpu getting set?

One possibility is a generic scheduler bug in ttwu(), resulting
in ->cpu not getting set properly. If this was the case then
other places would be blowing up as well, and I don't think we
are seeing this currently, especially not over such a long
timespan.

Another possibility would be that kthread_bind()'s assumption
that the task is inactive is false: if the task activates when we
think it's blocked and we just hotplug-migrate it away while it's
running (setting its td->cpu?), the assert could trigger I think
- and the patch would make the assert go away.

A third possibility would be, if this is a freshly created 
thread, some sort of initialization race - either in the kthread 
or in the scheduler code.

Weird.

Thanks,

Ingo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [V6,1/9] elf: Add new powerpc specifc core note sections

2014-12-08 Thread Anshuman Khandual

On 12/03/2014 12:18 PM, Anshuman Khandual wrote:
 On 12/03/2014 10:52 AM, Michael Ellerman wrote:
 On Tue, 2014-02-12 at 07:56:45 UTC, Anshuman Khandual wrote:
 This patch adds four new ELF core note sections for powerpc
 transactional memory and one new ELF core note section for
 powerpc general miscellaneous debug registers. This addition
 of new ELF core note sections extends the existing ELF ABI
 without affecting it in any manner.

 Acked-by: Andrew Morton a...@linux-foundation.org
 Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
 ---
  include/uapi/linux/elf.h | 5 +
  1 file changed, 5 insertions(+)

 diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
 index ea9bf25..2260fc0 100644
 --- a/include/uapi/linux/elf.h
 +++ b/include/uapi/linux/elf.h
 @@ -379,6 +379,11 @@ typedef struct elf64_shdr {
  #define NT_PPC_VMX 0x100   /* PowerPC Altivec/VMX registers */
  #define NT_PPC_SPE 0x101   /* PowerPC SPE/EVR registers */
  #define NT_PPC_VSX 0x102   /* PowerPC VSX registers */
 +#define NT_PPC_TM_SPR  0x103   /* PowerPC TM special registers 
 */
 +#define NT_PPC_TM_CGPR 0x104   /* PowerpC TM checkpointed GPR 
 */
 +#define NT_PPC_TM_CFPR 0x105   /* PowerPC TM checkpointed FPR 
 */
 +#define NT_PPC_TM_CVMX 0x106   /* PowerPC TM checkpointed VMX 
 */
 +#define NT_PPC_MISC0x107   /* PowerPC miscellaneous 
 registers */

 This is a really terrible name, MISC.

 Having said that, I guess it's accurate. We have a whole bunch of regs that
 have accrued over recent years that aren't accessible via ptrace.

 It seems to me if we're adding a misc regset we should be adding everything 
 we
 might want to it that is currently architected.
 
 But I believe they also need to be part of the thread_struct structure to be
 accessible from ptrace.

Currently we don't context save/restore the PMC count registers (PMC1-PMC6)
during the process context switch. So the values of PMC1..PMC6 are not
thread specific in the structure. To be able to access them in ptrace
when the tracee has stopped, we need to context save these counters
in the thread struct. Shall we do that? Then we can add them to the
MISC regset bucket irrespective of what value we get in there when
we probe through ptrace.

The same goes for MMCRA, CFAR registers as well.

  

 But currently you only include the PPR, TAR & DSCR.
 
 Yeah, that's what we started with.
 

 Looking at Power ISA v2.07, I see the following that could be included:

   MMCR2
   MMCRA
   PMC1
   PMC2
   PMC3
   PMC4
   PMC5
   PMC6
   MMCR0
   EBBHR
   EBBRR
   BESCR
   SIAR
   SDAR
   CFAR?
 
 MMCRA, PMC[1..6], EBBHR, BESCR, EBBRR, CFAR are not part of the thread struct.

Sorry. EBBRR, EBBHR, BESCR registers are part of the thread struct.

 

 Those are all new in 2.07 except for CFAR.

 There might be more I missed, that was just a quick scan.

 Some are only accessible when EBB is in use, maybe those could be a separate
 regset.
 
 Yeah we can have one more regset for EBB specific registers.

Should the new EBB specific regset include only the EBBRR, EBBHR and BESCR
registers, or should it also include the SIAR, SDAR, SIER, MMCR0 and MMCR2
registers? I was thinking about putting these five registers into the MISC
bucket instead. But from the perf code, it looks like these five registers
are also related to the EBB context.

Some clarity on these points would really help.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] kthread: kthread_bind fails to enforce CPU affinity (fixes kernel BUG at kernel/smpboot.c:134!)

2014-12-08 Thread Anton Blanchard


Hi Ingo,

 So we cannot call set_task_cpu() because in the normal life time 
 of a task the -cpu value gets set on wakeup. So if a task is 
 blocked right now, and its affinity changes, it ought to get a 
 correct -cpu selected on wakeup. The affinity mask and the 
 current value of -cpu getting out of sync is thus 'normal'.
 
 (Check for example how set_cpus_allowed_ptr() works: we first set 
 the new allowed mask, then do we migrate the task away if 
 necessary.)
 
 In the kthread_bind() case this is explicitly assumed: it only 
 calls do_set_cpus_allowed().
 
 But obviously the bug triggers in kernel/smpboot.c, and that 
 assert shows a real bug - and your patch makes the assert go 
 away, so the question is, how did the kthread get woken up and 
 put on a runqueue without its -cpu getting set?

I started going down this line earlier today, and found things like:

select_task_rq_fair:

if (p-nr_cpus_allowed == 1)
return prev_cpu;

I tried returning cpumask_first(tsk_cpus_allowed()) instead, and while
I couldn't hit the BUG I did manage to get a scheduler lockup during
testing.

At that point I thought the previous task_cpu() was somewhat ingrained
in the scheduler and came up with the patch. If not, we could go on a
hunt to see what else needs fixing.

Anton
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Build regressions/improvements in v3.18

2014-12-08 Thread Geert Uytterhoeven

On Mon, Dec 8, 2014 at 11:24 AM, Geert Uytterhoeven
ge...@linux-m68k.org wrote:
 JFYI, when comparing v3.18[1]  to v3.18-rc7[3], the summaries are:
   - build errors: +13/-11

(ignoring non-interesting things)

  + /home/kisskb/slave/src/arch/powerpc/lib/sstep.c: error: 'do_lfd'
undeclared (first use in this function):  = 1874:29
  + /home/kisskb/slave/src/arch/powerpc/lib/sstep.c: error: 'do_lfs'
undeclared (first use in this function):  = 1872:29
  + /home/kisskb/slave/src/arch/powerpc/lib/sstep.c: error: 'do_stfd'
undeclared (first use in this function):  = 1928:30
  + /home/kisskb/slave/src/arch/powerpc/lib/sstep.c: error: 'do_stfs'
undeclared (first use in this function):  = 1926:30
  + /home/kisskb/slave/src/arch/powerpc/lib/sstep.c: error: implicit
declaration of function 'do_fp_load'
[-Werror=implicit-function-declaration]:  = 1872:4
  + /home/kisskb/slave/src/arch/powerpc/lib/sstep.c: error: implicit
declaration of function 'do_fp_store'
[-Werror=implicit-function-declaration]:  = 1926:4
  + /home/kisskb/slave/src/arch/powerpc/xmon/xmon.c: error: implicit
declaration of function 'dump_tlb_44x'
[-Werror=implicit-function-declaration]:  = 912:4

powerpc-randconfig

 [1] http://kisskb.ellerman.id.au/kisskb/head/8168/ (262 out of 119 configs)
 [3] http://kisskb.ellerman.id.au/kisskb/head/8143/ (262 out of 119 configs)

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say programmer or something like that.
-- Linus Torvalds
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 0/6] powerpc/mpc85xx: Add FSL QorIQ DPAA B/QMan support to device tree(s)

2014-12-08 Thread Emil Medve

v5: Fix the alloc-ranges property
Factorize the SoC specific reserved-memory properties

v4: Add binding for the portals phandle
Fix portals phandle

v3: Remove no-map
Adjust alloc-ranges for the 32-/36-bit SoC(s)

v2: Remove some reserved-memory properties
Split the patchset per IP block
Refined patch assignment

Emil Medve (2):
  dt/bindings: b/qman: Fix the alloc-ranges in the example(s)
  dt/bindings: b/qman: Add phandle to the portals

Kumar Gala (4):
  powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA BMan
  powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA QMan
  powerpc/mpc85xx: Add FSL QorIQ DPAA BMan support to device tree(s)
  powerpc/mpc85xx: Add FSL QorIQ DPAA QMan support to device tree(s)

 Documentation/devicetree/bindings/soc/fsl/bman.txt |  12 +-
 Documentation/devicetree/bindings/soc/fsl/qman.txt |  14 +-
 arch/powerpc/boot/dts/b4qds.dtsi   |  29 +-
 arch/powerpc/boot/dts/fsl/b4860si-post.dtsi| 129 -
 arch/powerpc/boot/dts/fsl/b4si-post.dtsi   | 195 ++-
 arch/powerpc/boot/dts/fsl/p1023si-post.dtsi|  80 ++-
 arch/powerpc/boot/dts/fsl/p2041si-post.dtsi|  24 +-
 arch/powerpc/boot/dts/fsl/p3041si-post.dtsi|  24 +-
 arch/powerpc/boot/dts/fsl/p4080si-post.dtsi|  24 +-
 arch/powerpc/boot/dts/fsl/p5020si-post.dtsi|  24 +-
 arch/powerpc/boot/dts/fsl/p5040si-post.dtsi|  24 +-
 arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi |  90 
 arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi |  41 ++
 arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi | 101 
 arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi |  41 ++
 arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi |  41 ++
 arch/powerpc/boot/dts/fsl/t1040si-post.dtsi| 143 -
 arch/powerpc/boot/dts/fsl/t2081si-post.dtsi| 231 +++-
 arch/powerpc/boot/dts/fsl/t4240si-post.dtsi| 583 -
 arch/powerpc/boot/dts/kmcoge4.dts  |  27 +
 arch/powerpc/boot/dts/oca4080.dts  |  27 +
 arch/powerpc/boot/dts/p1023rdb.dts |  30 +-
 arch/powerpc/boot/dts/p2041rdb.dts |  29 +-
 arch/powerpc/boot/dts/p3041ds.dts  |  29 +-
 arch/powerpc/boot/dts/p4080ds.dts  |  29 +-
 arch/powerpc/boot/dts/p5020ds.dts  |  29 +-
 arch/powerpc/boot/dts/p5040ds.dts  |  29 +-
 arch/powerpc/boot/dts/t104xqds.dtsi|  29 +-
 arch/powerpc/boot/dts/t104xrdb.dtsi|  26 +
 arch/powerpc/boot/dts/t208xqds.dtsi|  29 +-
 arch/powerpc/boot/dts/t208xrdb.dtsi|  27 +
 arch/powerpc/boot/dts/t4240qds.dts |  29 +-
 arch/powerpc/boot/dts/t4240rdb.dts |  27 +
 33 files changed, 2221 insertions(+), 25 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi

-- 
2.2.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 1/6] dt/bindings: b/qman: Fix the alloc-ranges in the example(s)

2014-12-08 Thread Emil Medve

'ranges' are specified as <base size> not as <start end>

Signed-off-by: Emil Medve emilian.me...@freescale.com
---
 Documentation/devicetree/bindings/soc/fsl/bman.txt | 2 +-
 Documentation/devicetree/bindings/soc/fsl/qman.txt | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt 
b/Documentation/devicetree/bindings/soc/fsl/bman.txt
index 9f80bf8..ee03ef2 100644
--- a/Documentation/devicetree/bindings/soc/fsl/bman.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt
@@ -96,7 +96,7 @@ The example below shows a BMan FBPR dynamic allocation memory 
node
 
bman_fbpr: bman-fbpr {
compatible = fsl,bman-fbpr;
-   alloc-ranges = 0 0 0xf 0x;
+   alloc-ranges = 0 0 0x10 0;
size = 0 0x100;
alignment = 0 0x100;
};
diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt 
b/Documentation/devicetree/bindings/soc/fsl/qman.txt
index 063e3a0..f3da91e 100644
--- a/Documentation/devicetree/bindings/soc/fsl/qman.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt
@@ -113,13 +113,13 @@ The example below shows a QMan FQD and a PFDR dynamic 
allocation memory nodes
 
qman_fqd: qman-fqd {
compatible = fsl,qman-fqd;
-   alloc-ranges = 0 0 0xf 0x;
+   alloc-ranges = 0 0 0x10 0;
size = 0 0x40;
alignment = 0 0x40;
};
qman_pfdr: qman-pfdr {
compatible = fsl,qman-pfdr;
-   alloc-ranges = 0 0 0xf 0x;
+   alloc-ranges = 0 0 0x10 0;
size = 0 0x200;
alignment = 0 0x200;
};
-- 
2.2.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 2/6] dt/bindings: b/qman: Add phandle to the portals

2014-12-08 Thread Emil Medve

This supports SoC(s) with multiple B/QMan instances

Signed-off-by: Emil Medve emilian.me...@freescale.com
---
 Documentation/devicetree/bindings/soc/fsl/bman.txt | 10 ++
 Documentation/devicetree/bindings/soc/fsl/qman.txt | 10 ++
 2 files changed, 20 insertions(+)

diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt 
b/Documentation/devicetree/bindings/soc/fsl/bman.txt
index ee03ef2..47ac834 100644
--- a/Documentation/devicetree/bindings/soc/fsl/bman.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt
@@ -36,6 +36,11 @@ are located at offsets 0xbf8 and 0xbfc
Value type: prop-encoded-array
Definition: Standard property. The error interrupt
 
+- fsl,bman-portals
+   Usage:  Required
+   Value type: phandle
+   Definition: Phandle to this BMan instance's portals
+
 - fsl,liodn
Usage:  See pamu.txt
Value type: prop-encoded-array
@@ -104,6 +109,10 @@ The example below shows a BMan FBPR dynamic allocation 
memory node
 
 The example below shows a (P4080) BMan CCSR-space node
 
+   bportals: bman-portals@ff400 {
+   ...
+   };
+
crypto@30 {
...
fsl,bman = bman, 2;
@@ -115,6 +124,7 @@ The example below shows a (P4080) BMan CCSR-space node
reg = 0x31a000 0x1000;
interrupts = 16 2 1 2;
fsl,liodn = 0x17;
+   fsl,bman-portals = bportals;
memory-region = bman_fbpr;
};
 
diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt 
b/Documentation/devicetree/bindings/soc/fsl/qman.txt
index f3da91e..556ebb8 100644
--- a/Documentation/devicetree/bindings/soc/fsl/qman.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt
@@ -38,6 +38,11 @@ are located at offsets 0xbf8 and 0xbfc
Value type: prop-encoded-array
Definition: Standard property. The error interrupt
 
+- fsl,qman-portals
+   Usage:  Required
+   Value type: phandle
+   Definition: Phandle to this QMan instance's portals
+
 - fsl,liodn
Usage:  See pamu.txt
Value type: prop-encoded-array
@@ -127,6 +132,10 @@ The example below shows a QMan FQD and a PFDR dynamic 
allocation memory nodes
 
 The example below shows a (P4080) QMan CCSR-space node
 
+   qportals: qman-portals@ff420 {
+   ...
+   };
+
clockgen: global-utilities@e1000 {
...
sysclk: sysclk {
@@ -154,6 +163,7 @@ The example below shows a (P4080) QMan CCSR-space node
reg = 0x318000 0x1000;
interrupts = 16 2 1 3
fsl,liodn = 0x16;
+   fsl,qman-portals = qportals;
memory-region = qman_fqd qman_pfdr;
clocks = platform_pll 1;
};
-- 
2.2.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v5 4/6] powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA QMan

2014-12-08 Thread Emil Medve

From: Kumar Gala ga...@kernel.crashing.org

Change-Id: I16e63db731e55a3d60d4e147573c1af8718082d3
Signed-off-by: Kumar Gala ga...@kernel.crashing.org
Signed-off-by: Geoff Thorpe geoff.tho...@freescale.com
Signed-off-by: Hai-Ying Wang haiying.w...@freescale.com
[Emil Medve: Sync with the upstream binding]
Signed-off-by: Emil Medve emilian.me...@freescale.com
---
 arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi | 101 +
 arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi |  41 +
 arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi |  41 +
 3 files changed, 183 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi

diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi 
b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
new file mode 100644
index 000..05d51ac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
@@ -0,0 +1,101 @@
+/*
+ * QorIQ QMan Portal device tree stub for 10 portals & 15 pool channels
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ *  notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *  notice, this list of conditions and the following disclaimer in the
+ *  documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ *  names of its contributors may be used to endorse or promote products
+ *  derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License (GPL) as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 
THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+qportals {
+   #address-cells = 1;
+   #size-cells = 1;
+   compatible = simple-bus;
+
+   qportal0: qman-portal@0 {
+   compatible = fsl,qman-portal;
+   reg = 0x0 0x4000, 0x10 0x1000;
+   interrupts = 104 2 0 0;
+   fsl,qman-channel-id = 0x0;
+   };
+   qportal1: qman-portal@4000 {
+   compatible = fsl,qman-portal;
+   reg = 0x4000 0x4000, 0x101000 0x1000;
+   interrupts = 106 2 0 0;
+   fsl,qman-channel-id = 1;
+   };
+   qportal2: qman-portal@8000 {
+   compatible = fsl,qman-portal;
+   reg = 0x8000 0x4000, 0x102000 0x1000;
+   interrupts = 108 2 0 0;
+   fsl,qman-channel-id = 2;
+   };
+   qportal3: qman-portal@c000 {
+   compatible = fsl,qman-portal;
+   reg = 0xc000 0x4000, 0x103000 0x1000;
+   interrupts = 110 2 0 0;
+   fsl,qman-channel-id = 3;
+   };
+   qportal4: qman-portal@1 {
+   compatible = fsl,qman-portal;
+   reg = 0x1 0x4000, 0x104000 0x1000;
+   interrupts = 112 2 0 0;
+   fsl,qman-channel-id = 4;
+   };
+   qportal5: qman-portal@14000 {
+   compatible = fsl,qman-portal;
+   reg = 0x14000 0x4000, 0x105000 0x1000;
+   interrupts = 114 2 0 0;
+   fsl,qman-channel-id = 5;
+   };
+   qportal6: qman-portal@18000 {
+   compatible = fsl,qman-portal;
+   reg = 0x18000 0x4000, 0x106000 0x1000;
+   interrupts = 116 2 0 0;
+   fsl,qman-channel-id = 6;
+   };
+
+   qportal7: qman-portal@1c000 {
+   compatible = fsl,qman-portal;
+   reg = 0x1c000 0x4000, 0x107000 0x1000;
+   interrupts = 118 2 0 0;
+   fsl,qman-channel-id = 7;
+   };
+   qportal8: qman-portal@2 {
+   compatible = fsl,qman-portal;

[PATCH v5 3/6] powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA BMan

2014-12-08 Thread Emil Medve

From: Kumar Gala ga...@kernel.crashing.org

Change-Id: I16e63db731e55a3d60d4e147573c1af8718082d3
Signed-off-by: Kumar Gala ga...@kernel.crashing.org
Signed-off-by: Geoff Thorpe geoff.tho...@freescale.com
Signed-off-by: Hai-Ying Wang haiying.w...@freescale.com
[Emil Medve: Sync with the upstream binding]
Signed-off-by: Emil Medve emilian.me...@freescale.com
---
 arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi | 90 ++
 arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi | 41 ++
 2 files changed, 131 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi

diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi 
b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
new file mode 100644
index 000..5022432
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
@@ -0,0 +1,90 @@
+/*
+ * QorIQ BMan Portal device tree stub for 10 portals
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ *  notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *  notice, this list of conditions and the following disclaimer in the
+ *  documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ *  names of its contributors may be used to endorse or promote products
+ *  derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License (GPL) as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 
THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+bportals {
+   #address-cells = 1;
+   #size-cells = 1;
+   compatible = simple-bus;
+
+   bman-portal@0 {
+   compatible = fsl,bman-portal;
+   reg = 0x0 0x4000, 0x10 0x1000;
+   interrupts = 105 2 0 0;
+   };
+   bman-portal@4000 {
+   compatible = fsl,bman-portal;
+   reg = 0x4000 0x4000, 0x101000 0x1000;
+   interrupts = 107 2 0 0;
+   };
+   bman-portal@8000 {
+   compatible = fsl,bman-portal;
+   reg = 0x8000 0x4000, 0x102000 0x1000;
+   interrupts = 109 2 0 0;
+   };
+   bman-portal@c000 {
+   compatible = fsl,bman-portal;
+   reg = 0xc000 0x4000, 0x103000 0x1000;
+   interrupts = 111 2 0 0;
+   };
+   bman-portal@1 {
+   compatible = fsl,bman-portal;
+   reg = 0x1 0x4000, 0x104000 0x1000;
+   interrupts = 113 2 0 0;
+   };
+   bman-portal@14000 {
+   compatible = fsl,bman-portal;
+   reg = 0x14000 0x4000, 0x105000 0x1000;
+   interrupts = 115 2 0 0;
+   };
+   bman-portal@18000 {
+   compatible = fsl,bman-portal;
+   reg = 0x18000 0x4000, 0x106000 0x1000;
+   interrupts = 117 2 0 0;
+   };
+   bman-portal@1c000 {
+   compatible = fsl,bman-portal;
+   reg = 0x1c000 0x4000, 0x107000 0x1000;
+   interrupts = 119 2 0 0;
+   };
+   bman-portal@2 {
+   compatible = fsl,bman-portal;
+   reg = 0x2 0x4000, 0x108000 0x1000;
+   interrupts = 121 2 0 0;
+   };
+   bman-portal@24000 {
+   compatible = fsl,bman-portal;
+   reg = 0x24000 0x4000, 0x109000 0x1000;
+   interrupts = 123 2 0 0;
+   };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi 
b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
new file mode 100644
index 000..3b5e350
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ BMan device tree stub [ controller @

[PATCH v5 6/6] powerpc/mpc85xx: Add FSL QorIQ DPAA QMan support to device tree(s)

2014-12-08 Thread Emil Medve

From: Kumar Gala ga...@kernel.crashing.org

Change-Id: If643fa5ba0a903aef8f5056a2c90ebecc995b760
Signed-off-by: Kumar Gala ga...@kernel.crashing.org
Signed-off-by: Geoff Thorpe geoff.tho...@freescale.com
Signed-off-by: Hai-Ying Wang haiying.w...@freescale.com
Signed-off-by: Chunhe Lan chunhe@freescale.com
Signed-off-by: Poonam Aggrwal poonam.aggr...@freescale.com
[Emil Medve: Sync with the upstream binding]
Signed-off-by: Emil Medve emilian.me...@freescale.com
---
 arch/powerpc/boot/dts/b4qds.dtsi|  12 ++
 arch/powerpc/boot/dts/fsl/b4860si-post.dtsi |  69 ++
 arch/powerpc/boot/dts/fsl/b4si-post.dtsi| 106 ++
 arch/powerpc/boot/dts/fsl/p1023si-post.dtsi |  43 
 arch/powerpc/boot/dts/fsl/p2041si-post.dtsi |  13 ++
 arch/powerpc/boot/dts/fsl/p3041si-post.dtsi |  13 ++
 arch/powerpc/boot/dts/fsl/p4080si-post.dtsi |  13 ++
 arch/powerpc/boot/dts/fsl/p5020si-post.dtsi |  13 ++
 arch/powerpc/boot/dts/fsl/p5040si-post.dtsi |  13 ++
 arch/powerpc/boot/dts/fsl/t1040si-post.dtsi |  78 +++
 arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 126 +++
 arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 318 
 arch/powerpc/boot/dts/kmcoge4.dts   |  12 ++
 arch/powerpc/boot/dts/oca4080.dts   |  12 ++
 arch/powerpc/boot/dts/p1023rdb.dts  |  12 ++
 arch/powerpc/boot/dts/p2041rdb.dts  |  12 ++
 arch/powerpc/boot/dts/p3041ds.dts   |  12 ++
 arch/powerpc/boot/dts/p4080ds.dts   |  12 ++
 arch/powerpc/boot/dts/p5020ds.dts   |  12 ++
 arch/powerpc/boot/dts/p5040ds.dts   |  12 ++
 arch/powerpc/boot/dts/t104xqds.dtsi |  12 ++
 arch/powerpc/boot/dts/t104xrdb.dtsi |  12 ++
 arch/powerpc/boot/dts/t208xqds.dtsi |  12 ++
 arch/powerpc/boot/dts/t208xrdb.dtsi |  12 ++
 arch/powerpc/boot/dts/t4240qds.dts  |  12 ++
 arch/powerpc/boot/dts/t4240rdb.dts  |  12 ++
 26 files changed, 985 insertions(+)

diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi
index 89fa85f..7ca7b7a 100644
--- a/arch/powerpc/boot/dts/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/b4qds.dtsi
@@ -113,6 +113,14 @@
size = 0 0x100;
alignment = 0 0x100;
};
+   qman_fqd: qman-fqd {
+   size = 0 0x40;
+   alignment = 0 0x40;
+   };
+   qman_pfdr: qman-pfdr {
+   size = 0 0x200;
+   alignment = 0 0x200;
+   };
};
 
dcsr: dcsr@f {
@@ -123,6 +131,10 @@
ranges = 0x0 0xf 0xf400 0x200;
};
 
+   qportals: qman-portals@ff600 {
+   ranges = 0x0 0xf 0xf600 0x200;
+   };
+
soc: soc@ffe00 {
ranges = 0x 0xf 0xfe00 0x100;
reg = 0xf 0xfe00 0 0x1000;
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
index 2dd61fa..38e297b 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
@@ -167,6 +167,75 @@
};
 };
 
+qportals {
+   qportal14: qman-portal@38000 {
+   compatible = fsl,qman-portal;
+   reg = 0x38000 0x4000, 0x100e000 0x1000;
+   interrupts = 132 0x2 0 0;
+   fsl,qman-channel-id = 0xe;
+   };
+   qportal15: qman-portal@3c000 {
+   compatible = fsl,qman-portal;
+   reg = 0x3c000 0x4000, 0x100f000 0x1000;
+   interrupts = 134 0x2 0 0;
+   fsl,qman-channel-id = 0xf;
+   };
+   qportal16: qman-portal@4 {
+   compatible = fsl,qman-portal;
+   reg = 0x4 0x4000, 0x101 0x1000;
+   interrupts = 136 0x2 0 0;
+   fsl,qman-channel-id = 0x10;
+   };
+   qportal17: qman-portal@44000 {
+   compatible = fsl,qman-portal;
+   reg = 0x44000 0x4000, 0x1011000 0x1000;
+   interrupts = 138 0x2 0 0;
+   fsl,qman-channel-id = 0x11;
+   };
+   qportal18: qman-portal@48000 {
+   compatible = fsl,qman-portal;
+   reg = 0x48000 0x4000, 0x1012000 0x1000;
+   interrupts = 140 0x2 0 0;
+   fsl,qman-channel-id = 0x12;
+   };
+   qportal19: qman-portal@4c000 {
+   compatible = fsl,qman-portal;
+   reg = 0x4c000 0x4000, 0x1013000 0x1000;
+   interrupts = 142 0x2 0 0;
+   fsl,qman-channel-id = 0x13;
+   };
+   qportal20: qman-portal@5 {
+   compatible = fsl,qman-portal;
+   reg = 0x5 0x4000, 0x1014000 0x1000;
+   interrupts = 144 0x2 0 0;
+   fsl,qman-channel-id = 0x14;
+   };
+   qportal21: qman-portal@54000 {
+   compatible =

[PATCH v5 5/6] powerpc/mpc85xx: Add FSL QorIQ DPAA BMan support to device tree(s)

2014-12-08 Thread Emil Medve

From: Kumar Gala ga...@kernel.crashing.org

Change-Id: If643fa5ba0a903aef8f5056a2c90ebecc995b760
Signed-off-by: Kumar Gala ga...@kernel.crashing.org
Signed-off-by: Geoff Thorpe geoff.tho...@freescale.com
Signed-off-by: Hai-Ying Wang haiying.w...@freescale.com
Signed-off-by: Chunhe Lan chunhe@freescale.com
Signed-off-by: Poonam Aggrwal poonam.aggr...@freescale.com
[Emil Medve: Sync with the upstream binding]
Signed-off-by: Emil Medve emilian.me...@freescale.com
---
 arch/powerpc/boot/dts/b4qds.dtsi|  17 +-
 arch/powerpc/boot/dts/fsl/b4860si-post.dtsi |  60 ++-
 arch/powerpc/boot/dts/fsl/b4si-post.dtsi|  89 +-
 arch/powerpc/boot/dts/fsl/p1023si-post.dtsi |  37 +++-
 arch/powerpc/boot/dts/fsl/p2041si-post.dtsi |  11 +-
 arch/powerpc/boot/dts/fsl/p3041si-post.dtsi |  11 +-
 arch/powerpc/boot/dts/fsl/p4080si-post.dtsi |  11 +-
 arch/powerpc/boot/dts/fsl/p5020si-post.dtsi |  11 +-
 arch/powerpc/boot/dts/fsl/p5040si-post.dtsi |  11 +-
 arch/powerpc/boot/dts/fsl/t1040si-post.dtsi |  65 ++-
 arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 105 ++-
 arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 265 +++-
 arch/powerpc/boot/dts/kmcoge4.dts   |  15 ++
 arch/powerpc/boot/dts/oca4080.dts   |  15 ++
 arch/powerpc/boot/dts/p1023rdb.dts  |  18 +-
 arch/powerpc/boot/dts/p2041rdb.dts  |  17 +-
 arch/powerpc/boot/dts/p3041ds.dts   |  17 +-
 arch/powerpc/boot/dts/p4080ds.dts   |  17 +-
 arch/powerpc/boot/dts/p5020ds.dts   |  17 +-
 arch/powerpc/boot/dts/p5040ds.dts   |  17 +-
 arch/powerpc/boot/dts/t104xqds.dtsi |  17 +-
 arch/powerpc/boot/dts/t104xrdb.dtsi |  14 ++
 arch/powerpc/boot/dts/t208xqds.dtsi |  17 +-
 arch/powerpc/boot/dts/t208xrdb.dtsi |  15 ++
 arch/powerpc/boot/dts/t4240qds.dts  |  17 +-
 arch/powerpc/boot/dts/t4240rdb.dts  |  15 ++
 26 files changed, 899 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi
index 6188583..89fa85f 100644
--- a/arch/powerpc/boot/dts/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/b4qds.dtsi
@@ -1,7 +1,7 @@
 /*
  * B4420DS Device Tree Source
  *
- * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -104,10 +104,25 @@
device_type = memory;
};
 
+   reserved-memory {
+   #address-cells = 2;
+   #size-cells = 2;
+   ranges;
+
+   bman_fbpr: bman-fbpr {
+   size = 0 0x100;
+   alignment = 0 0x100;
+   };
+   };
+
dcsr: dcsr@f {
ranges = 0x 0xf 0x 0x01052000;
};
 
+   bportals: bman-portals@ff400 {
+   ranges = 0x0 0xf 0xf400 0x200;
+   };
+
soc: soc@ffe00 {
ranges = 0x 0xf 0xfe00 0x100;
reg = 0xf 0xfe00 0 0x1000;
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi 
b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
index f356ed2..2dd61fa 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
@@ -1,7 +1,7 @@
 /*
  * B4860 Silicon/SoC Device Tree Source (post include)
  *
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2014 Freescale Semiconductor Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -109,6 +109,64 @@
};
 };
 
+bportals {
+   bman-portal@38000 {
+   compatible = fsl,bman-portal;
+   reg = 0x38000 0x4000, 0x100e000 0x1000;
+   interrupts = 133 2 0 0;
+   };
+   bman-portal@3c000 {
+   compatible = fsl,bman-portal;
+   reg = 0x3c000 0x4000, 0x100f000 0x1000;
+   interrupts = 135 2 0 0;
+   };
+   bman-portal@4 {
+   compatible = fsl,bman-portal;
+   reg = 0x4 0x4000, 0x101 0x1000;
+   interrupts = 137 2 0 0;
+   };
+   bman-portal@44000 {
+   compatible = fsl,bman-portal;
+   reg = 0x44000 0x4000, 0x1011000 0x1000;
+   interrupts = 139 2 0 0;
+   };
+   bman-portal@48000 {
+   compatible = fsl,bman-portal;
+   reg = 0x48000 0x4000, 0x1012000 0x1000;
+   interrupts = 141 2 0 0;
+   };
+   bman-portal@4c000 {
+   compatible = fsl,bman-portal;
+   reg = 0x4c000 0x4000, 0x1013000 0x1000;
+   interrupts = 143 2 0 0;
+   };
+   bman-portal@5 {
+   compatible = fsl,bman-portal;
+

Re: [PATCH V2] tick-broadcast: Register for hrtimer based broadcast as the fallback broadcast mode

2014-12-08 Thread Mark Rutland

Hi Preeti,

On Mon, Dec 08, 2014 at 06:55:43AM +, Preeti U Murthy wrote:
 Commit 5d1638acb9f6 ('tick: Introduce hrtimer based broadcast') added a
 hrtimer based broadcast mode for those platforms in which local timers stop
 when CPUs enter deep idle states. The commit expected the platforms to
 register for this mode explicitly when they lacked a better external device
 to wake up CPUs in deep idle. Given that more platforms are beginning to use
 this mode, we can avoid the call to set it up on every platform that requires
 it, by registering for the hrtimer based broadcast mode in the core code if
 no better broadcast device is available.
 
 This commit also helps detect cases where the platform fails to register for
 a broadcast device but invokes the help of one when entering deep idle states.
 Currently we do not handle this situation at all and call the broadcast clock
 device without checking for its existence. This patch will handle such buggy
 cases properly.
 
 Signed-off-by: Preeti U Murthy pre...@linux.vnet.ibm.com

I've just given this a go on an arm64 platform (Juno) without any
system-wide clock_event_devices registered, and everything works well
with CPUs entering and exiting idle states where the cpu-local timers
lose state. So:

Tested-by: Mark Rutland mark.rutl...@arm.com

One minor thing I noticed when testing was that
/sys/devices/system/clockevents/broadcast/name contained "(null)",
because we never set the name field on the clock_event_device. It's
always been that way, but now might be a good time to change that to
something like "broadcast_hrtimer".

[...]

 diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
 index 2e4cb67..91754b0 100644
 --- a/include/linux/clockchips.h
 +++ b/include/linux/clockchips.h
 @@ -187,11 +187,11 @@ extern int tick_receive_broadcast(void);
  #endif
  
  #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)  
 defined(CONFIG_TICK_ONESHOT)
 -extern void tick_setup_hrtimer_broadcast(void);
 +extern int __init tick_setup_hrtimer_broadcast(void);
  extern int tick_check_broadcast_expired(void);
  #else
  static inline int tick_check_broadcast_expired(void) { return 0; }
 -static inline void tick_setup_hrtimer_broadcast(void) {};
 +static inline int __init tick_setup_hrtimer_broadcast(void) { return 0; }
  #endif
  
  #ifdef CONFIG_GENERIC_CLOCKEVENTS
 @@ -207,7 +207,7 @@ static inline void clockevents_resume(void) {}
  
  static inline int clockevents_notify(unsigned long reason, void *arg) { 
 return 0; }
  static inline int tick_check_broadcast_expired(void) { return 0; }
 -static inline void tick_setup_hrtimer_broadcast(void) {};
 +static inline int __init tick_setup_hrtimer_broadcast(void) { return 0; }

With the initcall moved to the driver we have no external users of
tick_setup_hrtimer_broadcast, so I think we can remove the prototype
entirely from clockchips.h...

  #endif
  
 diff --git a/kernel/time/tick-broadcast-hrtimer.c 
 b/kernel/time/tick-broadcast-hrtimer.c
 index eb682d5..5c35995 100644
 --- a/kernel/time/tick-broadcast-hrtimer.c
 +++ b/kernel/time/tick-broadcast-hrtimer.c
 @@ -98,9 +98,11 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
   return HRTIMER_RESTART;
  }
  
 -void tick_setup_hrtimer_broadcast(void)
 +int __init tick_setup_hrtimer_broadcast(void)

...and make it static here.

  {
   hrtimer_init(bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
   bctimer.function = bc_handler;
   clockevents_register_device(ce_broadcast_hrtimer);
 + return 0;
  }
 +early_initcall(tick_setup_hrtimer_broadcast);

Otherwise this looks good to me, thanks for putting this together!

Mark.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v5 1/4] tools/perf: support parsing parameterized events

2014-12-08 Thread Jiri Olsa

On Sat, Dec 06, 2014 at 11:37:24PM -0800, Sukadev Bhattiprolu wrote:
 Jiri Olsa [jo...@redhat.com] wrote:
 
 | anyway we could assign directly to the param term name as you do,
 | but I think we just need to mark the term as parametrized, like:
 | 
 | in /sys/bus/event_source/devices/pmu/events/event_name you have:
 |   param2=?,bar=1,param1=?
 
 I like the idea of just using a single ? for required parameters, but
 the problem I had with this approach can be seen with these two sysfs
 entries:
 
 $ cat HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE
 domain=0x2,offset=0xe0,starting_index=core,lpar=0x0
 
 $ cat HPM_0THRD_NON_IDLE_CCYC__VCPU_HOME_CORE
 domain=0x3,offset=0xe0,starting_index=vcpu,lpar=sibling_guest_id
 
 The parameter 'starting_index' refers to a core in one event and vcpu in
 another event. We were trying to give a hint as to what it refers to.
 
 Given that, 'starting_index' is not very intuitive, how about discarding
 starting_index and replacing with what it really means for the event and,
 use a simple '?' to indicate required parameter).
 
 $ cat HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE
 domain=0x2,offset=0xe0,core=?,lpar=0x0
 
 $ cat HPM_0THRD_NON_IDLE_CCYC__VCPU_HOME_CORE
 domain=0x3,offset=0xe0,vcpu=?,lpar=?
 
 perf list shows these as:
 
   hv_24x7/HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE,core=?/ 
   hv_24x7/HPM_0THRD_NON_IDLE_CCYC__VCPU_HOME_CHIP,vcpu=?,lpar=?/
 
 command line would be
 
   -e hv_24x7/HPM_0THRD_NON_IDLE_CCYC__PHYS_CORE,core=2/ 
 
   or
 
   -e hv_24x7/HPM_0THRD_NON_IDLE_CCYC__VCPU_HOME_CHIP,vcpu=2,lpar=7/
 
 and would fail if a required parameter is missing.

that sounds good to me

jirka
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

linux-next: build failure after merge of the access_once tree

2014-12-08 Thread Stephen Rothwell

Hi Christian,

After merging the access_once tree, today's linux-next build (powerpc
allnoconfig) failed like this:

arch/powerpc/mm/hugetlbpage.c: In function 'find_linux_pte_or_hugepte':
arch/powerpc/mm/hugetlbpage.c:981:3: error: invalid initializer
   pud  = ACCESS_ONCE(*pudp);
   ^
arch/powerpc/mm/hugetlbpage.c:993:4: error: invalid initializer
pmd  = ACCESS_ONCE(*pmdp);
^

These are preexisting ...

mm/gup.c: In function 'gup_pmd_range':
mm/gup.c:929:3: error: invalid initializer
   pmd_t pmd = ACCESS_ONCE(*pmdp);
   ^
mm/gup.c:929:3: error: (near initialization for 'pmd')

This is from commit f30c59e921f1 (mm: Update generic gup
implementation to handle hugepage directory) from the powerpc-mpe (and
powerpc) tree and so will require a merge fix patch (presumably
s/ACCESS_ONCE/READ_ONCE/).

I am not sure how many architectures you are trying to cover, but
powerpc is one I care about :-)

I have dropped the access_once tree again today, sorry (it's too late at
night).
-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au


pgpMbNxwriN2y.pgp
Description: OpenPGP digital signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: linux-next: build failure after merge of the access_once tree

2014-12-08 Thread Christian Borntraeger

Am 08.12.2014 um 12:26 schrieb Stephen Rothwell:
 Hi Christian,
 
 After merging the access_once tree, today's linux-next build (powerpc
 allnoconfig) failed like this:
 
 arch/powerpc/mm/hugetlbpage.c: In function 'find_linux_pte_or_hugepte':
 arch/powerpc/mm/hugetlbpage.c:981:3: error: invalid initializer
pud  = ACCESS_ONCE(*pudp);
^
 arch/powerpc/mm/hugetlbpage.c:993:4: error: invalid initializer
 pmd  = ACCESS_ONCE(*pmdp);
 ^
 
 These are preexisting ...
 
 mm/gup.c: In function 'gup_pmd_range':
 mm/gup.c:929:3: error: invalid initializer
pmd_t pmd = ACCESS_ONCE(*pmdp);
^
 mm/gup.c:929:3: error: (near initialization for 'pmd')
 
 This is from commit f30c59e921f1 (mm: Update generic gup
 implementation to handle hugepage directory) from the powerpc-mpe (and
 powerpc) tree and so will require a merge fix patch (presumably
 s/ACCESS_ONCE/READ_ONCE/).
 
 I am not sure how many architectures you are trying to cover, but
 powerpc is one I care about :-)
 
 I have dropped the access_once tree again today, sorry (it's too late at
 night).

Yes, we expect that tightening ACCESS_ONCE will trigger these bugs. I tried to 
cover everything in Linus master, but next is certainly a new game. It will be 
pretty hard for me to keep this running when hundreds of trees get merged 
during 3.19-rc. :-(

Linus, I see two options:

a: Either we go a full round in linux-next and expect all trees to follow the 
new rules, so we can merge this for 3.20
b: Only apply READ/ASSIGN_ONCE and the first fixups for 3.19 and put the 
ACCESS_ONCE change in next after rc1 so it will go in for 3.20 (or later) so 
that people can adopt. 

Any opinions?

Christian


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2] tick-broadcast: Register for hrtimer based broadcast as the fallback broadcast mode

2014-12-08 Thread Preeti U Murthy

On 12/08/2014 04:18 PM, Mark Rutland wrote:
 Hi Preeti,
 
 On Mon, Dec 08, 2014 at 06:55:43AM +, Preeti U Murthy wrote:
 Commit 5d1638acb9f6 ('tick: Introduce hrtimer based broadcast') added a
 hrtimer based broadcast mode for those platforms in which local timers stop
 when CPUs enter deep idle states. The commit expected the platforms to
 register for this mode explicitly when they lacked a better external device
 to wake up CPUs in deep idle. Given that more platforms are beginning to use
 this mode, we can avoid the call to set it up on every platform that requires
 it, by registering for the hrtimer based broadcast mode in the core code if
 no better broadcast device is available.

 This commit also helps detect cases where the platform fails to register for
 a broadcast device but invokes the help of one when entering deep idle 
 states.
 Currently we do not handle this situation at all and call the broadcast clock
 device without checking for its existence. This patch will handle such buggy
 cases properly.

 Signed-off-by: Preeti U Murthy pre...@linux.vnet.ibm.com
 
 I've just given this a go on an arm64 platform (Juno) without any
 system-wide clock_event_devices registered, and everything works well
 with CPUs entering and exiting idle states where the cpu-local timers
 lose state. So:
 
 Tested-by: Mark Rutland mark.rutl...@arm.com

Thanks!

 
 One minor thing I noticed when testing was that
 /sys/devices/system/clockevents/broadcast/name contained (null),
 because we never set the name field on the clock_event_device. It's
 always been that way, but now might be a good time to change that to
 something like broadcast_hrtimer.

You mean /sys/devices/system/clockevents/broadcast/current_device right?

 
 [...]
 
 diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
 index 2e4cb67..91754b0 100644
 --- a/include/linux/clockchips.h
 +++ b/include/linux/clockchips.h
 @@ -187,11 +187,11 @@ extern int tick_receive_broadcast(void);
  #endif
  
  #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)  
 defined(CONFIG_TICK_ONESHOT)
 -extern void tick_setup_hrtimer_broadcast(void);
 +extern int __init tick_setup_hrtimer_broadcast(void);
  extern int tick_check_broadcast_expired(void);
  #else
  static inline int tick_check_broadcast_expired(void) { return 0; }
 -static inline void tick_setup_hrtimer_broadcast(void) {};
 +static inline int __init tick_setup_hrtimer_broadcast(void) { return 0; }
  #endif
  
  #ifdef CONFIG_GENERIC_CLOCKEVENTS
 @@ -207,7 +207,7 @@ static inline void clockevents_resume(void) {}
  
  static inline int clockevents_notify(unsigned long reason, void *arg) { 
 return 0; }
  static inline int tick_check_broadcast_expired(void) { return 0; }
 -static inline void tick_setup_hrtimer_broadcast(void) {};
 +static inline int __init tick_setup_hrtimer_broadcast(void) { return 0; }
 
 With the initcall moved to the driver we have no external users of
 tick_setup_hrtimer_broadcast, so I think we can remove the prototype
 entirely from clockchips.h...
 
  #endif
  
 diff --git a/kernel/time/tick-broadcast-hrtimer.c 
 b/kernel/time/tick-broadcast-hrtimer.c
 index eb682d5..5c35995 100644
 --- a/kernel/time/tick-broadcast-hrtimer.c
 +++ b/kernel/time/tick-broadcast-hrtimer.c
 @@ -98,9 +98,11 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
  return HRTIMER_RESTART;
  }
  
 -void tick_setup_hrtimer_broadcast(void)
 +int __init tick_setup_hrtimer_broadcast(void)
 
 ...and make it static here.

Yep will do. Sorry I overlooked this.

 
  {
  hrtimer_init(bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  bctimer.function = bc_handler;
  clockevents_register_device(ce_broadcast_hrtimer);
 +return 0;
  }
 +early_initcall(tick_setup_hrtimer_broadcast);
 
 Otherwise this looks good to me, thanks for putting this together!

Thanks a lot for the review! Will send out the patch with the above
corrections.

Regards
Preeti U Murthy
 
 Mark.
 ___
 Linuxppc-dev mailing list
 Linuxppc-dev@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2] tick-broadcast: Register for hrtimer based broadcast as the fallback broadcast mode

2014-12-08 Thread Mark Rutland

On Mon, Dec 08, 2014 at 12:02:55PM +0000, Preeti U Murthy wrote:
 On 12/08/2014 04:18 PM, Mark Rutland wrote:
  Hi Preeti,
  
  On Mon, Dec 08, 2014 at 06:55:43AM +, Preeti U Murthy wrote:
  Commit 5d1638acb9f6 ('tick: Introduce hrtimer based broadcast') added a
  hrtimer based broadcast mode for those platforms in which local timers stop
  when CPUs enter deep idle states. The commit expected the platforms to
  register for this mode explicitly when they lacked a better external device
  to wake up CPUs in deep idle. Given that more platforms are beginning to 
  use
  this mode, we can avoid the call to set it up on every platform that 
  requires
  it, by registering for the hrtimer based broadcast mode in the core code if
  no better broadcast device is available.
 
  This commit also helps detect cases where the platform fails to register 
  for
  a broadcast device but invokes the help of one when entering deep idle 
  states.
  Currently we do not handle this situation at all and call the broadcast 
  clock
  device without checking for its existence. This patch will handle such 
  buggy
  cases properly.
 
  Signed-off-by: Preeti U Murthy pre...@linux.vnet.ibm.com
  
  I've just given this a go on an arm64 platform (Juno) without any
  system-wide clock_event_devices registered, and everything works well
  with CPUs entering and exiting idle states where the cpu-local timers
  lose state. So:
  
  Tested-by: Mark Rutland mark.rutl...@arm.com
 
 Thanks!
 
  
  One minor thing I noticed when testing was that
  /sys/devices/system/clockevents/broadcast/name contained (null),
  because we never set the name field on the clock_event_device. It's
  always been that way, but now might be a good time to change that to
  something like broadcast_hrtimer.
 
 You mean /sys/devices/system/clockevents/broadcast/current_device right?

Whoops, yes I did.

  [...]
  
  diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
  index 2e4cb67..91754b0 100644
  --- a/include/linux/clockchips.h
  +++ b/include/linux/clockchips.h
  @@ -187,11 +187,11 @@ extern int tick_receive_broadcast(void);
   #endif
   
   #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)  
  defined(CONFIG_TICK_ONESHOT)
  -extern void tick_setup_hrtimer_broadcast(void);
  +extern int __init tick_setup_hrtimer_broadcast(void);
   extern int tick_check_broadcast_expired(void);
   #else
   static inline int tick_check_broadcast_expired(void) { return 0; }
  -static inline void tick_setup_hrtimer_broadcast(void) {};
  +static inline int __init tick_setup_hrtimer_broadcast(void) { return 0; }
   #endif
   
   #ifdef CONFIG_GENERIC_CLOCKEVENTS
  @@ -207,7 +207,7 @@ static inline void clockevents_resume(void) {}
   
   static inline int clockevents_notify(unsigned long reason, void *arg) { 
  return 0; }
   static inline int tick_check_broadcast_expired(void) { return 0; }
  -static inline void tick_setup_hrtimer_broadcast(void) {};
  +static inline int __init tick_setup_hrtimer_broadcast(void) { return 0; }
  
  With the initcall moved to the driver we have no external users of
  tick_setup_hrtimer_broadcast, so I think we can remove the prototype
  entirely from clockchips.h...
  
   #endif
   
  diff --git a/kernel/time/tick-broadcast-hrtimer.c 
  b/kernel/time/tick-broadcast-hrtimer.c
  index eb682d5..5c35995 100644
  --- a/kernel/time/tick-broadcast-hrtimer.c
  +++ b/kernel/time/tick-broadcast-hrtimer.c
  @@ -98,9 +98,11 @@ static enum hrtimer_restart bc_handler(struct hrtimer 
  *t)
 return HRTIMER_RESTART;
   }
   
  -void tick_setup_hrtimer_broadcast(void)
  +int __init tick_setup_hrtimer_broadcast(void)
  
  ...and make it static here.
 
 Yep will do. Sorry I overlooked this.
 
  
   {
 hrtimer_init(bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 bctimer.function = bc_handler;
 clockevents_register_device(ce_broadcast_hrtimer);
  +  return 0;
   }
  +early_initcall(tick_setup_hrtimer_broadcast);
  
  Otherwise this looks good to me, thanks for putting this together!
 
 Thanks a lot for the review! Will send out the patch with the above
 corrections.

Cheers!

Thanks,
Mark.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3] tick-broadcast: Register for hrtimer based broadcast as the fallback broadcast mode

2014-12-08 Thread Preeti U Murthy

Commit 5d1638acb9f6 ('tick: Introduce hrtimer based broadcast') added a
hrtimer based broadcast mode for those platforms in which local timers stop
when CPUs enter deep idle states. The commit expected the platforms to
register for this mode explicitly when they lacked a better external device
to wake up CPUs in deep idle. Given that more platforms are beginning to use
this mode, we can avoid the call to set it up on every platform that requires
it, by registering for the hrtimer based broadcast mode in the core code if
no better broadcast device is available.

This commit also helps detect cases where the platform fails to register for
a broadcast device but invokes the help of one when entering deep idle states.
Currently we do not handle this situation at all and call the broadcast clock
device without checking for its existence. This patch will handle such buggy
cases properly. While at it, give a name to this mode of broadcast which was
missing all along.

Signed-off-by: Preeti U Murthy pre...@linux.vnet.ibm.com
Tested-by: Mark Rutland mark.rutl...@arm.com
---
Changes from V1: https://lkml.org/lkml/2014/12/5/261
1. Moved registering the hrtimer based broadcast from timekeeping code
to an early_initcall.

Changes from V2: https://lkml.org/lkml/2014/12/8/57
1. Added the 'name' param to hrtimer broadcast mode and removed the prototype
to setup this mode since there are no external callers of it.

 arch/arm64/kernel/time.c |2 --
 arch/powerpc/kernel/time.c   |1 -
 include/linux/clockchips.h   |3 ---
 kernel/time/tick-broadcast-hrtimer.c |5 -
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 1a7125c..47baaa8 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -70,8 +70,6 @@ void __init time_init(void)
of_clk_init(NULL);
clocksource_of_init();
 
-   tick_setup_hrtimer_broadcast();
-
arch_timer_rate = arch_timer_get_rate();
if (!arch_timer_rate)
panic(Unable to initialise architected timer.\n);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7505599..51433a8 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -942,7 +942,6 @@ void __init time_init(void)
clocksource_init();
 
init_decrementer_clockevent();
-   tick_setup_hrtimer_broadcast();
 }
 
 
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 2e4cb67..c362143 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -187,11 +187,9 @@ extern int tick_receive_broadcast(void);
 #endif
 
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)  
defined(CONFIG_TICK_ONESHOT)
-extern void tick_setup_hrtimer_broadcast(void);
 extern int tick_check_broadcast_expired(void);
 #else
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
 #endif
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -207,7 +205,6 @@ static inline void clockevents_resume(void) {}
 
 static inline int clockevents_notify(unsigned long reason, void *arg) { return 
0; }
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
 
 #endif
 
diff --git a/kernel/time/tick-broadcast-hrtimer.c 
b/kernel/time/tick-broadcast-hrtimer.c
index eb682d5..1f6bc6a 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -72,6 +72,7 @@ static int bc_set_next(ktime_t expires, struct 
clock_event_device *bc)
 }
 
 static struct clock_event_device ce_broadcast_hrtimer = {
+   .name   = broadcast_hrtimer,
.set_mode   = bc_set_mode,
.set_next_ktime = bc_set_next,
.features   = CLOCK_EVT_FEAT_ONESHOT |
@@ -98,9 +99,11 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
return HRTIMER_RESTART;
 }
 
-void tick_setup_hrtimer_broadcast(void)
+static int __init tick_setup_hrtimer_broadcast(void)
 {
hrtimer_init(bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
bctimer.function = bc_handler;
clockevents_register_device(ce_broadcast_hrtimer);
+   return 0;
 }
+early_initcall(tick_setup_hrtimer_broadcast);

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] kthread: kthread_bind fails to enforce CPU affinity (fixes kernel BUG at kernel/smpboot.c:134!)

2014-12-08 Thread Steven Rostedt

On Mon,  8 Dec 2014 14:27:01 +1100
Anton Blanchard an...@samba.org wrote:

 I have a busy ppc64le KVM box where guests sometimes hit the infamous
 kernel BUG at kernel/smpboot.c:134! issue during boot:
 
 BUG_ON(td->cpu != smp_processor_id());
 
 Basically a per CPU hotplug thread scheduled on the wrong CPU. The oops
 output confirms it:
 
 CPU: 0
 Comm: watchdog/130
 
 The issue is in kthread_bind where we set the cpus_allowed mask, but do
 not touch task_thread_info(p)->cpu. The scheduler assumes the previously
 scheduled CPU is in the cpus_allowed mask, but in this case we are
 moving a thread to another CPU so it is not.
 

Does this happen always on boot up, and always with the watchdog thread?

I followed the logic that starts the watchdog threads.

watchdog_enable_all_cpus()
  smpboot_register_percpu_thread() {

for_each_online_cpu(cpu) { ... }

Where watchdog_enable_all_cpus() can be called by
lockup_detector_init() before SMP is started, but also by
proc_dowatchdog() which is called by the sysctl commands (after SMP is
up and running).

I noticed there's no get_online_cpus() anywhere, although the
unregister_percpu_thread() has it. Is it possible that we created a
thread on a CPU that wasn't fully online yet?

Perhaps the following patch is needed? Even if this isn't the solution
to this bug, it is probably needed as watchdog_enable_all_cpus() can be
called after boot up too.

-- Steve

diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index eb89e1807408..60d35ac5d3f1 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -279,6 +279,7 @@ int smpboot_register_percpu_thread(struct 
smp_hotplug_thread *plug_thread)
unsigned int cpu;
int ret = 0;
 
+   get_online_cpus();
mutex_lock(smpboot_threads_lock);
for_each_online_cpu(cpu) {
ret = __smpboot_create_thread(plug_thread, cpu);
@@ -291,6 +292,7 @@ int smpboot_register_percpu_thread(struct 
smp_hotplug_thread *plug_thread)
list_add(plug_thread-list, hotplug_threads);
 out:
mutex_unlock(smpboot_threads_lock);
+   put_online_cpus();
return ret;
 }
 EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc32: missing accessors to pgprot_t objects

2014-12-08 Thread Christophe Leroy

Compilation with #define STRICT_MM_TYPECHECKS in arch/powerpc/include/asm/page.h
fails due to missing use of pgprot_val() when using pgprot_t objects.

arch/powerpc/mm/pgtable_32.c: In function '__ioremap_caller':
arch/powerpc/mm/pgtable_32.c:185:9: error: invalid operands to binary | (have 
'long unsigned int' and 'pgprot_t')
   flags |= PAGE_KERNEL;
 ^
arch/powerpc/mm/pgtable_32.c: In function '__mapin_ram_chunk':
arch/powerpc/mm/pgtable_32.c:320:5: error: incompatible types when assigning to 
type 'long unsigned int' from type 'pgprot_t'
   f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
 ^
arch/powerpc/mm/ppc_mmu_32.c:95:2: error: incompatible type for argument 5 of 
'setbat'
  setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
  ^
In file included from arch/powerpc/mm/ppc_mmu_32.c:35:0:
arch/powerpc/mm/mmu_decl.h:98:13: note: expected 'int' but argument is of type 
'pgprot_t'
 extern void setbat(int index, unsigned long virt, phys_addr_t phys,
 ^
arch/powerpc/mm/ppc_mmu_32.c:103:3: error: incompatible type for argument 5 of 
'setbat'
   setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
   ^
In file included from arch/powerpc/mm/ppc_mmu_32.c:35:0:
arch/powerpc/mm/mmu_decl.h:98:13: note: expected 'int' but argument is of type 
'pgprot_t'
 extern void setbat(int index, unsigned long virt, phys_addr_t phys,
 ^

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
 arch/powerpc/mm/dma-noncoherent.c | 2 +-
 arch/powerpc/mm/pgtable_32.c  | 4 ++--
 arch/powerpc/mm/ppc_mmu_32.c  | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/dma-noncoherent.c 
b/arch/powerpc/mm/dma-noncoherent.c
index d85e86a..169aba4 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -228,7 +228,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *handle, gfp_t
do {
SetPageReserved(page);
map_page(vaddr, page_to_phys(page),
-pgprot_noncached(PAGE_KERNEL));
+pgprot_val(pgprot_noncached(PAGE_KERNEL)));
page++;
vaddr += PAGE_SIZE;
} while (size -= PAGE_SIZE);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cf11342..a349089 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -182,7 +182,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, 
unsigned long flags,
 
/* Make sure we have the base flags */
if ((flags  _PAGE_PRESENT) == 0)
-   flags |= PAGE_KERNEL;
+   flags |= pgprot_val(PAGE_KERNEL);
 
/* Non-cacheable page cannot be coherent */
if (flags  _PAGE_NO_CACHE)
@@ -317,7 +317,7 @@ void __init __mapin_ram_chunk(unsigned long offset, 
unsigned long top)
p = memstart_addr + s;
for (; s  top; s += PAGE_SIZE) {
ktext = ((char *) v = _stext  (char *) v  etext);
-   f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
+   f = pgprot_val(ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
map_page(v, p, f);
 #ifdef CONFIG_PPC_STD_MMU_32
if (ktext)
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 5029dc1..dc710d4 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -92,7 +92,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
break;
}
 
-   setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
+   setbat(2, PAGE_OFFSET, 0, bl, pgprot_val(PAGE_KERNEL_X));
done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
if ((done  tot)  !bat_addrs[3].limit) {
/* use BAT3 to cover a bit more */
@@ -100,7 +100,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
for (bl = 12810; bl  max_size; bl = 1)
if (bl * 2  tot)
break;
-   setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
+   setbat(3, PAGE_OFFSET+done, done, bl,
+  pgprot_val(PAGE_KERNEL_X));
done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
}
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc32/chrp: fix section mismatch warning

2014-12-08 Thread Christophe Leroy

This patch fixes a section mismatch warning

WARNING: vmlinux.o(.text+0x213b6): Section mismatch in reference from the 
function chrp_init_early() to the variable .init.data:boot_command_line
The function chrp_init_early() references
the variable __initdata boot_command_line.
This is often because chrp_init_early lacks a __initdata 
annotation or the annotation of boot_command_line is wrong.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
 arch/powerpc/platforms/chrp/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/chrp/setup.c 
b/arch/powerpc/platforms/chrp/setup.c
index 5b77b19..d5d1ea5 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -253,7 +253,7 @@ static void briq_restart(char *cmd)
  * But unfortunately, the firmware does not connect /chosen/{stdin,stdout}
  * the the built-in serial node. Instead, a /failsafe node is created.
  */
-static void chrp_init_early(void)
+static __init void chrp_init_early(void)
 {
struct device_node *node;
const char *property;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Right location in sysfs for dlpar file

2014-12-08 Thread Nathan Fontenot

On 12/03/2014 10:31 PM, Greg KH wrote:
 On Wed, Dec 03, 2014 at 09:07:27PM -0600, Nathan Fontenot wrote:
 On 12/01/2014 10:26 PM, Greg KH wrote:
 On Mon, Dec 01, 2014 at 09:41:03AM -0600, Nathan Fontenot wrote:
 On 11/26/2014 09:12 PM, Benjamin Herrenschmidt wrote:
 Hi Greg,

 So Nathan is working on a patch series to cleanup and improve our
 DLPAR infrastructure which is basically our hotplug mechanism when
 running under the PowerVM (aka pHyp) and KVM hypervisors.

 The cleanup to the dlpar infrastructure will move the entire operation
 of hotplugging a device to the kernel instead of doing it partially in
 userspace and partially in the kernel as is currently done.


 I'll let Nathan give you a bit more details/background and answer
 subsequent question you might have as this is really his area of
 expertise.

 To cut a long story short, we need a sysfs file that allows our
 userspace tools to notify the kernel of hotplug events coming from
 the management console (which talks to userspace daemons using a
 proprietary protocol) to initiate the hotplug operations, which in
 turn get dispatched internally in the kernel to the right subsystem
 (memory, cpu, pci, ...) based on the resource type.

 On IRC, Greg suggested /sys/firmware and /sys/hypervisor which both
 look like a reasonable option to me, probably better than dlpar...

 For PowerVM systems we need this sysfs file to deliver what is
 essentially a binary blob (specifically a rtas error log) to the
 kernel. The current patch set is creating /sys/kernel/dlpar. As Ben
 mentioned we would like your input on what would be the proper place
 to create this file.

 And what is the kernel supposed to do with such a binary blob?  Parse
 it?  Or pass it to something else?

 The kernel will parse it and perform the requested hotplug operation.
 
 Oh I was hoping you would not say that :(

Heh! hint taken. I won't cc you.

 
 Seriously?  Parsing binary blobs from userspace?  Don't do that, you
 know better.

Yes, not ideal. One thing to note here is that the code to parse these
binary blobs already exists in the kernel for powerpc. The binary blob
we need to pass in is the same format as rtas error logs that we can be
handed by firmware on powerpc systems.

-Nathan

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2] powerpc: add little endian flag to syscall_get_arch()

2014-12-08 Thread Richard Guy Briggs

Since both ppc and ppc64 have LE variants which are now reported by uname, add
that flag (__AUDIT_ARCH_LE) to syscall_get_arch() and add AUDIT_ARCH_PPC*LE
variants.

Without this,  perf trace and auditctl fail.

Mainline kernel reports ppc64le (per a058801) but there is no matching
AUDIT_ARCH_PPC64LE.

See:
https://www.redhat.com/archives/linux-audit/2014-August/msg00082.html
https://www.redhat.com/archives/linux-audit/2014-December/msg00004.html

v1 -> v2:
Added ; at the end of the #ifdef-protected line so it actually 
compiles

Signed-off-by: Richard Guy Briggs r...@redhat.com
---
 arch/powerpc/include/asm/syscall.h |6 +-
 include/uapi/linux/audit.h |2 ++
 2 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/syscall.h 
b/arch/powerpc/include/asm/syscall.h
index 6fa2708..d1934e5 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct 
*task,
 
 static inline int syscall_get_arch(void)
 {
-   return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+   int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+   arch |= __AUDIT_ARCH_LE;
+#endif
+   return arch;
 }
 #endif /* _ASM_SYSCALL_H */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4d100c8..fe29a99 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -364,7 +364,9 @@ enum {
 #define AUDIT_ARCH_PARISC  (EM_PARISC)
 #define AUDIT_ARCH_PARISC64(EM_PARISC|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_PPC (EM_PPC)
+#define AUDIT_ARCH_PPCLE   (EM_PPC|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_PPC64   (EM_PPC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_S390(EM_S390)
 #define AUDIT_ARCH_S390X   (EM_S390|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_SH  (EM_SH)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3] powerpc: add little endian flag to syscall_get_arch()

2014-12-08 Thread Richard Guy Briggs

Since both ppc and ppc64 have LE variants which are now reported by uname, add
that flag (__AUDIT_ARCH_LE) to syscall_get_arch() and add AUDIT_ARCH_PPC*LE
variants.

Without this,  perf trace and auditctl fail.

Mainline kernel reports ppc64le (per a058801) but there is no matching
AUDIT_ARCH_PPC64LE.

Since 32-bit PPC LE is not supported, throw a compiler error rather than return
a bogus architecture to audit.

See:
https://www.redhat.com/archives/linux-audit/2014-August/msg00082.html
https://www.redhat.com/archives/linux-audit/2014-December/msg00004.html

v2 -> v3:
Throw a compiler error on 32-bit LE.

v1 -> v2:
Added ; at the end of the #ifdef-protected line so it actually 
compiles

Signed-off-by: Richard Guy Briggs r...@redhat.com
---
 arch/powerpc/include/asm/syscall.h |7 +++
 include/uapi/linux/audit.h |1 +
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/syscall.h 
b/arch/powerpc/include/asm/syscall.h
index 6fa2708..cf7fcab 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,13 @@ static inline void syscall_set_arguments(struct task_struct 
*task,
 
 static inline int syscall_get_arch(void)
 {
+#ifdef __LITTLE_ENDIAN__
+   return AUDIT_ARCH_PPC64LE;
+#ifndef CONFIG_64BIT
+#error PPC 32-bit Little Endian architecture not supported.
+#endif /* CONFIG_64BIT */
+#else /* __LITTLE_ENDIAN__ */
return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#endif /* __LITTLE_ENDIAN__ */
 }
 #endif /* _ASM_SYSCALL_H */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4d100c8..fa2a6af 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -365,6 +365,7 @@ enum {
 #define AUDIT_ARCH_PARISC64(EM_PARISC|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_PPC (EM_PPC)
 #define AUDIT_ARCH_PPC64   (EM_PPC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_S390(EM_S390)
 #define AUDIT_ARCH_S390X   (EM_S390|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_SH  (EM_SH)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: Right location in sysfs for dlpar file

2014-12-08 Thread Benjamin Herrenschmidt

On Mon, 2014-12-08 at 09:21 -0600, Nathan Fontenot wrote:
  Oh I was hoping you would not say that :(
 
 Heh! hint taken. I won't cc you.
 
  
  Seriously?  Parsing binary blobs from userspace?  Don't do that, you
  know better.
 
 Yes, not ideal. One thing to note here is that the code to parse these
 binary blobs already exists in the kernel for powerpc. The binary blob
 we need to pass in is the same format as rtas error logs that we can be
 handed by firmware on powerpc systems.

We should clarify that it's a small blob, it's an error log entry as defined
by our firmware interfaces. We already obtain these from the FW and parse them
(it's really just mapping the right structure based on the type in the header)
under various circumstances.

What happens is on KVM, we used that existing path to signal the kernel of a
hotplug event. On PowerVM (aka pHyp), we want to use the same kernel code path
by passing an equivalent event to the kernel.

Nathan, if Greg still strongly objects, we could have the sysfs interface be
something like a simple action + resource ID string, we don't *need* it to
be encapsulated in an error log, we would just pipe into the same infrastructure
in the kernel but at a slightly lower level...

Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3] i2c: Driver to expose PowerNV platform i2c busses

2014-12-08 Thread Benjamin Herrenschmidt

On Mon, 2014-12-08 at 12:06 +0530, Neelesh Gupta wrote:
 The patch exposes the available i2c busses on the PowerNV platform
 to the kernel and implements the bus driver to support i2c and
 smbus commands.
 The driver uses the platform device infrastructure to probe the busses
 on the platform and registers them with the i2c driver framework.

Wolfram, what are your remaining objections here? We need that in
distros ASAP ...

I still maintain that it's not reasonable to hold the driver for the
addition of multi-byte smbus offsets. This is a new feature that will
require changes to a number of existing bus and device drivers, so a
very pervasive change, and which will be visible to user space, which
means that drivers will need to continue supporting the old way at
least for a while anyway...

Ben.

 Signed-off-by: Neelesh Gupta neele...@linux.vnet.ibm.com
 Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
 ---
 
 v2 - v3:
 - Added the device tree binding documentation for the driver.
 - Sorted the ordering of this new driver added in Makefile.
 - Removed populating the superfluous .owner field in 'struct driver'.
 
  Documentation/devicetree/bindings/i2c/i2c-opal.txt |   37 +++
  arch/powerpc/include/asm/opal.h|   29 ++
  arch/powerpc/platforms/powernv/opal-wrappers.S |1 
  arch/powerpc/platforms/powernv/opal.c  |   11 +
  drivers/i2c/busses/Kconfig |   11 +
  drivers/i2c/busses/Makefile|1 
  drivers/i2c/busses/i2c-opal.c  |  294 
 
  7 files changed, 384 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/i2c/i2c-opal.txt
  create mode 100644 drivers/i2c/busses/i2c-opal.c
 
 diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt 
 b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
 new file mode 100644
 index 000..12bc614
 --- /dev/null
 +++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
 @@ -0,0 +1,37 @@
 +Device-tree bindings for I2C OPAL driver
 +
 +
 +Most of the device node and properties layout is specific to the firmware and
 +used by the firmware itself for configuring the port. From the linux
 +perspective, the properties of use are ibm,port-name and ibm,opal-id.
 +
 +Required properties:
 +
 +- reg: Port-id within a given master
 +- compatible: must be ibm,opal-i2c
 +- ibm,opal-id: Refers to a specific bus and used to identify it when calling
 +the relevant OPAL functions.
 +- bus-frequency: Operating frequency of the i2c bus (in HZ). Informational 
 for
 +  linux, used by the FW though.
 +
 +Optional properties:
 +- ibm,port-name: Firmware provides this name that uniquely identifies the i2c
 +  port.
 +
 +The node contains a number of other properties that are used by the FW itself
 +and depend on the specific hardware implementation. The example below depicts
 +a P8 on-chip bus.
 +
 +Example:
 +
 +i2c-bus@0 {
 + reg = 0x0;
 + bus-frequency = 0x61a80;
 + compatible = ibm,power8-i2c-port, ibm,opal-i2c;
 + ibm,opal-id = 0x1;
 + ibm,port-name = p8__e1p0;
 + #address-cells = 0x1;
 + phandle = 0x1006;
 + #size-cells = 0x0;
 + linux,phandle = 0x1006;
 +};
 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
 index 9124b0e..537807b 100644
 --- a/arch/powerpc/include/asm/opal.h
 +++ b/arch/powerpc/include/asm/opal.h
 @@ -56,6 +56,14 @@ struct opal_sg_list {
  #define OPAL_HARDWARE_FROZEN -13
  #define OPAL_WRONG_STATE -14
  #define OPAL_ASYNC_COMPLETION-15
 +#define OPAL_I2C_TIMEOUT -17
 +#define OPAL_I2C_INVALID_CMD -18
 +#define OPAL_I2C_LBUS_PARITY -19
 +#define OPAL_I2C_BKEND_OVERRUN   -20
 +#define OPAL_I2C_BKEND_ACCESS-21
 +#define OPAL_I2C_ARBT_LOST   -22
 +#define OPAL_I2C_NACK_RCVD   -23
 +#define OPAL_I2C_STOP_ERR-24
  
  /* API Tokens (in r0) */
  #define OPAL_INVALID_CALL-1
 @@ -154,6 +162,7 @@ struct opal_sg_list {
  #define OPAL_HANDLE_HMI  98
  #define OPAL_REGISTER_DUMP_REGION101
  #define OPAL_UNREGISTER_DUMP_REGION  102
 +#define OPAL_I2C_REQUEST 109
  
  #ifndef __ASSEMBLY__
  
 @@ -801,6 +810,24 @@ typedef struct oppanel_line {
   uint64_tline_len;
  } oppanel_line_t;
  
 +/* OPAL I2C request */
 +struct opal_i2c_request {
 + uint8_t type;
 +#define OPAL_I2C_RAW_READ0
 +#define OPAL_I2C_RAW_WRITE   1
 +#define OPAL_I2C_SM_READ 2
 +#define OPAL_I2C_SM_WRITE3
 + uint8_t flags;
 +#define OPAL_I2C_ADDR_10 0x01/* Not supported yet */
 + uint8_t subaddr_sz; /* Max 4 */
 + uint8_t reserved;
 + __be16 addr;/* 7 or 10 bit address */
 + __be16 reserved2;
 + __be32 subaddr; /* Sub-address if any */
 + __be32 size;/* Data size

Re: [PATCH V3] powerpc: add little endian flag to syscall_get_arch()

2014-12-08 Thread Paul Moore

On Monday, December 08, 2014 12:59:32 PM Richard Guy Briggs wrote:
 Since both ppc and ppc64 have LE variants which are now reported by uname,
 add that flag (__AUDIT_ARCH_LE) to syscall_get_arch() and add
 AUDIT_ARCH_PPC*LE variants.
 
 Without this,  perf trace and auditctl fail.
 
 Mainline kernel reports ppc64le (per a058801) but there is no matching
 AUDIT_ARCH_PPC64LE.
 
 Since 32-bit PPC LE is not supported, throw a compiler error rather than
 return a bogus architecture to audit.
 
 See:
   https://www.redhat.com/archives/linux-audit/2014-August/msg00082.html
   https://www.redhat.com/archives/linux-audit/2014-December/msg4.html
 
 v2 - v3:
   Throw a compiler error on 32-bit LE.
 
 v1 - v2:
   Added ; at the end of the #ifdef-protected line so it actually 
 compiles
 
 Signed-off-by: Richard Guy Briggs r...@redhat.com
 ---
  arch/powerpc/include/asm/syscall.h |7 +++
  include/uapi/linux/audit.h |1 +
  2 files changed, 8 insertions(+), 0 deletions(-)

Looks reasonable to me from an audit perspective, but I'll let the ppc folks 
merge this patch into their tree.

Acked-by: Paul Moore p...@paul-moore.com

 diff --git a/arch/powerpc/include/asm/syscall.h
 b/arch/powerpc/include/asm/syscall.h index 6fa2708..cf7fcab 100644
 --- a/arch/powerpc/include/asm/syscall.h
 +++ b/arch/powerpc/include/asm/syscall.h
 @@ -90,6 +90,13 @@ static inline void syscall_set_arguments(struct
 task_struct *task,
 
  static inline int syscall_get_arch(void)
  {
 +#ifdef __LITTLE_ENDIAN__
 + return AUDIT_ARCH_PPC64LE;
 +#ifndef CONFIG_64BIT
 +#error PPC 32-bit Little Endian architecture not supported.
 +#endif /* CONFIG_64BIT */
 +#else /* __LITTLE_ENDIAN__ */
   return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
 +#endif /* __LITTLE_ENDIAN__ */
  }
  #endif   /* _ASM_SYSCALL_H */
 diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
 index 4d100c8..fa2a6af 100644
 --- a/include/uapi/linux/audit.h
 +++ b/include/uapi/linux/audit.h
 @@ -365,6 +365,7 @@ enum {
  #define AUDIT_ARCH_PARISC64  (EM_PARISC|__AUDIT_ARCH_64BIT)
  #define AUDIT_ARCH_PPC   (EM_PPC)
  #define AUDIT_ARCH_PPC64 (EM_PPC64|__AUDIT_ARCH_64BIT)
 +#define AUDIT_ARCH_PPC64LE   (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
  #define AUDIT_ARCH_S390  (EM_S390)
  #define AUDIT_ARCH_S390X (EM_S390|__AUDIT_ARCH_64BIT)
  #define AUDIT_ARCH_SH(EM_SH)

-- 
paul moore
www.paul-moore.com

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3] i2c: Driver to expose PowerNV platform i2c busses

2014-12-08 Thread Wolfram Sang

On Tue, Dec 09, 2014 at 07:13:15AM +1100, Benjamin Herrenschmidt wrote:
 On Mon, 2014-12-08 at 12:06 +0530, Neelesh Gupta wrote:
  The patch exposes the available i2c busses on the PowerNV platform
  to the kernel and implements the bus driver to support i2c and
  smbus commands.
  The driver uses the platform device infrastructure to probe the busses
  on the platform and registers them with the i2c driver framework.
 
 Wolfram, what are your remaining objections here ? We need that in
 distros ASAP ...

Oh, I thought we agreed that you take it via powerpc. I still think this
is the best solution.

 I still maintain that it's not reasonable to hold driver for the
 additions of multi-byte smbus offsets. This is a new feature that will
 require changes to a number of existing bus and device drivers, so a
 very pervasive change, and which will be visible to user space, which
 means that drivers will need to continue supporting the old way at
 least for a while anyway...

Yeah, I agree on that. I am still unsure about the port-name binding,
but well, if it is needed to fit your PowerNV scheme...

  Signed-off-by: Neelesh Gupta neele...@linux.vnet.ibm.com
  Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org
  ---
  
  v2 - v3:
  - Added the device tree binding documentation for the driver.
  - Sorted the ordering of this new driver added in Makefile.
  - Removed populating the superfluous .owner field in 'struct driver'.

Thanks for the updates!



signature.asc
Description: Digital signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 4/4] powernv: powerpc: Add winkle support for offline cpus

2014-12-08 Thread Shreyas B Prabhu



On Monday 08 December 2014 11:22 AM, Paul Mackerras wrote:
 On Thu, Dec 04, 2014 at 12:58:23PM +0530, Shreyas B. Prabhu wrote:
 Winkle is a deep idle state supported in power8 chips. A core enters
 winkle when all the threads of the core enter winkle. In this state
 power supply to the entire chiplet i.e core, private L2 and private L3
 is turned off. As a result it gives higher powersavings compared to
 sleep.

 But entering winkle results in a total hypervisor state loss. Hence the
 hypervisor context has to be preserved before entering winkle and
 restored upon wake up.

 Power-on Reset Engine (PORE) is a dedicated engine which is responsible
 for powering on the chiplet during wake up. It can be programmed to
 restore the register contents of a few specific registers. This patch
 uses PORE to restore register state wherever possible and uses stack to
 save and restore rest of the necessary registers.

 With hypervisor state restore things fall under three categories-
 per-core state, per-subcore state and per-thread state. To manage this,
 extend the infrastructure introduced for sleep. Mainly we add a paca
 variable subcore_sibling_mask. Using this and the core_idle_state we can
 distinguish first thread in core and subcore.
 
 Comments below...
 
 diff --git a/arch/powerpc/kernel/exceptions-64s.S 
 b/arch/powerpc/kernel/exceptions-64s.S
 index 7637889..2b9b5fb 100644
 --- a/arch/powerpc/kernel/exceptions-64s.S
 +++ b/arch/powerpc/kernel/exceptions-64s.S
 @@ -102,9 +102,7 @@ system_reset_pSeries:
  #ifdef CONFIG_PPC_P7_NAP
  BEGIN_FTR_SECTION
  /* Running native on arch 2.06 or later, check if we are
 - * waking up from nap. We only handle no state loss and
 - * supervisor state loss. We do -not- handle hypervisor
 - * state loss at this time.
 + * waking up from nap/sleep/winkle.
   */
  mfspr   r13,SPRN_SRR1
  rlwinm. r13,r13,47-31,30,31
 @@ -112,7 +110,17 @@ BEGIN_FTR_SECTION
  
  cmpwi   cr3,r13,2
  
 -GET_PACA(r13)
 +/* Check if last bit of HSPGR0 is set. This indicates whether we are
 + * waking up from winkle */
 +li  r3,1
 +mfspr   r4,SPRN_HSPRG0
 +and r5,r4,r3
 +cmpwi   cr4,r5,1/* Store result in cr4 for later use */
 +
 +andcr4,r4,r3
 +mtspr   SPRN_HSPRG0,r4
 +
 +mr  r13,r4
 
 This seems unnecessarily convoluted.  How about:
 
   GET_PACA(r13)
   clrldi  r5,r13,63
   clrrdi  r13,r13,1
   cmpwi   cr4,r5,1
   mtspr   SPRN_HSPRG0,r13
 
Yes, makes more sense. I'll use this.

 diff --git a/arch/powerpc/kernel/idle_power7.S 
 b/arch/powerpc/kernel/idle_power7.S
 index 8c3a1f4..8102075 100644
 --- a/arch/powerpc/kernel/idle_power7.S
 +++ b/arch/powerpc/kernel/idle_power7.S
 @@ -19,8 +19,24 @@
  #include asm/kvm_book3s_asm.h
  #include asm/opal.h
  #include asm/cpuidle.h
 +#include asm/mmu-hash64.h
  
  #undef DEBUG
 +/*
 + * Use unused space in the interrupt stack to save and restore
 + * registers for winkle support.
 + */
 +#define _SDR1   GPR3
 +#define _RPRGPR4
 +#define _SPURR  GPR5
 +#define _PURR   GPR6
 +#define _TSCR   GPR7
 +#define _DSCR   GPR8
 +#define _AMOR   GPR9
 +#define _PMC5   GPR10
 +#define _PMC6   GPR11
 
 Why only PMC5 and PMC6 out of all the PMU registers?  What about
 PMC1-PMC4 and the MMCR registers?  I assume they're lost during winkle
 state also, aren't they?  If we're not saving them, what's the point
 of saving and restoring PMC5 and PMC6?

Yes all PMC and MMCR contents are lost. Using __restore_cpu_power8, the
MMCR registers are initialized to 0. The reasoning behind specifically
restoring PMC5 and PMC6 was the fact that they are not programmable and
count cycles/instructions by default. We suspected that there might be a
userspace program which relied on PMC5/PMC6 always increasing.
But now on closer look, since these counters are 32 bit and cycles/
instruction counts are bound to exceed it, I doubt such userspace programs
exist. I'll drop PMC5 and PMC6 in the next version.
 
 +#define _WORT   GPR12
 +#define _WORC   GPR13
  
  /* Idle state entry routines */
  
 @@ -124,8 +140,8 @@ power7_enter_nap_mode:
  stb r4,HSTATE_HWTHREAD_STATE(r13)
  #endif
  stb r3,PACA_THREAD_IDLE_STATE(r13)
 -cmpwi   cr1,r3,PNV_THREAD_SLEEP
 -bge cr1,2f
 +cmpwi   cr3,r3,PNV_THREAD_SLEEP
 +bge cr3,2f
  IDLE_STATE_ENTER_SEQ(PPC_NAP)
  /* No return */
  2:
 @@ -154,7 +170,8 @@ pnv_fastsleep_workaround_at_entry:
  isync
  bne-lwarx_loop1
  
 -common_enter: /* common code for all the threads entering sleep */
 +common_enter: /* common code for all the threads entering sleep  or winkle 
 */
 +bgt cr3,enter_winkle
  IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
  
  fastsleep_workaround_at_entry:
 @@ -175,6 +192,34 @@ fastsleep_workaround_at_entry:
  stw r0,0(r14)
  b   common_enter
  
 +enter_winkle:
 +/*
 + * Note all register i.e per-core,

Re: [PATCH 2 1/4] powerpc: drop the ability to tweak SMT mode at boot time

2014-12-08 Thread Scott Wood

On Mon, 2014-12-08 at 09:23 +0100, Greg Kurz wrote:
 On Fri, 5 Dec 2014 12:52:45 -0600
 Scott Wood scottw...@freescale.com wrote:
 
  On Fri, 2014-12-05 at 16:14 +0100, Greg Kurz wrote:
   The smt-enabled kernel parameter basically leaves unwanted cpus executing
   in firmware or wherever they happen to be. The very same applies to the
   ibm,smt-enabled DT property which is no more used by anything known. These
   are hacks that shouldn't be used in a production environment.
   
   Quoting mpe, there are better ways for firmware to disable SMT.
  
 
 Hi Scott,
 
  Those better ways don't apply to Freescale chips, where the OS enables
  (or not) SMT without any interaction with firmware.  I don't care about
  the ibm,smt-enabled property, but can we please keep the smt-enabled
  boot option?
  
 
 Fair enough for the firmware side, what about CPU hot(un)plug then ?

Not yet supported in mainline for e6500 (or maybe it works with
generic_mach_cpu_die which would not be helpful).

Plus, it's more complicated (both to use and how it works internally)
and doesn't avoid having the secondary thread ever run.  Sometimes it's
useful to ensure that the second thread has never run when debugging a
problem.

   It also has an evil side effect on the split-core feature for powernv. The
   code needs all the cpus to participate to the split mode update: it relies
   on smp_send_reschedule() to get offline ones to do so. This doesn't work 
   with
   cpus that haven't come up... The consequence is a kernel hang on powernv 
   when
   trying to limit the number of hw threads at boot time (e.g. smt-enabled to
   anything but 8 on POWER8).
  
  In that case could you disable the option only on that hardware?
  
 
 The fact it breaks only powernv doesn't mean it is a powernv only issue.
 The smt-enabled feature is a hack because it leaves some cpus in an undefined
 state from a kernel POV.

I'm aware of an issue where per-cpu threads get created for these CPUs,
which seems like a bug if they were never marked online (it's on my todo
list to investigate further).  Are there other issues?  It seems like
there ought to be some way to do this right.

  Moreover it drags about 80 lines of code and sits entirely in common
 ppc64 code. I would reverse the question then ? Why not move
 smt-enabled code to freescale only ?

I'm fine with making it Freescale-only.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3] i2c: Driver to expose PowerNV platform i2c busses

2014-12-08 Thread Benjamin Herrenschmidt

On Mon, 2014-12-08 at 21:55 +0100, Wolfram Sang wrote:
 On Tue, Dec 09, 2014 at 07:13:15AM +1100, Benjamin Herrenschmidt wrote:
  On Mon, 2014-12-08 at 12:06 +0530, Neelesh Gupta wrote:
   The patch exposes the available i2c busses on the PowerNV platform
   to the kernel and implements the bus driver to support i2c and
   smbus commands.
   The driver uses the platform device infrastructure to probe the busses
   on the platform and registers them with the i2c driver framework.
  
  Wolfram, what are your remaining objections here ? We need that in
  distros ASAP ...
 
 Oh, I thought we agreed that you take it via powerpc. I still think this
 is the best solution.

I threatened to do that :-) I don't remember you replying, did I miss
it ? If you are ok with the driver and are happy for me to take it,
please send an Ack.

  I still maintain that it's not reasonable to hold driver for the
  additions of multi-byte smbus offsets. This is a new feature that will
  require changes to a number of existing bus and device drivers, so a
  very pervasive change, and which will be visible to user space, which
  means that drivers will need to continue supporting the old way at
  least for a while anyway...
 
 Yeah, I agree on that. I am still unsure about the port-name binding,
 but well, if it is needed to fit your PowerNV scheme...

From a binding perspective, it's just a piece of additional info that
the firmware provides for convenience. That we use it as the i2c port
name in Linux makes sense, it means that when listing the i2c ports,
it's immediately clear to the user which is which, it's not used
functionally by any driver or piece of code, but it's handy for people
doing things like manufacturing of machines, to know what bus to poke to
program a VPD EEPROM or test if a chip responds for example.

Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc: secondary CPUs signal to master before setting active and online (fixes kernel BUG at kernel/smpboot.c:134!)

2014-12-08 Thread Anton Blanchard

Hi Ingo,

 At that point I thought the previous task_cpu() was somewhat ingrained
 in the scheduler and came up with the patch. If not, we could go on a
 hunt to see what else needs fixing.

I had another look. The scheduler does indeed make assumptions about the
previous task_cpu, but we have a hammer to fix it up called
select_fallback_rq.

I annotated select_fallback_rq, and did hit a case where the CPU was
not active. ppc64 patch below.

I think x86 has a similar (although harder to hit) issue. While it
does wait for the cpu_online bit to be set:

while (!cpu_online(cpu)) {
cpu_relax();
touch_nmi_watchdog();
}

The cpu_active bit is set after the cpu_online bit:

void set_cpu_online(unsigned int cpu, bool online)
{
if (online) {
cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));

If the CPU got delayed between the two stores (eg a KVM guest had the CPU
scheduled out), then we'd end up with cpu_active unset and hit the same
issue in select_fallback_rq.

Anton
--

I have a busy ppc64le KVM box where guests sometimes hit the infamous
kernel BUG at kernel/smpboot.c:134! issue during boot:

BUG_ON(td->cpu != smp_processor_id());

Basically a per CPU hotplug thread scheduled on the wrong CPU. The oops
output confirms it:

CPU: 0
Comm: watchdog/130

The problem is that we aren't ensuring the CPU active and online bits are set
before allowing the master to continue on. The master unparks the secondary
CPUs kthreads and the scheduler looks for a CPU to run on. It calls
select_task_rq and realises the suggested CPU is not in the cpus_allowed
mask. It then ends up in select_fallback_rq, and since the active and
online bits aren't set we choose some other CPU to run on.

Cc: sta...@vger.kernel.org
Signed-off-by: Anton Blanchard an...@samba.org
---
 arch/powerpc/kernel/smp.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 71e186d..d40e46e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -700,7 +700,6 @@ void start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
preempt_disable();
-   cpu_callin_map[cpu] = 1;
 
if (smp_ops-setup_cpu)
smp_ops-setup_cpu(cpu);
@@ -739,6 +738,14 @@ void start_secondary(void *unused)
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
 
+   /*
+* CPU must be marked active and online before we signal back to the
+* master, because the scheduler needs to see the cpu_online and
+* cpu_active bits set.
+*/
+   smp_wmb();
+   cpu_callin_map[cpu] = 1;
+
local_irq_enable();
 
cpu_startup_entry(CPUHP_ONLINE);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v3 1/3] Revert clk: ppc-corenet: Fix Section mismatch warning

2014-12-08 Thread Kevin Hao

On Thu, Dec 04, 2014 at 09:51:59PM -0600, Scott Wood wrote:
 This patch is going to conflict with commit a4ae8f3b0f7ac6ab3 clk: drop
 owner assignment from platform_drivers in linux-next -- or rather,
 you've based this on that patch, but it's not in mpe's next branch, so I
 get a merge conflict and there'd be another merge conflict later on to
 get back to the newer base.
 
 I really think this should go via the clock tree.  That's where the
 breakage was introduced in the first place...

Hi Mike,

Could you pick up these patches?

Thanks,
Kevin


pgp7onzYSv2aW.pgp
Description: PGP signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] kthread: kthread_bind fails to enforce CPU affinity (fixes kernel BUG at kernel/smpboot.c:134!)

2014-12-08 Thread Lai Jiangshan

On 12/08/2014 09:54 PM, Steven Rostedt wrote:
 On Mon,  8 Dec 2014 14:27:01 +1100
 Anton Blanchard an...@samba.org wrote:
 
 I have a busy ppc64le KVM box where guests sometimes hit the infamous
 kernel BUG at kernel/smpboot.c:134! issue during boot:

 BUG_ON(td->cpu != smp_processor_id());

 Basically a per CPU hotplug thread scheduled on the wrong CPU. The oops
 output confirms it:

 CPU: 0
 Comm: watchdog/130

 The issue is in kthread_bind where we set the cpus_allowed mask, but do
 not touch task_thread_info(p)->cpu. The scheduler assumes the previously
 scheduled CPU is in the cpus_allowed mask, but in this case we are
 moving a thread to another CPU so it is not.

 
 Does this happen always on boot up, and always with the watchdog thread?
 
 I followed the logic that starts the watchdog threads.
 
 watchdog_enable_all_cpus()
   smpboot_register_percpu-thread() {
 
 for_each_online_cpu(cpu) { ... }
 
 Where watchdog_enable_all_cpus() can be called by
 lockup_detector_init() before SMP is started, but also by
 proc_dowatchdog() which is called by the sysctl commands (after SMP is
 up and running).
 
 I noticed there's no get_online_cpus() anywhere, although the
 unregister_percpu_thread() has it. Is it possible that we created a
 thread on a CPU that wasn't fully online yet?
 
 Perhaps the following patch is needed? Even if this isn't the solution
 to this bug, it is probably needed as watchdog_enable_all_cpus() can be
 called after boot up too.
 
 -- Steve


Hi, Steven, tglx

See this https://lkml.org/lkml/2014/7/30/804
[PATCH] smpboot: add missing get_online_cpus() when register


Thanks,
Lai

 
 diff --git a/kernel/smpboot.c b/kernel/smpboot.c
 index eb89e1807408..60d35ac5d3f1 100644
 --- a/kernel/smpboot.c
 +++ b/kernel/smpboot.c
 @@ -279,6 +279,7 @@ int smpboot_register_percpu_thread(struct 
 smp_hotplug_thread *plug_thread)
   unsigned int cpu;
   int ret = 0;
  
 + get_online_cpus();
   mutex_lock(smpboot_threads_lock);
   for_each_online_cpu(cpu) {
   ret = __smpboot_create_thread(plug_thread, cpu);
 @@ -291,6 +292,7 @@ int smpboot_register_percpu_thread(struct 
 smp_hotplug_thread *plug_thread)
   list_add(plug_thread-list, hotplug_threads);
  out:
   mutex_unlock(smpboot_threads_lock);
 + put_online_cpus();
   return ret;
  }
  EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
 .
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2 1/4] powerpc: drop the ability to tweak SMT mode at boot time

2014-12-08 Thread Michael Ellerman

On Fri, 2014-12-05 at 12:52 -0600, Scott Wood wrote:
 On Fri, 2014-12-05 at 16:14 +0100, Greg Kurz wrote:
  The smt-enabled kernel parameter basically leaves unwanted cpus executing
  in firmware or wherever they happen to be. The very same applies to the
  ibm,smt-enabled DT property which is no more used by anything known. These
  are hacks that shouldn't be used in a production environment.
  
  Quoting mpe, there are better ways for firmware to disable SMT.
 
 Those better ways don't apply to Freescale chips, where the OS enables
 (or not) SMT without any interaction with firmware.

But how does it know there even are SMT threads? From the device tree? So
just don't present the threads in the device tree?

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/7] CXL: Change contexts_lock to a mutex to fix sleep while atomic bug

2014-12-08 Thread Ian Munsie

This patch will definitely need to go to stable - we've run into issues
a couple of times when something has gone wrong on an AFU and ended up
taking down the whole system as a result of this bug.

Cheers,
-Ian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] arch: powerpc: kernel: vio.c: Remove unused function

2014-12-08 Thread Michael Ellerman

On Sun, 2014-12-07 at 23:31 +0100, Rickard Strandqvist wrote:
 Remove the function cmo_high_show() that is not used anywhere.
 
 This was partially found by using a static code analysis program called 
 cppcheck.
 
 Signed-off-by: Rickard Strandqvist rickard_strandqv...@spectrumdigital.se
 ---
  arch/powerpc/kernel/vio.c |5 -
  1 file changed, 5 deletions(-)
 
 diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
 index 5bfdab9..11657d2 100644
 --- a/arch/powerpc/kernel/vio.c
 +++ b/arch/powerpc/kernel/vio.c
 @@ -1032,11 +1032,6 @@ viobus_cmo_pool_rd_attr(reserve, size);
  viobus_cmo_pool_rd_attr(excess, size);
  viobus_cmo_pool_rd_attr(excess, free);
  
 -static ssize_t cmo_high_show(struct bus_type *bt, char *buf)
 -{
 - return sprintf(buf, %lu\n, vio_cmo.high);
 -}


from ../arch/powerpc/kernel/vio.c:17:
../arch/powerpc/kernel/vio.c:1046:20: error: 'cmo_high_show' undeclared here 
(not in a function)
static BUS_ATTR_RW(cmo_high);
^
../include/linux/sysfs.h:76:10: note: in definition of macro '__ATTR'
.show = _show,  \
^
../include/linux/device.h:53:42: note: in expansion of macro '__ATTR_RW'
struct bus_attribute bus_attr_##_name = __ATTR_RW(_name)
^
../arch/powerpc/kernel/vio.c:1046:8: note: in expansion of macro 'BUS_ATTR_RW'
static BUS_ATTR_RW(cmo_high);
^

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/7] CXL: Add timeout to process element commands

2014-12-08 Thread Ian Munsie

This one needs to go to stable - I've hit it a couple of times while
testing bad AFUs and it results in an unkillable process.

Cheers,
-Ian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/7] CXL: Fix leaking interrupts if attach process fails

2014-12-08 Thread Ian Munsie

This one would be nice to go to stable, but I'm not sure if it's
critical enough to justify it since it only reduces the number of
available interrupts (and therefore, contexts) that can be used by the
card (so, maybe you can only run 507 contexts simultaneously instead of
509)...

Cheers,
-Ian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 5/7] CXL: Disable AFU debug flag

2014-12-08 Thread Ian Munsie

This one would be nice to go to stable, but I'm not sure it really meets
the rules. It could be a problem for userspace error paths checking the
result of MMIO reads, but only if the AFU has actually been unexpectedly
disabled somehow yet the PSL is still responding...

I don't think this is a high priority to go to stable unless it starts
causing problems for someone.

Cheers,
-Ian

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 7/7] CXL: Unmap MMIO regions when detaching a context

2014-12-08 Thread Ian Munsie

This one should go to stable - this was the first bug uncovered after
fixing the sleep while atomic and force unbinding the driver.

Cheers,
-Ian

Excerpts from Ian Munsie's message of 2014-12-08 19:18:01 +1100:
 From: Ian Munsie imun...@au1.ibm.com
 
 If we need to force detach a context (e.g. due to EEH or simply force
 unbinding the driver) we should prevent the userspace contexts from
 being able to access the Problem State Area MMIO region further, which
 they may have mapped with mmap().
 
 This patch unmaps any mapped MMIO regions when detaching a userspace
 context.
 
 Signed-off-by: Ian Munsie imun...@au1.ibm.com
 ---
  drivers/misc/cxl/context.c | 11 ++-
  drivers/misc/cxl/cxl.h |  7 ++-
  drivers/misc/cxl/file.c|  6 +-
  3 files changed, 21 insertions(+), 3 deletions(-)
 
 diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
 index 4aa31a3..51fd6b5 100644
 --- a/drivers/misc/cxl/context.c
 +++ b/drivers/misc/cxl/context.c
 @@ -34,7 +34,8 @@ struct cxl_context *cxl_context_alloc(void)
  /*
   * Initialises a CXL context.
   */
 -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool 
 master)
 +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool 
 master,
 + struct address_space *mapping)
  {
  int i;
  
 @@ -42,6 +43,8 @@ int cxl_context_init(struct cxl_context *ctx, struct 
 cxl_afu *afu, bool master)
  ctx-afu = afu;
  ctx-master = master;
  ctx-pid = NULL; /* Set in start work ioctl */
 +mutex_init(ctx-mapping_lock);
 +ctx-mapping = mapping;
  
  /*
   * Allocate the segment table before we put it in the IDR so that we
 @@ -147,6 +150,12 @@ static void __detach_context(struct cxl_context *ctx)
  afu_release_irqs(ctx);
  flush_work(ctx-fault_work); /* Only needed for dedicated process */
  wake_up_all(ctx-wq);
 +
 +/* Release Problem State Area mapping */
 +mutex_lock(ctx-mapping_lock);
 +if (ctx-mapping)
 +unmap_mapping_range(ctx-mapping, 0, 0, 1);
 +mutex_unlock(ctx-mapping_lock);
  }
  
  /*
 diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
 index c1f8aa6..0df0438 100644
 --- a/drivers/misc/cxl/cxl.h
 +++ b/drivers/misc/cxl/cxl.h
 @@ -405,6 +405,10 @@ struct cxl_context {
  phys_addr_t psn_phys;
  u64 psn_size;
  
 +/* Used to unmap any mmaps when force detaching */
 +struct address_space *mapping;
 +struct mutex mapping_lock;
 +
  spinlock_t sste_lock; /* Protects segment table entries */
  struct cxl_sste *sstp;
  u64 sstp0, sstp1;
 @@ -606,7 +610,8 @@ int cxl_alloc_sst(struct cxl_context *ctx);
  void init_cxl_native(void);
  
  struct cxl_context *cxl_context_alloc(void);
 -int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool 
 master);
 +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool 
 master,
 + struct address_space *mapping);
  void cxl_context_free(struct cxl_context *ctx);
  int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
  
 diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
 index 2e067a5..b09be44 100644
 --- a/drivers/misc/cxl/file.c
 +++ b/drivers/misc/cxl/file.c
 @@ -77,7 +77,7 @@ static int __afu_open(struct inode *inode, struct file 
 *file, bool master)
  goto err_put_afu;
  }
  
 -if ((rc = cxl_context_init(ctx, afu, master)))
 +if ((rc = cxl_context_init(ctx, afu, master, inode-i_mapping)))
  goto err_put_afu;
  
  pr_devel(afu_open pe: %i\n, ctx-pe);
 @@ -113,6 +113,10 @@ static int afu_release(struct inode *inode, struct file 
 *file)
   __func__, ctx-pe);
  cxl_context_detach(ctx);
  
 +mutex_lock(ctx-mapping_lock);
 +ctx-mapping = NULL;
 +mutex_unlock(ctx-mapping_lock);
 +
  put_device(ctx-afu-dev);
  
  /*

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] selftest/ppc: Add subpage protection self test.

2014-12-08 Thread Aneesh Kumar K.V

From: Paul Mackerras pau...@samba.org

Signed-off-by: Paul Mackerras pau...@samba.org
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
 tools/testing/selftests/powerpc/mm/Makefile   |   2 +-
 tools/testing/selftests/powerpc/mm/subpage_prot.c | 201 ++
 2 files changed, 202 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/subpage_prot.c

diff --git a/tools/testing/selftests/powerpc/mm/Makefile 
b/tools/testing/selftests/powerpc/mm/Makefile
index 357ccbd6bad9..fb00c6f7d675 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -1,7 +1,7 @@
 noarg:
$(MAKE) -C ../
 
-PROGS := hugetlb_vs_thp_test
+PROGS := hugetlb_vs_thp_test subpage_prot
 
 all: $(PROGS)
 
diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c 
b/tools/testing/selftests/powerpc/mm/subpage_prot.c
new file mode 100644
index ..62f0db3e3fb2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -0,0 +1,201 @@
+/*
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include stdio.h
+#include stdlib.h
+#include string.h
+#include unistd.h
+#include signal.h
+#include stdarg.h
+#include sys/ptrace.h
+#include sys/mman.h
+#include errno.h
+#include ucontext.h
+#include assert.h
+#include fcntl.h
+
+#include utils.h
+
+void *mallocblock;
+unsigned long mallocsize;
+char *file_name;
+void *fileblock;
+off_t filesize;
+
+int in_test;
+volatile int faulted;
+volatile void *dar;
+int errors;
+
+static void segv(int signum, siginfo_t *info, void *ctxt_v)
+{
+   ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+   struct pt_regs *regs = ctxt-uc_mcontext.regs;
+
+   if (!in_test) {
+   fprintf(stderr, Segfault outside of test !\n);
+   exit(1);
+   }
+   faulted = 1;
+   dar = (void *)regs-dar;
+   regs-nip += 4;
+}
+
+static inline void do_read(const volatile void *addr)
+{
+   int ret;
+
+   asm volatile(lwz %0,0(%1); twi 0,%0,0; isync;\n
+: =r (ret) : r (addr) : memory);
+}
+
+static inline void do_write(const volatile void *addr)
+{
+   int val = 0x1234567;
+
+   asm volatile(stw %0,0(%1); sync; \n
+: : r (val), r (addr) : memory);
+}
+
+static inline void check_faulted(void *addr, long page, long subpage, int 
write)
+{
+   int want_fault = (subpage == ((page + 3) % 16));
+
+   if (write)
+   want_fault |= (subpage == ((page + 1) % 16));
+
+   if (faulted != want_fault) {
+   printf(Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s 
!\n,
+  addr, page, subpage, write,
+  want_fault ? fault : pass,
+  faulted ? fault : pass);
+   ++errors;
+   }
+   if (faulted) {
+   if (dar != addr) {
+   printf(Fault expected at 0x%p and happened at 0x%p 
!\n,
+  addr, dar);
+   }
+   faulted = 0;
+   asm volatile(sync : : : memory);
+   }
+}
+
+static int run_test(void *addr, unsigned long size)
+{
+   unsigned int *map;
+   long i, j, pages, err;
+
+   pages = size / 0x1;
+   map = malloc(pages * 4);
+   assert(map);
+
+   /* for each page, mark subpage i % 16 read only and subpage
+* (i + 3) % 16 inaccessible
+*/
+   for (i = 0; i  pages; i++)
+   map[i] = (0x4000  (((i + 1) * 2) % 32)) |
+   (0xc000  (((i + 3) * 2) % 32));
+   err = syscall(310, addr, size, map);
+   if (err) {
+   perror(subpage_perm);
+   return 1;
+   }
+   free(map);
+
+   in_test = 1;
+   errors = 0;
+   for (i = 0; i  pages; i++)
+   for (j = 0; j  16; j++, addr += 0x1000) {
+   do_read(addr);
+   check_faulted(addr, i, j, 0);
+   do_write(addr);
+   check_faulted(addr, i, j, 1);
+   }
+   in_test = 0;
+   if (errors) {
+   printf(%d errors detected\n, errors);
+   return 1;
+   }
+   return 0;
+}
+
+int test_anon(void)
+{
+   unsigned long align;
+
+   if (getpagesize() != 0x1) {
+   fprintf(stderr, Kernel page size must be 64K!\n);
+   return 1;
+   }
+
+   struct sigaction act = {
+   .sa_sigaction = segv,
+   .sa_flags = SA_SIGINFO
+   };
+   sigaction(SIGSEGV, act, NULL);
+
+

50 matches

Mail list logo