[PATCH] Powerpc: separate CONFIG_RELOCATABLE from CONFIG_CRASHDUMP in boot code

2010-11-18 Thread Sonny Rao
Fix head_64.S so that we can build a relocatable kernel
that isn't necessarily a crash-dump kernel

Signed-off-by: Milton Miller milt...@bga.com
Signed-off-by: Sonny Rao sonny...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/head_64.S |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f0dd577..53b098f 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -96,7 +96,7 @@ __secondary_hold_acknowledge:
.llong hvReleaseData-KERNELBASE
 #endif /* CONFIG_PPC_ISERIES */

-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
 * at the loaded address instead of the linked address.  This
 * is used by kexec-tools to keep the the kdump kernel in the
@@ -384,12 +384,10 @@ _STATIC(__after_prom_start)
/* process relocations for the final address of the kernel */
lis r25,page_off...@highest /* compute virtual base of kernel */
sldir25,r25,32
-#ifdef CONFIG_CRASH_DUMP
lwz r7,__run_at_load-_stext(r26)
-   cmplwi  cr0,r7,1/* kdump kernel ? - stay where we are */
+   cmplwi  cr0,r7,1/* flagged to stay where we are ? */
bne 1f
add r25,r25,r26
-#endif
 1: mr  r3,r25
bl  .relocate
 #endif
-- 
1.5.6.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: minor cleanups for machdep.h

2010-11-18 Thread Sonny Rao
remove stale declaration of setup_pci_ptrs, apparently from ppc before 2.4.0

remove #ifdef around struct existence declaration

fix spelling of linear

Signed-off-by: Milton Miller milt...@bga.com
Signed-off-by: Sonny Rao sonny...@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/machdep.h |6 +-
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/machdep.h 
b/arch/powerpc/include/asm/machdep.h
index d045b01..8433d36 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -27,9 +27,7 @@ struct iommu_table;
 struct rtc_time;
 struct file;
 struct pci_controller;
-#ifdef CONFIG_KEXEC
 struct kimage;
-#endif

 #ifdef CONFIG_SMP
 struct smp_ops_t {
@@ -72,7 +70,7 @@ struct machdep_calls {
 int psize, int ssize);
void(*flush_hash_range)(unsigned long number, int local);

-   /* special for kexec, to be called in real mode, linar mapping is
+   /* special for kexec, to be called in real mode, linear mapping is
 * destroyed as well */
void(*hpte_clear_all)(void);

@@ -324,8 +322,6 @@ extern sys_ctrler_t sys_ctrler;

 #endif /* CONFIG_PPC_PMAC */

-extern void setup_pci_ptrs(void);
-
 #ifdef CONFIG_SMP
 /* Poor default implementations */
 extern void __devinit smp_generic_give_timebase(void);
-- 
1.5.6.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/2]: Powerpc: Fix EHCA driver on relocatable kernel

2010-08-19 Thread Sonny Rao
Some modules (like eHCA) want to map all of kernel memory, for this to
work with a relocated kernel, we need to export kernstart_addr so
modules can use PHYSICAL_START and memstart_addr so they could use
MEMORY_START.  Note that the 32bit code already exports these symbols.

Signed-off-By: Sonny Rao sonny...@us.ibm.com

Index: common/arch/powerpc/mm/init_64.c
===
--- common.orig/arch/powerpc/mm/init_64.c   2010-08-16 02:38:33.0 
-0500
+++ common/arch/powerpc/mm/init_64.c2010-08-16 02:39:25.0 -0500
@@ -79,7 +79,9 @@
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 phys_addr_t memstart_addr = ~0;
+EXPORT_SYMBOL(memstart_addr);
 phys_addr_t kernstart_addr;
+EXPORT_SYMBOL(kernstart_addr);
 
 void free_initmem(void)
 {
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/2]: Powerpc: Fix EHCA driver on relocatable kernel

2010-08-19 Thread Sonny Rao
the eHCA driver registers a MR for all of kernel memory, but makes the
assumption that valid memory exists at KERNELBASE.  This assumption
may not be true in the case of a relocatable kernel, so use KERNELBASE
+ PHYSICAL_START to get the true beginning of usable kernel memory.

This patch depends on the earlier patch which exports the necessary
symbol for PHYSICAL_START in a relocatable kernel.

cc: Joachim Fenkes fen...@de.ibm.com
cc: Christoph Raisch rai...@de.ibm.com
cc: Hoan-Ham Hguyen hngu...@de.ibm.com
Signed-off-by: Sonny Rao sonny...@us.ibm.com


Index: linux-2.6/drivers/infiniband/hw/ehca/ehca_mrmw.c
===
--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_mrmw.c   2010-08-09 
22:16:57.688652613 -0500
+++ linux-2.6/drivers/infiniband/hw/ehca/ehca_mrmw.c2010-08-19 
22:53:03.451507146 -0500
@@ -171,7 +171,7 @@
}

ret = ehca_reg_maxmr(shca, e_maxmr,
-(void *)ehca_map_vaddr((void *)KERNELBASE),
+(void *)ehca_map_vaddr((void *)(KERNELBASE 
+ PHYSICAL_START)),
 mr_access_flags, e_pd,
 e_maxmr-ib.ib_mr.lkey,
 e_maxmr-ib.ib_mr.rkey);
@@ -1636,7 +1636,7 @@

/* register internal max-MR on HCA */
size_maxmr = ehca_mr_len;
-   iova_start = (u64 *)ehca_map_vaddr((void *)KERNELBASE);
+   iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + 
PHYSICAL_START));
ib_pbuf.addr = 0;
ib_pbuf.size = size_maxmr;
num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
@@ -2209,7 +2209,7 @@
 {
/* a MR is treated as max-MR only if it fits following: */
if ((size == ehca_mr_len) 
-   (iova_start == (void *)ehca_map_vaddr((void *)KERNELBASE))) {
+   (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + 
PHYSICAL_START {
ehca_gen_dbg(this is a max-MR);
return 1;
} else
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] need check for devices with bad status status property in __of_scan_bus()

2010-05-19 Thread Sonny Rao
On Mon, May 10, 2010 at 08:13:41PM -0500, Sonny Rao wrote:
 Hi Ben, we ran into an issue where it looks like we're not
 properly ignoring a pci device with a non-good status property
 when we walk the device tree and create our device nodes.
 
 However, the EEH init code does look for the property and 
 disables EEH on these devices.   This leaves us in an
 inconsistent state where we are poking at a supposedly bad
 piece of hardware and RTAS will block our config cycles 
 because EEH isn't enabled anyway.
 
 This has only been compile tested.
 
 Signed-off-by: Sonny Rao sonny...@linux.vnet.ibm.com
 
 Index: common/arch/powerpc/kernel/pci_of_scan.c
 ===
 --- common/arch/powerpc/kernel.orig/pci_of_scan.c 2010-05-10 
 20:00:40.0 -0500
 +++ common/arch/powerpc/kernel/pci_of_scan.c  2010-05-10 20:03:04.0 
 -0500
 @@ -310,6 +310,8 @@ static void __devinit __of_scan_bus(stru
   /* Scan direct children */
   for_each_child_of_node(node, child) {
   pr_debug(  * %s\n, child-full_name);
 + if (!of_device_is_available(child))
 + continue;
   reg = of_get_property(child, reg, reglen);
   if (reg == NULL || reglen  20)
   continue;


Ok, it's now been actually tested with firmware
that marks some devices as failed and appears to work.


-- 
Sonny Rao, LTC OzLabs, BML team
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/pseries: Add support for IO Event interrupt drivers

2010-05-19 Thread Sonny Rao
On Wed, May 19, 2010 at 10:10:58PM +1000, Michael Ellerman wrote:
snip
   Checking the scope requires an RTAS call, which takes a global lock
   (and you add another) - these aren't going to be used for anything
   performance critical I hope?
  
  Nope it shouldn't be performance critical, but it does raise the point
  that the current RTAS implementation in Linux *always* uses the global
  lock.  There is a set of calls which are not required to be serialized
  against each other.  This would be a totally different patchset to fix that
  problem.  The check-exception call is one that doesn't require the global
  RTAS lock.  I might work on that someday :-)
 
 Aha, that's kind of what I was wondering. I take it PAPR documents which
 calls need to be serialised and which don't?

Yeah, here's my working list of what calls can avoid the global lock:

List of re-entrant to the number of processors in the system RTAS Calls
--
ibm,get-xive
ibm,set-xive
ibm,int-off
ibm,int-on


OS machine check and soft-reset handlers must be able to call rtas
(I'm saying these are therefore re-entrant because we could deadlock
if we took a machine check or reset with the global lock held)
--
nvram-fetch
nvram-store
check-exception (includes our io-events)
display-character
system-reboot
set-power-level(0,0)
power-off
ibm,set-eeh-option
ibm,set-slot-reset
ibm,read-slot-reset-state2

additional serialization group by itself
--
stop-self
start-cpu
set-power-level


snip
  Also, if we're going to go ahead and use rtas_call() which locks
  its own buffer which meets the requirements, why do we even need
  a separate buffer?  Really, we should make this call, then copy
  the content of the buffer before handing it over to the drivers.
 
 But another CPU could rtas_call() and blow away our buffer after we've
 dropped the RTAS lock but before we've used the content of the buffer.

Yeah, maybe I'm getting ahead of myself here -- to me this just highlights
the whole locking problem with the API as written, since the locking is
all done inside that call. The API needs to be extended such that
we have the option of doing our own locking of RTAS calls.


+   if (rtas_elog-type != RTAS_TYPE_IO_EVENT) {
+   pr_warning(IO Events: We got called with an event type 
of %d
+   rather than %d!\n, rtas_elog-type,
+  RTAS_TYPE_IO_EVENT);
+   WARN_ON(1);
+   goto out;
+   }
  
  Should we try to process this instead of just warning?  
  The type we get might be one of the ones we recognize in
  ras.c; so this is an argument for combining ras.c with this code
  or at least report this in the same manner we report any other
  RTAS error log.
 
 We could, but that would be a massive firmware bug - not that it
 wouldn't happen, but it would be Not Our Problem TM.

Yeah, this is paranoia (*cough* Milton's suggestion)

   We /could/ copy the ioei_sec and drop the buf lock, which would allow
   another interrupt to come in and start doing the RTAS call (on another
   cpu, and iff there are actually multiple interrupts). But we probably
   don't care.
  
  I think we *have* to copy it because we don't want our lock held when we
  call random handlers.
 
 They're not really random, and as long as they don't call the
 register/unregister routines it should be /OK/. But copying is probably
 good so we don't hold the lock for too long.

Yeah, this is probably ok: since it's all happening in interrupt
context, the handlers have to be running in an atomic context
anyway.

-- 
Sonny Rao, LTC OzLabs, BML team
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc/pseries: Add support for IO Event interrupt drivers

2010-05-18 Thread Sonny Rao
On Tue, 18 May 2010 23:37:31 +1000, Michael Ellerman wrote:
 
 On Mon, 2010-05-17 at 22:53 +1000, Mark Nelson wrote:
  This patch adds support for handling IO Event interrupts which come
  through at the /event-sources/ibm,io-events device tree node.
 
 Hi Mark,
 
 You'll have to explain to me offline sometime how it is we ran out of
 interrupts and started needing to multiplex them ..

Firmware has decided to multiplex all i/o error reporting through a single
interrupt for reasons unknown, that is the primary reason for this patch.

One question is, we already register a few RAS interrupts which call
RTAS using check-exception for getting error information.  Those live
in platforms/pseries/ras.c -- should we combine the two into a common
implementation somehow?

  There is one ibm,io-events interrupt, but this interrupt might be used
  for multiple I/O devices, each with their own separate driver. So, we
  create a platform interrupt handler that will do the RTAS check-exception
  call and then call the appropriate driver's interrupt handler (the one(s)
  that registered with a scope that matches the scope of the incoming
  interrupt).
  
  So, a driver for a device that uses IO Event interrupts will register
  it's interrupt service routine (or interrupt handler) with the platform
  code using ioei_register_isr(). This register function takes a function
  pointer to the driver's handler and the scope that the driver is
  interested in (scopes defined in arch/powerpc/include/asm/io_events.h).
  The driver's handler must take a pointer to a struct io_events_section
  and must not return anything.
  
  The platform code registers io_event_interrupt() as the interrupt handler
  for the ibm,io-events interrupt. Upon receiving an IO Event interrupt, it
  checks the scope of the incoming interrupt and only calls those drivers'
  handlers that have registered as being interested in that scope.
 
 Checking the scope requires an RTAS call, which takes a global lock
 (and you add another) - these aren't going to be used for anything
 performance critical I hope?

Nope it shouldn't be performance critical, but it does raise the point
that the current RTAS implementation in Linux *always* uses the global
lock.  There is a set of calls which are not required to be serialized
against each other.  This would be a totally different patchset to fix that
problem.  The check-exception call is one that doesn't require the global
RTAS lock.  I might work on that someday :-)

snip

  +   /* check to see if we've already registered this function with
  +* this scope. If we have, don't register it again
  +*/
  +   iter = ioei_isr_list;
  +   while (iter) {
  +   if (iter-ioei_isr == isr  iter-scope == scope)
  +   break;
  +   iter = iter-next;
  +   }
  +
  +   if (iter) {
  +   ret = -EEXIST;
  +   goto out;
  +   }
  +
  +   cons = kmalloc(sizeof(struct ioei_consumer), GFP_KERNEL);
 
 But you don't want to kmalloc while holding the lock and with interrupts
 off.

Well, he could use GFP_ATOMIC but that's the wrong approach.  You should
allocate the buffer (using GFP_KERNEL) before taking the spin lock.

snip

  +#define EXT_INT_VECTOR_OFFSET  0x500
  +#define RTAS_TYPE_IO_EVENT 0xE1

These defines should probably go in asm/rtas.h

I noticed the code in ras.c has its own define too RAS_VECTOR_OFFSET
for 0x500 and asm/rtas.h actually has RTAS_TYPE_IO for 0xE1

  +
  +static irqreturn_t io_event_interrupt(int irq, void *dev_id)
  +{
  +   struct rtas_error_log *rtas_elog;
  +   struct io_events_section *ioei_sec;
  +   char *ch_ptr;
  +   int status;
  +   u16 *sec_len;
  +
  +   spin_lock(ioei_log_buf_lock);
  +
  +   status = rtas_call(ioei_check_exception_token, 6, 1, NULL,
  +  EXT_INT_VECTOR_OFFSET,
  +  irq_map[irq].hwirq,
 
 This is going to be  slow anyway, you may as well use virq_to_hw().
 
  +  RTAS_IO_EVENTS, 1 /*Time Critical */,
 
 Missing space before the T  ^
 
  +  __pa(ioei_log_buf),
 
 Does the buffer need to be aligned, and/or inside the RMO? I'd guess
 yes.

The docs for check-exception don't particularly specify alignment
requirements, but RTAS is generally going to want it in the RMO

Also, if we're going to go ahead and use rtas_call() which locks
its own buffer which meets the requirements, why do we even need
a separate buffer?  Really, we should make this call, then copy
the content of the buffer before handing it over to the drivers.


  +   rtas_get_error_log_max());

Here, we're passing back what RTAS told us its max is,
which doesn't necessarily equal the static buffer size we
allocated, and that can cause a buffer overflow.  So this
argument should be the static size of the buffer.

  +
  +   rtas_elog = (struct rtas_error_log *)ioei_log_buf;
  +
  +   if (status != 0)
  +   goto out;
  +
  +   /* We 

[PATCH] need check for devices with bad status status property in __of_scan_bus()

2010-05-10 Thread Sonny Rao
Hi Ben, we ran into an issue where it looks like we're not
properly ignoring a pci device with a non-good status property
when we walk the device tree and create our device nodes.

However, the EEH init code does look for the property and 
disables EEH on these devices.   This leaves us in an
inconsistent state where we are poking at a supposedly bad
piece of hardware and RTAS will block our config cycles 
because EEH isn't enabled anyway.

This has only been compile tested.

Signed-off-by: Sonny Rao sonny...@linux.vnet.ibm.com

Index: common/arch/powerpc/kernel/pci_of_scan.c
===
--- common/arch/powerpc/kernel.orig/pci_of_scan.c   2010-05-10 
20:00:40.0 -0500
+++ common/arch/powerpc/kernel/pci_of_scan.c2010-05-10 20:03:04.0 
-0500
@@ -310,6 +310,8 @@ static void __devinit __of_scan_bus(stru
/* Scan direct children */
for_each_child_of_node(node, child) {
pr_debug(  * %s\n, child-full_name);
+   if (!of_device_is_available(child))
+   continue;
reg = of_get_property(child, reg, reglen);
if (reg == NULL || reglen  20)
continue;


-- 
Sonny Rao, LTC OzLabs, BML team
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] Fix BSR to allow mmap of small BSR on 64k kernel

2009-06-18 Thread Sonny Rao
On Mon, Nov 17, 2008 at 01:26:13AM -0600, Sonny Rao wrote:
 On Fri, Nov 07, 2008 at 04:28:29PM +1100, Paul Mackerras wrote:
  Sonny Rao writes:
  
   Fix the BSR driver to allow small BSR devices, which are limited to a
   single 4k space, on a 64k page kernel.  Previously the driver would
   reject the mmap since the size was smaller than PAGESIZE (or because
   the size was greater than the size of the device).  Now, we check for
   this case and use remap_4k_pfn(). Also, take out code to set vm_flags,
   as the remap_pfn functions will do this for us.
  
  Thanks.
  
  Do we know that the BSR size will always be 4k if it's not a multiple
  of 64k?  Is it possible that we could get 8k, 16k or 32k BSRs?
  If it is possible, what does the user need to be able to do?  Do they
  just want to map 4k, or might they want to map the whole thing?
 
 
 Hi Paul, I took a look at changing the driver to reject a request for
 mapping more than a single 4k page, however the only indication we get
 of the requested size in the mmap function is the vma size, and this
 is always one page at minimum.  So, it's not possible to determine if
 the user wants one 4k page or more.  As I noted in my first response,
 there is only one case where this is even possible and I don't think
 it is a significant concern.
 
 I did notice that I left out the check to see if the user is trying to
 map more than the device length, so I fixed that.  Here's the revised
 patch.

Alright, I've reworked this now so that if we get one of these cases
where there's a bsr that's > 4k and < 64k on a 64k kernel we'll only
advertise that it is a 4k BSR to userspace.  I think this is the best
solution since user programs are only supposed to look at sysfs to
determine how much can be mapped, and libbsr does this as well.

Please consider for 2.6.31 as a fix, thanks.

---

Fix the BSR driver to allow small BSR devices on a 64k page kernel.  
Previously the driver would reject the mmap since the size was smaller
than PAGESIZE. This patch adds a check for this case and uses remap_4k_pfn().

There are also cases where we have a size that is greater than 4k but
smaller than 64k, and in that case we would only map the first 4k using
remap_4k_pfn, so we also change the length that we advertise in sysfs, 
so the user knows they can only map 4k. 

Also, take out code to set vm_flags, as the remap_pfn functions will
do this for us.

Signed-off-by: Sonny Rao sonny...@us.ibm.com

Index: linux-2.6.30/drivers/char/bsr.c
===
--- linux-2.6.30.orig/drivers/char/bsr.c2009-06-18 13:02:16.0 
-0500
+++ linux-2.6.30/drivers/char/bsr.c 2009-06-18 18:18:29.0 -0500
@@ -27,6 +27,7 @@
 #include linux/cdev.h
 #include linux/list.h
 #include linux/mm.h
+#include asm/pgtable.h
 #include asm/io.h
 
 /*
@@ -117,15 +118,22 @@
 {
unsigned long size   = vma-vm_end - vma-vm_start;
struct bsr_dev *dev = filp-private_data;
+   int ret;
 
-   if (size  dev-bsr_len || (size  (PAGE_SIZE-1)))
-   return -EINVAL;
-
-   vma-vm_flags |= (VM_IO | VM_DONTEXPAND);
vma-vm_page_prot = pgprot_noncached(vma-vm_page_prot);
 
-   if (io_remap_pfn_range(vma, vma-vm_start, dev-bsr_addr  PAGE_SHIFT,
-  size, vma-vm_page_prot))
+   /* check for the case of a small BSR device and map one 4k page for it*/
+   if (dev-bsr_len  PAGE_SIZE  size == PAGE_SIZE)
+   ret = remap_4k_pfn(vma, vma-vm_start, dev-bsr_addr  12,
+  vma-vm_page_prot);
+   else if (size = dev-bsr_len)
+   ret = io_remap_pfn_range(vma, vma-vm_start,
+dev-bsr_addr  PAGE_SHIFT,
+size, vma-vm_page_prot);
+   else
+   return -EINVAL;
+
+   if (ret)
return -EAGAIN;
 
return 0;
@@ -205,6 +213,11 @@
cur-bsr_stride = bsr_stride[i];
cur-bsr_dev= MKDEV(bsr_major, i + total_bsr_devs);
 
+   /* if we have a bsr_len of  4k and less then PAGE_SIZE (64k 
pages) */
+   /* we can only map 4k of it, so only advertise the 4k in sysfs 
*/
+   if (cur-bsr_len  4096  cur-bsr_len  PAGE_SIZE)
+   cur-bsr_len = 4096;
+
switch(cur-bsr_bytes) {
case 8:
cur-bsr_type = BSR_8;
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] BSR: add 4096 byte BSR size

2009-06-18 Thread Sonny Rao
Add a 4096 byte BSR size which will be used on new machines.  Also, remove
the warning when we run into an unknown size, as this can spam the kernel
log excessively.

Signed-off-by: Sonny Rao sonny...@us.ibm.com

Index: linux-2.6.27/drivers/char/bsr.c
===
--- linux-2.6.27.orig/drivers/char/bsr.c2009-06-18 17:50:41.0 
-0500
+++ linux-2.6.27/drivers/char/bsr.c 2009-06-18 17:50:58.0 -0500
@@ -76,12 +76,13 @@
 static int bsr_major;
 
 enum {
-   BSR_8   = 0,
-   BSR_16  = 1,
-   BSR_64  = 2,
-   BSR_128 = 3,
-   BSR_UNKNOWN = 4,
-   BSR_MAX = 5,
+   BSR_8= 0,
+   BSR_16   = 1,
+   BSR_64   = 2,
+   BSR_128  = 3,
+   BSR_4096 = 4,
+   BSR_UNKNOWN = 5,
+   BSR_MAX  = 6,
 };
 
 static unsigned bsr_types[BSR_MAX];
@@ -231,9 +232,11 @@
case 128:
cur-bsr_type = BSR_128;
break;
+   case 4096:
+   cur-bsr_type = BSR_4096;
+   break;
default:
cur-bsr_type = BSR_UNKNOWN;
-   printk(KERN_INFO unknown BSR size 
%d\n,cur-bsr_bytes);
}
 
cur-bsr_num = bsr_types[cur-bsr_type];
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] Fix BSR to allow mmap of small BSR on 64k kernel

2008-11-20 Thread Sonny Rao
On Thu, Nov 20, 2008 at 09:54:21AM +1100, Paul Mackerras wrote:
 Sonny Rao writes:
 
  On Wed, Nov 19, 2008 at 03:07:04PM +1100, Paul Mackerras wrote:
   I think we should be checking that dev->bsr_len == 4096 here.
   
   Paul.
  
  Well, dev->bsr_len could be 4096 or 8192
 
 Isn't the dev->bsr_len == 8192 case the one where we'll only map 4096
 bytes and therefore not do what the user expected?  Sounds to me like
 we want to return an error for that case.

Well, the problem is that we can't tell if the user asked for 4k or
8k (since we only know the size of the VMA).  

If we fail whenever dev->bsr_len is 8k then the user could never
map that device on a 64k page kernel. Is that what we want?


-- 
Sonny Rao, LTC OzLabs, BML team
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Fix BSR to allow mmap of small BSR on 64k kernel

2008-11-19 Thread Sonny Rao
On Wed, Nov 19, 2008 at 03:07:04PM +1100, Paul Mackerras wrote:
 Sonny Rao writes:
 
  -   if (io_remap_pfn_range(vma, vma-vm_start, dev-bsr_addr  PAGE_SHIFT,
  -  size, vma-vm_page_prot))
  +   /* check for the case of a small BSR device and map one 4k page for it*/
  +   if (dev-bsr_len  PAGE_SIZE  size == PAGE_SIZE)
  +   ret = remap_4k_pfn(vma, vma-vm_start, dev-bsr_addr  12,
  +  vma-vm_page_prot);
 
 I think we should be checking that dev->bsr_len == 4096 here.
 
 Paul.

Well, dev->bsr_len could be 4096 or 8192

-- 
Sonny Rao, LTC OzLabs, BML team
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Fix BSR to allow mmap of small BSR on 64k kernel

2008-11-16 Thread Sonny Rao
On Fri, Nov 07, 2008 at 04:28:29PM +1100, Paul Mackerras wrote:
 Sonny Rao writes:
 
  Fix the BSR driver to allow small BSR devices, which are limited to a
  single 4k space, on a 64k page kernel.  Previously the driver would
  reject the mmap since the size was smaller than PAGESIZE (or because
  the size was greater than the size of the device).  Now, we check for
  this case and use remap_4k_pfn(). Also, take out code to set vm_flags,
  as the remap_pfn functions will do this for us.
 
 Thanks.
 
 Do we know that the BSR size will always be 4k if it's not a multiple
 of 64k?  Is it possible that we could get 8k, 16k or 32k BSRs?
 If it is possible, what does the user need to be able to do?  Do they
 just want to map 4k, or might they want to map the whole thing?


Hi Paul, I took a look at changing the driver to reject a request for
mapping more than a single 4k page, however the only indication we get
of the requested size in the mmap function is the vma size, and this
is always one page at minimum.  So, it's not possible to determine if
the user wants one 4k page or more.  As I noted in my first response,
there is only one case where this is even possible and I don't think
it is a significant concern.

I did notice that I left out the check to see if the user is trying to
map more than the device length, so I fixed that.  Here's the revised
patch.



Fix the BSR driver to allow small BSR devices on a 64k page kernel.  
Previously the driver would reject the mmap since the size was smaller
than PAGESIZE. This patch adds a  check for this case and uses remap_4k_pfn().

Also, take out code to set vm_flags, as the remap_pfn functions will
do this for us.

Signed-off-by: Sonny Rao [EMAIL PROTECTED]

Index: linux/drivers/char/bsr.c
===
--- linux.orig/drivers/char/bsr.c   2008-11-17 00:29:23.0 -0600
+++ linux/drivers/char/bsr.c2008-11-17 00:59:57.0 -0600
@@ -27,6 +27,7 @@
 #include linux/cdev.h
 #include linux/list.h
 #include linux/mm.h
+#include asm/pgtable.h
 #include asm/io.h
 
 /*
@@ -115,15 +116,22 @@
 {
unsigned long size   = vma-vm_end - vma-vm_start;
struct bsr_dev *dev = filp-private_data;
+   int ret;
 
-   if (size  dev-bsr_len || (size  (PAGE_SIZE-1)))
-   return -EINVAL;
-
-   vma-vm_flags |= (VM_IO | VM_DONTEXPAND);
vma-vm_page_prot = pgprot_noncached(vma-vm_page_prot);
 
-   if (io_remap_pfn_range(vma, vma-vm_start, dev-bsr_addr  PAGE_SHIFT,
-  size, vma-vm_page_prot))
+   /* check for the case of a small BSR device and map one 4k page for it*/
+   if (dev-bsr_len  PAGE_SIZE  size == PAGE_SIZE)
+   ret = remap_4k_pfn(vma, vma-vm_start, dev-bsr_addr  12,
+  vma-vm_page_prot);
+   else if (size = dev-bsr_len)
+   ret = io_remap_pfn_range(vma, vma-vm_start,
+dev-bsr_addr  PAGE_SHIFT,
+size, vma-vm_page_prot);
+   else
+   return -EINVAL;
+
+   if (ret)
return -EAGAIN;
 
return 0;
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


[PATCH] powerpc: BSR: support multiple OF-node description of BSR

2008-11-09 Thread Sonny Rao
Add support for multiple BSR nodes in the device tree.

Previously, the BSR driver only supported a single OF node describing
a BSR.  Apparently when an LPAR is set to use all system resources
the BSR appears as a single node, but when it is handed out in pieces,
each 8 byte piece gets its own node.  So, keep a list of bsr devices
instead of the array and include all nodes.

Also, be more inclusive of what BSR devices we accept by only checking
compatibility and not the device name property (which might change in 
the future versions of BSR).

Signed-off-by: Sonny Rao [EMAIL PROTECTED]

Index: common/drivers/char/bsr.c
===
--- common.orig/drivers/char/bsr.c  2008-11-09 17:21:47.0 -0600
+++ common/drivers/char/bsr.c   2008-11-09 17:36:22.0 -0600
@@ -61,6 +61,8 @@
unsigned bsr_num;  /* bsr id number for its type */
int  bsr_minor;
 
+   struct list_head bsr_list;
+
dev_tbsr_dev;
struct cdev bsr_cdev;
struct device *bsr_device;
@@ -68,8 +70,8 @@
 
 };
 
-static unsigned num_bsr_devs;
-static struct bsr_dev *bsr_devs;
+static unsigned total_bsr_devs;
+static struct list_head bsr_devs = LIST_HEAD_INIT(bsr_devs);
 static struct class *bsr_class;
 static int bsr_major;
 
@@ -155,24 +157,25 @@
 
 static void bsr_cleanup_devs(void)
 {
-   int i;
-   for (i=0 ; i  num_bsr_devs; i++) {
-   struct bsr_dev *cur = bsr_devs + i;
+   struct bsr_dev *cur, *n;
+
+   list_for_each_entry_safe(cur, n, bsr_devs, bsr_list) {
if (cur-bsr_device) {
cdev_del(cur-bsr_cdev);
device_del(cur-bsr_device);
}
+   list_del(cur-bsr_list);
+   kfree(cur);
}
-
-   kfree(bsr_devs);
 }
 
-static int bsr_create_devs(struct device_node *bn)
+static int bsr_add_node(struct device_node *bn)
 {
-   int bsr_stride_len, bsr_bytes_len;
+   int bsr_stride_len, bsr_bytes_len, num_bsr_devs;
const u32 *bsr_stride;
const u32 *bsr_bytes;
unsigned i;
+   int ret = -ENODEV;
 
bsr_stride = of_get_property(bn, ibm,lock-stride, bsr_stride_len);
bsr_bytes  = of_get_property(bn, ibm,#lock-bytes, bsr_bytes_len);
@@ -180,35 +183,36 @@
if (!bsr_stride || !bsr_bytes ||
(bsr_stride_len != bsr_bytes_len)) {
printk(KERN_ERR bsr of-node has missing/incorrect property\n);
-   return -ENODEV;
+   return ret;
}
 
num_bsr_devs = bsr_bytes_len / sizeof(u32);
 
-   /* only a warning, its informational since we'll fail and exit */
-   WARN_ON(num_bsr_devs  BSR_MAX_DEVS);
-
-   bsr_devs = kzalloc(sizeof(struct bsr_dev) * num_bsr_devs, GFP_KERNEL);
-   if (!bsr_devs)
-   return -ENOMEM;
-
for (i = 0 ; i  num_bsr_devs; i++) {
-   struct bsr_dev *cur = bsr_devs + i;
+   struct bsr_dev *cur = kzalloc(sizeof(struct bsr_dev),
+ GFP_KERNEL);
struct resource res;
int result;
 
+   if (!cur) {
+   printk(KERN_ERR Unable to alloc bsr dev\n);
+   ret = -ENOMEM;
+   goto out_err;
+   }
+
result = of_address_to_resource(bn, i, res);
if (result  0) {
-   printk(KERN_ERR bsr of-node has invalid reg 
property\n);
-   goto out_err;
+   printk(KERN_ERR bsr of-node has invalid reg property, 
skipping\n);
+   kfree(cur);
+   continue;
}
 
-   cur-bsr_minor  = i;
+   cur-bsr_minor  = i + total_bsr_devs;
cur-bsr_addr   = res.start;
cur-bsr_len= res.end - res.start + 1;
cur-bsr_bytes  = bsr_bytes[i];
cur-bsr_stride = bsr_stride[i];
-   cur-bsr_dev= MKDEV(bsr_major, i);
+   cur-bsr_dev= MKDEV(bsr_major, i + total_bsr_devs);
 
switch(cur-bsr_bytes) {
case 8:
@@ -229,14 +233,15 @@
}
 
cur-bsr_num = bsr_types[cur-bsr_type];
-   bsr_types[cur-bsr_type] = cur-bsr_num + 1;
snprintf(cur-bsr_name, 32, bsr%d_%d,
 cur-bsr_bytes, cur-bsr_num);
 
cdev_init(cur-bsr_cdev, bsr_fops);
result = cdev_add(cur-bsr_cdev, cur-bsr_dev, 1);
-   if (result)
+   if (result) {
+   kfree(cur);
goto out_err;
+   }
 
cur-bsr_device = device_create_drvdata(bsr_class, NULL,
cur-bsr_dev,
@@ -245,16 +250,37 @@
printk(KERN_ERR

Re: [PATCH] Fix BSR to allow mmap of small BSR on 64k kernel

2008-11-07 Thread Sonny Rao
On Fri, Nov 07, 2008 at 04:28:29PM +1100, Paul Mackerras wrote:
 Sonny Rao writes:
 
  Fix the BSR driver to allow small BSR devices, which are limited to a
  single 4k space, on a 64k page kernel.  Previously the driver would
  reject the mmap since the size was smaller than PAGESIZE (or because
  the size was greater than the size of the device).  Now, we check for
  this case use remap_4k_pfn(). Also, take out code to set vm_flags,
  as the remap_pfn functions will do this for us.
 
 Thanks.
 
 Do we know that the BSR size will always be 4k if it's not a multiple
 of 64k?  Is it possible that we could get 8k, 16k or 32k BSRs?
 If it is possible, what does the user need to be able to do?  Do they
 just want to map 4k, or might they want to map the whole thing?

Hi Paul, the BSR comes in 4 different sizes, 8, 16, 64, 128.

The 8 byte BSR registers are always contained within a single 4k page
and always represent a piece of a larger BSR, but can be assigned to
individual LPARs.

The 16 byte BSR is contained in two 4k pages, and so the code as
patched would not allow both 4k pages to be mapped.  However, I don't
see any reason for the user to need both 4k pages.  

To give some background as to why the BSR exists in multiple pages at
all, I'll say that one proposed way to use the BSR is to have each
participating cpu own a cache-line sized piece of the BSR mapped
page and write only to that piece.  The reasoning is that using this approach,
software could use either a BSR or regular cacheable memory for the
barrier operation, and it would not need to know which it is actually
using.  So in this type of scenario, there should be enough
cache-line sized pieces mappable. In the case of the 16 byte BSR, one
4k page contains 32 128-byte cache-line pieces.  So this is enough to
still use the BSR in this way.

The 64 byte BSR is contained in 16 4k-pages -- so one 64k page works
there, and the 128 byte BSR is contained in 32 4k pages.

The case of the 16 byte BSR is the only one where it matters, I can
change the code to map both 4k pages if the user requests it, but I
don't see any extra utility.  For consistency though, maybe we should
reject a request to map more than 4k but less than 64k on a 64k kernel?

Sonny
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


[PATCH] Fix BSR to allow mmap of small BSR on 64k kernel

2008-11-06 Thread Sonny Rao
Fix the BSR driver to allow small BSR devices, which are limited to a
single 4k space, on a 64k page kernel.  Previously the driver would
reject the mmap since the size was smaller than PAGESIZE (or because
the size was greater than the size of the device).  Now, we check for
this case and use remap_4k_pfn(). Also, take out code to set vm_flags,
as the remap_pfn functions will do this for us.


Signed-off-by: Sonny Rao [EMAIL PROTECTED]

Index: common/drivers/char/bsr.c
===
--- common.orig/drivers/char/bsr.c  2008-11-06 16:43:58.0 -0600
+++ common/drivers/char/bsr.c   2008-11-06 18:30:41.0 -0600
@@ -27,6 +27,7 @@
 #include linux/cdev.h
 #include linux/list.h
 #include linux/mm.h
+#include asm/pgtable.h
 #include asm/io.h
 
 /*
@@ -115,15 +116,23 @@
 {
unsigned long size   = vma-vm_end - vma-vm_start;
struct bsr_dev *dev = filp-private_data;
+   int ret;
 
-   if (size  dev-bsr_len || (size  (PAGE_SIZE-1)))
-   return -EINVAL;
+   /* This is legal where we have a BSR on a 4k page but a 64k kernel */
+   if (size  dev-bsr_len)
+   size = dev-bsr_len;
 
-   vma-vm_flags |= (VM_IO | VM_DONTEXPAND);
vma-vm_page_prot = pgprot_noncached(vma-vm_page_prot);
 
-   if (io_remap_pfn_range(vma, vma-vm_start, dev-bsr_addr  PAGE_SHIFT,
-  size, vma-vm_page_prot))
+   if (dev-bsr_len  PAGE_SIZE)
+   ret = remap_4k_pfn(vma, vma-vm_start, dev-bsr_addr  12,
+  vma-vm_page_prot);
+   else
+   ret = io_remap_pfn_range(vma, vma-vm_start,
+dev-bsr_addr  PAGE_SHIFT,
+size, vma-vm_page_prot);
+
+   if (ret)
return -EAGAIN;
 
return 0;
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


[PATCH] powerpc: don't spin on sync

2008-07-11 Thread Sonny Rao
Push the sync below the secondary smp init hold loop and comment its purpose.
This should speed up boot by reducing global traffic during the single-threaded
portion of boot.

Signed-off-by: Sonny Rao [EMAIL PROTECTED]
Signed-off-by: Milton Miller [EMAIL PROTECTED]

--- next.git/arch/powerpc/kernel/head_64.S~orig 2008-07-11 17:28:47.0 
-0500
+++ next.git/arch/powerpc/kernel/head_64.S  2008-07-11 17:31:50.0 
-0500
@@ -1198,7 +1198,6 @@ _GLOBAL(generic_secondary_smp_init)
 3: HMT_LOW
lbz r23,PACAPROCSTART(r13)  /* Test if this processor should */
/* start.*/
-   sync
 
 #ifndef CONFIG_SMP
b   3b  /* Never go on non-SMP   */
@@ -1206,6 +1205,8 @@ _GLOBAL(generic_secondary_smp_init)
cmpwi   0,r23,0
beq 3b  /* Loop until told to go */
 
+   sync/* order paca.run and cur_cpu_spec */
+
/* See if we need to call a cpu state restore handler */
LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
ld  r23,0(r23)
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Power5,Power6 BSR driver

2008-07-07 Thread Sonny Rao
On Mon, Jul 07, 2008 at 02:59:35PM +1000, Benjamin Herrenschmidt wrote:
 
  +   cur-bsr_addr   = reg[i * 2];
  +   cur-bsr_len= reg[i * 2 + 1];
 
 That's fishy... hand-reading of reg property without taking
 into account the parent's #size-cells/#address-cells... can't you
 use of_address_to_resource or something similar and carry a struct
 resource around instead ?

So, with this suggestion I looked at the resource API... not very well
documented, and I get the feeling like it's more for carving up a PCI
memory address range.  In the case of the BSR, everything is already
partitioned (by hardware) so I don't see the point of using this API
here.  Or am I missing something about it?

 In fact, same goes with the way you do num_bsr_devs = reg_len / 16.
 
 You should rather use -another- property of well known length, or
 get the #address/#size-cells of the parent and use those appropriately.

Well, I check to make sure the lengths are consistent with each other
right above there so we shouldn't walk off the end of anything, but I
will take a look at using #size-cells / #address-cells instead.

Thanks for the comments

Sonny
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCHv4] Power5,Power6 BSR driver

2008-07-07 Thread Sonny Rao
On Tue, Jul 08, 2008 at 02:52:34PM +1000, Stephen Rothwell wrote:
 Hi Sonny,
 
 On Mon, 7 Jul 2008 21:58:12 -0500 Sonny Rao [EMAIL PROTECTED] wrote:
 
  +static int bsr_create_devs(struct device_node *bn)
  +{
 
  +   cur-bsr_device = device_create(bsr_class, NULL,
  +   cur-bsr_dev,
  +   cur-bsr_name);
  +   if (!cur-bsr_device) {
  +   printk(KERN_ERR device_create failed for %s\n,
  +  cur-bsr_name);
  +   cdev_del(cur-bsr_cdev);
  +   goto out_err;
  +   }
  +   dev_set_drvdata(cur-bsr_device, cur);
 
 device_create() is being removed in 2.6.27 because the above introduces a
 race, use device_create_drvdata() instead.
 

Stephen, thanks for the heads up. 

From: Sonny Rao [EMAIL PROTECTED]

Adds a character driver for BSR support on IBM POWER systems including 
Power5 and Power6.  The BSR is an optional processor facility not currently 
implemented by any other processors.  Its primary purpose is fast large SMP 
synchronization.  More details on the BSR are in comments to the code which 
follows.  This patch adds the BSR driver to pseries_defconfig.

Signed-off-by: Sonny Rao [EMAIL PROTECTED]
Signed-off-by: Joel Schopp [EMAIL PROTECTED]

Index: linux-dev/drivers/char/bsr.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-dev/drivers/char/bsr.c2008-07-08 00:29:22.0 -0500
@@ -0,0 +1,312 @@
+/* IBM POWER Barrier Synchronization Register Driver
+ *
+ * Copyright IBM Corporation 2008
+ *
+ * Author: Sonny Rao [EMAIL PROTECTED]
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include linux/kernel.h
+#include linux/of.h
+#include linux/of_device.h
+#include linux/of_platform.h
+#include linux/module.h
+#include linux/cdev.h
+#include linux/list.h
+#include linux/mm.h
+#include asm/io.h
+
+/*
+ This driver exposes a special register which can be used for fast
+ synchronization across a large SMP machine.  The hardware is exposed
+ as an array of bytes where each process will write to one of the bytes to
+ indicate it has finished the current stage and this update is broadcast to
+ all processors without having to bounce a cacheline between them. In
+ POWER5 and POWER6 there is one of these registers per SMP,  but it is
+ presented in two forms; first, it is given as a whole and then as a number
+ of smaller registers which alias to parts of the single whole register.
+ This can potentially allow multiple groups of processes to each have their
+ own private synchronization device.
+
+ Note that this hardware *must* be written to using *only* single byte writes.
+ It may be read using 1, 2, 4, or 8 byte loads which must be aligned since
+ this region is treated as cache-inhibited  processes should also use a
+ full sync before and after writing to the BSR to ensure all stores and
+ the BSR update have made it to all chips in the system
+*/
+
+/* This is arbitrary number, up to Power6 it's been 17 or fewer  */
+#define BSR_MAX_DEVS (32)
+
+struct bsr_dev {
+   u64  bsr_addr; /* Real address */
+   u64  bsr_len;  /* length of mem region we can map */
+   unsigned bsr_bytes;/* size of the BSR reg itself */
+   unsigned bsr_stride;   /* interval at which BSR repeats in the page */
+   unsigned bsr_type; /* maps to enum below */
+   unsigned bsr_num;  /* bsr id number for its type */
+   int  bsr_minor;
+
+   dev_tbsr_dev;
+   struct cdev bsr_cdev;
+   struct device *bsr_device;
+   char bsr_name[32];
+
+};
+
+static unsigned num_bsr_devs;
+static struct bsr_dev *bsr_devs;
+static struct class *bsr_class;
+static int bsr_major;
+
+enum {
+   BSR_8   = 0,
+   BSR_16  = 1,
+   BSR_64  = 2,
+   BSR_128 = 3,
+   BSR_UNKNOWN = 4,
+   BSR_MAX = 5,
+};
+
+static unsigned bsr_types[BSR_MAX];
+
+static ssize_t
+bsr_size_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+   struct bsr_dev *bsr_dev = dev_get_drvdata(dev);
+   return sprintf(buf, %u\n, bsr_dev-bsr_bytes);
+}
+
+static ssize_t
+bsr_stride_show(struct device *dev, struct

Re: [PATCH] Power5,Power6 BSR driver

2008-06-18 Thread Sonny Rao
On Tue, Jun 17, 2008 at 05:44:43PM -0500, Sonny Rao wrote:
 On Tue, Jun 17, 2008 at 05:39:52PM -0500, Nathan Lynch wrote:
  Hi, mainly a couple of coding style things, but one minor bug (I
  think).
snip
 
 Ok Will fix and send out again

From: Sonny Rao [EMAIL PROTECTED]

Adds a character driver for BSR support on IBM POWER systems including 
Power5 and Power6.  The BSR is an optional processor facility not currently 
implemented by any other processors.  Its primary purpose is large SMP 
synchronization.  More details on the BSR are in comments to the code which 
follows.

After addressing any issues from the community I'm hoping this can be queued
for 2.6.27.

Signed-off-by: Sonny Rao [EMAIL PROTECTED]
Signed-off-by: Joel Schopp [EMAIL PROTECTED]

Index: linux-2.6.24/drivers/char/bsr.c
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-2.6.24/drivers/char/bsr.c 2008-06-18 01:45:49.0 -0500
@@ -0,0 +1,309 @@
+/* IBM POWER Barrier Synchronization Register Driver
+ *
+ * Copyright IBM Corporation 2008
+ *
+ * Author: Sonny Rao [EMAIL PROTECTED]
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include linux/kernel.h
+#include linux/of.h
+#include linux/of_device.h
+#include linux/of_platform.h
+#include linux/module.h
+#include linux/cdev.h
+#include linux/list.h
+#include linux/mm.h
+#include asm/io.h
+
+/*
+ This driver exposes a special register which can be used for fast
+ synchronization across a large SMP machine.  The hardware is exposed
+ as an array of bytes where each process will write to one of the bytes to
+ indicate it has finished the current stage and this update is broadcast to
+ all processors without having to bounce a cacheline between them. In
+ POWER5 and POWER6 there is one of these registers per SMP,  but it is
+ presented in two forms; first, it is given as a whole and then as a number
+ of smaller registers which alias to parts of the single whole register.
+ This can potentially allow multiple groups of processes to each have their
+ own private synchronization device.
+
+ Note that this hardware *must* be written to using *only* single byte writes.
+ It may be read using 1, 2, 4, or 8 byte loads which must be aligned since
+ this region is treated as cache-inhibited  processes should also use a
+ full sync before and after writing to the BSR to ensure all stores and
+ the BSR update have made it to all chips in the system
+*/
+
+/* This is arbitrary number, up to Power6 it's been 17 or fewer  */
+#define BSR_MAX_DEVS (32)
+
+struct bsr_dev {
+   u64  bsr_addr; /* Real address */
+   u64  bsr_len;  /* length of mem region we can map */
+   unsigned bsr_bytes;/* size of the BSR reg itself */
+   unsigned bsr_stride;   /* interval at which BSR repeats in the page */
+   unsigned bsr_type; /* maps to enum below */
+   unsigned bsr_num;  /* bsr id number for its type */
+   int  bsr_minor;
+
+   dev_tbsr_dev;
+   struct cdev bsr_cdev;
+   struct device *bsr_device;
+   char bsr_name[32];
+
+};
+
+static unsigned num_bsr_devs;
+static struct bsr_dev *bsr_devs;
+static struct class *bsr_class;
+static int bsr_major;
+
+enum {
+   BSR_8   = 0,
+   BSR_16  = 1,
+   BSR_64  = 2,
+   BSR_128 = 3,
+   BSR_UNKNOWN = 4,
+   BSR_MAX = 5,
+};
+
+static unsigned bsr_types[BSR_MAX];
+
+static ssize_t
+bsr_size_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+   struct bsr_dev *bsr_dev = dev_get_drvdata(dev);
+   return sprintf(buf, %u\n, bsr_dev-bsr_bytes);
+}
+
+static ssize_t
+bsr_stride_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+   struct bsr_dev *bsr_dev = dev_get_drvdata(dev);
+   return sprintf(buf, %u\n, bsr_dev-bsr_stride);
+}
+
+static ssize_t
+bsr_len_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+   struct bsr_dev *bsr_dev = dev_get_drvdata(dev);
+   return sprintf(buf, %lu\n, bsr_dev-bsr_len);
+}
+
+static struct device_attribute bsr_dev_attrs[] = {
+   __ATTR(bsr_size, S_IRUGO, bsr_size_show, NULL),
+   __ATTR(bsr_stride, S_IRUGO, bsr_stride_show, NULL),
+   __ATTR(bsr_length, S_IRUGO, bsr_len_show, NULL

Re: [PATCH] Power5,Power6 BSR driver

2008-06-18 Thread Sonny Rao
On Mon, Jun 16, 2008 at 01:53:44PM -0500, [EMAIL PROTECTED] wrote:
 From: Sonny Rao [EMAIL PROTECTED]
 
 Adds a character driver for BSR support on IBM POWER systems including 
 Power5 and Power6.  The BSR is an optional processor facility not currently 
 implemented by any other processors.  It's primary purpose is large SMP 
 synchronization.  More details on the BSR are in comments to the code which 
 follows.
 


Here's a basic, quick n' dirty testcase I have
Remember to link w/ -lpthread


#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
#include <pthread.h>

/* Print every byte of the register, one "bsr[i] = 0x.." line per byte. */
static void rw_dump(volatile unsigned char *bsr, unsigned bytes)
{
	unsigned i;

	printf("reading current bsr values\n");
	for (i = 0; i < bytes; i++)
		printf("bsr[%u] = 0x%x\n", i, (unsigned)bsr[i]);
}

/*
 * rw_test - single-threaded read/write smoke test of a mapped BSR.
 * @map:   start of the mapping (or any writable buffer of @bytes bytes)
 * @bytes: number of bytes to exercise
 *
 * Dumps the current contents, writes 0xff to every byte and dumps again,
 * then writes each byte's own index (truncated to 8 bits) and dumps once
 * more.  On return byte i holds (unsigned char)i.
 *
 * All accesses go through a volatile unsigned char pointer so that the
 * compiler emits exactly one single-byte store per write and cannot elide
 * the read-backs, and so that values print as 0x00..0xff even where plain
 * char is signed.
 */
static void rw_test(char *map, unsigned bytes)
{
	volatile unsigned char *bsr = (volatile unsigned char *)map;
	unsigned i;

	rw_dump(bsr, bytes);

	printf("writing all 1s into bsr\n");
	for (i = 0; i < bytes; i++)
		bsr[i] = 0xff;
	rw_dump(bsr, bytes);

	printf("writing all byte numbers into bsr\n");
	for (i = 0; i < bytes; i++)
		bsr[i] = (unsigned char)i;
	rw_dump(bsr, bytes);
}

/* Per-thread state for pthread_test(); thread_fn() receives a pointer to one entry. */
struct thread_data {
pthread_t thread; /* handle filled in by pthread_create(), used for pthread_join() */
volatile char *map; /* byte array shared by all threads (what pthread_test() was given) */
unsigned id; /* index into map that this thread increments to check in */
uint64_t counter; /* incremented once per spin while waiting for map[0] != 0 */
};

/*
 * be_busy - spin in a register-only countdown loop for `cycles` iterations.
 *
 * The count is copied into a local that is passed as a read-write ("+r")
 * operand, because the loop decrements it; an input-only operand must not
 * be modified by the asm.  addic. updates CR0 and the carry bit in XER, so
 * both are listed as clobbers.  PowerPC only.
 * NOTE(review): a count of 0 wraps and spins for a very long time.
 */
#define be_busy(cycles) do { \
	unsigned long be_busy_left_ = (cycles); \
	__asm__ __volatile__ ("1:	addic.	%0,%0,-1\n" \
			      "	bne	1b\n" \
			      : "+r" (be_busy_left_) : : "cr0", "xer"); \
} while (0)

/*
 * __sync - issue the PowerPC `sync` instruction.  The "memory" clobber also
 * keeps the compiler from moving memory accesses across it.
 */
#define __sync() do { \
	__asm__ __volatile__ ("sync\n" : : : "memory"); \
} while (0)

static
void * thread_fn(void * data)
{
struct thread_data *mydata = data;

__sync();
mydata-map[mydata-id]++;
__sync();
while (mydata-map[0] == 0) {
/* be_busy(10); */
mydata-counter++;
}
return NULL;
}

/*
 * pthread_test - barrier test using one thread per byte of the mapping.
 * @map: mapped BSR (or any shared byte array) of at least @num bytes
 * @num: number of bytes to use; bytes 1..num-1 each get a worker thread,
 *       byte 0 is the release flag owned by the caller
 *
 * Clears all bytes, starts the workers, waits until every worker has made
 * its byte non-zero, then sets byte 0 to release them and joins them.
 * Prints each worker's id and how many times it spun waiting for release.
 *
 * Exits the process if a thread cannot be created (the remaining workers
 * would otherwise never be released correctly).
 */
static void pthread_test(volatile char *map, unsigned num)
{
	struct thread_data *pthreads;
	unsigned i;
	int rc;

	if (num == 0)
		return;

	/* calloc: every ->counter must start at 0 before thread_fn counts. */
	pthreads = calloc(num, sizeof(*pthreads));
	if (!pthreads) {
		perror("calloc");
		return;
	}

	for (i = 0; i < num; i++)
		map[i] = 0;
	__sync();

	/* Slot 0 is the release flag, so workers start at index 1. */
	for (i = 1; i < num; i++) {
		struct thread_data *cur = &pthreads[i];

		cur->map = map;
		cur->id  = i;
		rc = pthread_create(&cur->thread, NULL, thread_fn, cur);
		if (rc) {
			/* pthread_* return the error instead of setting errno */
			errno = rc;
			perror("pthread_create");
			exit(1);
		}
	}

	/* Wait for every worker to check in. */
	for (i = 1; i < num; i++) {
		while (map[i] == 0)
			;
	}

	/* Release the workers. */
	__sync();
	map[0] = 1;
	__sync();

	for (i = 1; i < num; i++) {
		rc = pthread_join(pthreads[i].thread, NULL);
		if (rc) {
			errno = rc;
			perror("pthread_join");
		}
		printf("%03u %llu\n", pthreads[i].id,
		       (unsigned long long)pthreads[i].counter);
	}
	free(pthreads);
}

/*
 * Usage: bsr <dev> <num bytes>
 *
 * Opens the BSR character device <dev>, maps its first page and runs
 * rw_test() followed by pthread_test() over the first <num bytes> bytes.
 * Returns 0 on success and 1 on a usage, open or mmap error.
 */
int main(int argc, char *argv[])
{
	int pagesize = getpagesize();
	unsigned long bytes;
	char *file;
	char *end;
	char *map;
	int fd;

	if (argc < 3) {
		fprintf(stderr, "usage: bsr <dev> <num bytes>\n");
		return 1;
	}
	file = argv[1];

	/* Only one page is mapped, so the tests must stay inside it. */
	errno = 0;
	bytes = strtoul(argv[2], &end, 0);
	if (errno || end == argv[2] || *end != '\0' ||
	    bytes == 0 || bytes > (unsigned long)pagesize) {
		fprintf(stderr, "bsr: bad byte count '%s' (expected 1..%d)\n",
			argv[2], pagesize);
		return 1;
	}

	fd = open(file, O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* mmap() reports failure with MAP_FAILED, never with NULL. */
	map = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	rw_test(map, (unsigned)bytes);
	pthread_test(map, (unsigned)bytes);

	munmap(map, pagesize);
	close(fd);
	return 0;
}
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Power5,Power6 BSR driver

2008-06-18 Thread Sonny Rao
On Tue, Jun 17, 2008 at 05:39:52PM -0500, Nathan Lynch wrote:
 Hi, mainly a couple of coding style things, but one minor bug (I
 think).
 
 [EMAIL PROTECTED] wrote:
  From: Sonny Rao [EMAIL PROTECTED]
  
  +static int bsr_mmap(struct file *filp, struct vm_area_struct *vma)
  +{
  +   unsigned long size   = vma-vm_end - vma-vm_start;
  +   struct bsr_dev *dev = filp-private_data;
  +
  +   if (size  dev-bsr_len || (size  (PAGE_SIZE-1)))
  +   return -EINVAL;
  +
  +   vma-vm_flags |= (VM_IO | VM_DONTEXPAND);
  +   vma-vm_page_prot = pgprot_noncached(vma-vm_page_prot);
  +
  +   if (io_remap_pfn_range(vma, vma-vm_start, dev-bsr_addr  PAGE_SHIFT,
  +  size, vma-vm_page_prot))
  +   return -EAGAIN;
 
 Indentation is wrong.

Yeah I noticed that too.

  +static void bsr_cleanup_devs(void)
  +{
  +   int i;
  +   for (i=0 ; i  num_bsr_devs; i++) {
 
  i = 0
 
  +   struct bsr_dev *cur = bsr_devs + i;
  +   if (cur-bsr_device) {
  +   cdev_del(cur-bsr_cdev);
  +   device_del(cur-bsr_device);
  +   }
  +   }
  +
  +   kfree(bsr_devs);
  +}
  +
  +static int bsr_create_devs(struct device_node *bn)
  +{
  +   int reg_len, bsr_stride_len, bsr_bytes_len;
  +   const u64 *reg;
  +   const u32 *bsr_stride;
  +   const u32 *bsr_bytes;
  +   unsigned i;
  +
  +   reg= of_get_property(bn, reg, reg_len);
  +   bsr_stride = of_get_property(bn, ibm,lock-stride, bsr_stride_len);
  +   bsr_bytes  = of_get_property(bn, ibm,#lock-bytes, bsr_bytes_len);
  +
  +   if (!reg || !bsr_stride || !bsr_bytes ||
  +   (bsr_stride_len != bsr_bytes_len) ||
  +   (bsr_stride_len/4 != reg_len/16)) {
 
  bsr_stride_len / 4 != reg_len / 16
 
 
  +   printk(KERN_ERR bsr of-node has missing/incorrect property\n);
  +   return -ENODEV;
  +   }
 
 ...
 
  +static int __init bsr_init(void)
  +{
  +   struct device_node *np;
  +   dev_t bsr_dev = MKDEV(bsr_major, 0);
  +   int ret = -ENODEV;
  +   int result;
  +
  +   np = of_find_compatible_node(NULL, ibm,bsr, ibm,bsr);
  +   if (!np)
  +   goto out_err;
  +
  +   bsr_class = class_create(THIS_MODULE, bsr);
  +   if (IS_ERR(bsr_class)) {
  +   printk(KERN_ERR class_create() failed for bsr_class\n);
  +   goto out_err;
 
 At this point I think you can leak a reference to np.

Yeah, you're right.

 
  +   }
  +   bsr_class-dev_attrs = bsr_dev_attrs;
  +
  +   result = alloc_chrdev_region(bsr_dev, 0, BSR_MAX_DEVS, bsr);
  +   bsr_major = MAJOR(bsr_dev);
  +   if (result  0) {
  +   printk(KERN_ERR alloc_chrdev_region() failed for bsr\n);
  +   goto out_err_1;
  +   }
  +
  +   if ((ret = bsr_create_devs(np))  0)
  +   goto out_err_2;
  +
  +   of_node_put(np);
  +
  +   return 0;
  +
  + out_err_2:
  +   unregister_chrdev_region(bsr_dev, BSR_MAX_DEVS);
  +
  + out_err_1:
  +   class_destroy(bsr_class);
  +   of_node_put(np);
  +
  + out_err:
  +
  +   return ret;
  +}

Ok Will fix and send out again

-- 
Sonny Rao, LTC Ozlabs
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: Help required for porting ISP1362 usb device driver

2007-07-13 Thread Sonny Rao
On Thu, Jul 12, 2007 at 03:57:32PM -0700, Vikram Kone wrote:
 
Hi..
I'm a Linux newbie and I'm working on porting the USB driver ISP1362 by
Philips on to my Freescale ppc board.
I don't know how to do this... so if any of you can tell me how to do this
step by step, I would be very grateful to you

Try posting this question on the kernel newbies mailing list

http://kernelnewbies.org/MailingList
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] [POWERPC] check for NULL ppc_md.init_IRQ() before calling

2007-07-09 Thread Sonny Rao
On Fri, Jul 06, 2007 at 05:16:34AM -0400, Sonny Rao wrote:
 On Thu, Jul 05, 2007 at 08:37:34AM -0500, Olof Johansson wrote:
  On Sun, Jul 01, 2007 at 08:49:37PM -0400, Sonny Rao wrote:
   The pseries platform does not have a default function for init_IRQ and
   does not install one if it doesn't find or doesn't recognize an
   interrupt controller in the device tree.  Currently, the kernel dies
   when it tries to call the NULL init_IRQ() function.  Clean that up.
  
  Doesn't it make more sense to make init_IRQ() check that the pointer is
  set instead? That'll work for more platforms than just pseries.
 
 Yeah, that might be the simplest way.  The only reason I can think of
 to do it this way is that (I think) every single other platform in
 arch/powerpc statically initializes init_IRQ, with pseries being the
 oddball.  It doesn't matter much to me, so I can post another patch in a
 bit.

Check to make sure ppc_md.init_IRQ exists before calling it.

Signed-off-by: Sonny Rao [EMAIL PROTECTED]

--- linux/arch/powerpc/kernel/irq.c~orig 2007-07-09 12:46:58.0 -0500
+++ linux/arch/powerpc/kernel/irq.c 2007-07-09 12:47:07.0 -0500
@@ -337,8 +337,8 @@ void do_IRQ(struct pt_regs *regs)
 
 void __init init_IRQ(void)
 {
-
-   ppc_md.init_IRQ();
+   if (ppc_md.init_IRQ)
+   ppc_md.init_IRQ();
 #ifdef CONFIG_PPC64
irq_ctx_init();
 #endif
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] pseries: don't die if unknown/missing interrupt controller property

2007-07-04 Thread Sonny Rao
On Sun, Jul 01, 2007 at 08:49:37PM -0400, Sonny Rao wrote:
 The pseries platform does not have a default function for init_IRQ and
 does not install one if it doesn't find or doesn't recognize an
 interrupt controller in the device tree.  Currently, the kernel dies
 when it tries to call the NULL init_IRQ() function.  Clean that up.
 
 Signed-off-by: Sonny Rao [EMAIL PROTECTED]
 
 --- kernel/arch/powerpc/platforms/pseries/setup.c~orig2007-06-26 
 16:23:38.0 -0500
 +++ kernel/arch/powerpc/platforms/pseries/setup.c 2007-06-26 
 19:09:53.0 -0500
 @@ -274,6 +274,12 @@ static void pseries_lpar_enable_pmcs(voi
   get_lppaca()-pmcregs_in_use = 1;
  }
  
 +static void no_irq_init(void)
 +{
 + printk(KERN_ERR no_irq_init: failed to recognize
 + interrupt-controller\n);
 +}
 +
  static void __init pseries_discover_pic(void)
  {
   struct device_node *np;
 @@ -296,8 +302,6 @@ static void __init pseries_discover_pic(
   return;
   }
   }
 - printk(KERN_ERR pSeries_discover_pic: failed to recognize
 - interrupt-controller\n);
  }
  
  static void __init pSeries_setup_arch(void)
 @@ -594,4 +598,5 @@ define_machine(pseries) {
   .check_legacy_ioport= pSeries_check_legacy_ioport,
   .system_reset_exception = pSeries_system_reset_exception,
   .machine_check_exception = pSeries_machine_check_exception,
 + .init_IRQ   = no_irq_init,  
  };


Hi Paul, if there's no objection I'd like this to go into 2.6.23 thanks.

___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev