[PATCH 07/14] PCI, powerpc: Register busn_res for root buses

2012-01-30 Thread Yinghai Lu
Signed-off-by: Yinghai Lu 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/kernel/pci-common.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index cce98d7..501f29b 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1732,6 +1732,8 @@ void __devinit pcibios_scan_phb(struct pci_controller 
*hose)
bus->secondary = hose->first_busno;
hose->bus = bus;
 
+   pci_bus_insert_busn_res(bus, hose->first_busno, hose->last_busno);
+
/* Get probe mode and perform scan */
mode = PCI_PROBE_NORMAL;
if (node && ppc_md.pci_probe_mode)
@@ -1742,8 +1744,11 @@ void __devinit pcibios_scan_phb(struct pci_controller 
*hose)
of_scan_bus(node, bus);
}
 
-   if (mode == PCI_PROBE_NORMAL)
+   if (mode == PCI_PROBE_NORMAL) {
+   pci_bus_update_busn_res_end(bus, 255);
hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+   pci_bus_update_busn_res_end(bus, bus->subordinate);
+   }
 
/* Platform gets a chance to do some global fixups before
 * we proceed to resource allocation
-- 
1.7.7

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 2/2 v2] P1020RDB-PC: Add p1020rdb-pc platform support

2012-01-30 Thread Zhicheng Fan
From: Zhicheng Fan 

Signed-off-by: Zhicheng Fan 
---
 arch/powerpc/platforms/85xx/mpc85xx_rdb.c |   26 +-
 1 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c 
b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index ccf520e..0d3d7c6 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -1,7 +1,7 @@
 /*
  * MPC85xx RDB Board Setup
  *
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009,2012 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -90,10 +90,20 @@ static void __init mpc85xx_rdb_setup_arch(void)
 
 machine_device_initcall(p2020_rdb, mpc85xx_common_publish_devices);
 machine_device_initcall(p1020_rdb, mpc85xx_common_publish_devices);
+machine_device_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
  */
+static int __init p1020_rdb_pc_probe(void)
+{
+   unsigned long root = of_get_flat_dt_root();
+
+   if (of_flat_dt_is_compatible(root, "fsl,P1020RDB-PC"))
+   return 1;
+   return 0;
+}
+
 static int __init p2020_rdb_probe(void)
 {
unsigned long root = of_get_flat_dt_root();
@@ -139,3 +149,17 @@ define_machine(p1020_rdb) {
.calibrate_decr = generic_calibrate_decr,
.progress   = udbg_progress,
 };
+
+define_machine(p1020_rdb_pc) {
+   .name   = "P1020RDB-PC",
+   .probe  = p1020_rdb_pc_probe,
+   .setup_arch = mpc85xx_rdb_setup_arch,
+   .init_IRQ   = mpc85xx_rdb_pic_init,
+#ifdef CONFIG_PCI
+   .pcibios_fixup_bus  = fsl_pcibios_fixup_bus,
+#endif
+   .get_irq= mpic_get_irq,
+   .restart= fsl_rstcr_restart,
+   .calibrate_decr = generic_calibrate_decr,
+   .progress   = udbg_progress,
+};
-- 
1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 1/2 v2] powerpc/dts: Add dts for p1020rdb-pc

2012-01-30 Thread Zhicheng Fan
From: Zhicheng Fan 

P1020RDB-PC Overview
--
1Gbyte DDR3 SDRAM
32 Mbyte NAND flash
10 16Mbyte NOR flash
16 Mbyte SPI flash
SD connector to interface with the SD memory card
Real-time clock on I2C bus

PCIe:
- x1 PCIe slot
- x1 mini-PCIe slot

10/100/1000 BaseT Ethernet ports:
- eTSEC1, RGMII: one 10/100/1000 port using VitesseTM VSC7385 L2 switch
- eTSEC2, SGMII: one 10/100/1000 port using VitesseTM VSC8221
- eTSEC3, RGMII: one 10/100/1000 port using AtherosTM AR8021

USB 2.0 port:
- Two USB2.0 Type A receptacles
- One USB2.0 signal to Mini PCIe slot

Dual RJ45 UART ports:
- DUART interface: supports two UARTs up to 115200 bps for console display

Signed-off-by: Zhicheng Fan 
---
 arch/powerpc/boot/dts/p1020rdb-pc.dts|   90 
 arch/powerpc/boot/dts/p1020rdb-pc.dtsi   |  255 ++
 arch/powerpc/boot/dts/p1020rdb-pc_36b.dts|   90 
 arch/powerpc/boot/dts/p1020rdb-pc_camp_core0.dts |   63 ++
 arch/powerpc/boot/dts/p1020rdb-pc_camp_core1.dts |  141 
 5 files changed, 639 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pc.dts
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pc.dtsi
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pc_36b.dts
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pc_camp_core0.dts
 create mode 100644 arch/powerpc/boot/dts/p1020rdb-pc_camp_core1.dts

diff --git a/arch/powerpc/boot/dts/p1020rdb-pc.dts 
b/arch/powerpc/boot/dts/p1020rdb-pc.dts
new file mode 100644
index 000..5c333b0
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb-pc.dts
@@ -0,0 +1,90 @@
+/*
+ * P1020 RDB-PC Device Tree Source
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ *   names of its contributors may be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 
THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/p1020si-pre.dtsi"
+/ {
+   model = "fsl,P1020RDB-PC";
+   compatible = "fsl,P1020RDB-PC";
+
+   memory {
+   device_type = "memory";
+   };
+
+   lbc: localbus@ffe05000 {
+   reg = <0 0xffe05000 0 0x1000>;
+
+   /* NOR, NAND Flashes and Vitesse 5 port L2 switch */
+   ranges = <0x0 0x0 0x0 0xef00 0x0100
+ 0x1 0x0 0x0 0xff80 0x0004
+ 0x2 0x0 0x0 0xffb0 0x0002
+ 0x3 0x0 0x0 0xffa0 0x0002>;
+   };
+
+   soc: soc@ffe0 {
+   ranges = <0x0 0x0 0xffe0 0x10>;
+   };
+
+   pci0: pcie@ffe09000 {
+   ranges = <0x200 0x0 0xa000 0 0xa000 0x0 0x2000
+ 0x100 0x0 0x 0 0xffc1 0x0 0x1>;
+   reg = <0 0xffe09000 0 0x1000>;
+   pcie@0 {
+   ranges = <0x200 0x0 0xa000
+ 0x200 0x0 0xa000
+ 0x0 0x2000
+
+ 0x100 0x0 0x0
+ 0x100 0x0 0x0
+ 0x0 0x10>;
+   };
+   };
+
+   pci1: pcie@ffe0a000 {
+   reg = <0 0xffe0a000 0 0x1000>;
+   ranges = <0x200 0x0 0x8000 0 0x8000 0x0 0x2000
+ 0x1

Re: [PATCH v3 00/25] irq_domain generalization and refinement

2012-01-30 Thread Olof Johansson
On Fri, Jan 27, 2012 at 02:35:54PM -0700, Grant Likely wrote:
> Hey everyone,
> 
> This patch series is ready for much wider consumption now.  I'd like
> to get it into linux-next ASAP because there will be ARM board support
> depending on it.  I'll wait a few days before I ask Stephen to pull
> this in.
> 
> Stephen/Milton/Ben, any testing you can help with here would be
> appreciated since you've got access to a wider variety of Power
> machines than I do.

This series has been:

Tested-by: Olof Johansson 

On powerpc/pasemi (it's the only one I still have easy access to).


-Olof
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [RFC] dmaengine/dma_slave: add context parameter to prep_slave_sg callback

2012-01-30 Thread Vinod Koul
On Mon, 2012-01-30 at 08:55 -0800, Bounine, Alexandre wrote:
> On Monday, January 30, 2012 at 4:31 AM, Vinod Koul wrote:
> > 
> > On Thu, 2012-01-26 at 16:22 -0500, Alexandre Bounine wrote:
> > > As we agreed during our discussion about adding DMA Engine support for 
> > > RapidIO
> > > subsystem, RapidIO and similar clients may benefit from adding an extra 
> > > context
> > > parameter to device_prep_slave_sg() callback.
> > > See https://lkml.org/lkml/2011/10/24/275 for more details.
> > >
> > > Adding the context parameter will allow to pass client/target specific
> > > information associated with an individual data transfer request.
> > >
> > > In the case of RapidIO support this additional information consists of 
> > > target
> > > destination ID and its buffer address (which is not mapped into the local 
> > > CPU
> > > memory space). Because a single RapidIO-capable DMA channel may queue data
> > > transfer requests to different target devices, the per-request 
> > > configuration
> > > is required.
> > >
> > > The proposed change eliminates need for new subsystem-specific API.
> > > Existing DMA_SLAVE clients will ignore the new parameter.
> > >
> > > This RFC only demonstrates the API change and does not include 
> > > corresponding
> > > changes to existing DMA_SLAVE clients. Complete set of patches will be 
> > > provided
> > > after (if) this API change is accepted.
> >
> > This looks good to me. But was thinking if we need to add this new
> > parameter for other slave calls (circular, interleaved, memcpy...)
> > 
> 
> I agree that cyclic and interleaved calls may benefit from adding that 
> parameter as well.
> Benefits to the cyclic call are straightforward - same as dma_slave.
> Adding a context parameter to the interleaved transfers may be more future 
> proofing option
> than an immediate need. Memcopy and other calls that deal with local memory 
> transfers
> probably should be left untouched.
> 
> What if we limit modifications to:
> 1) three calls (slave, cyclic and interleaved) OR
> 2) two (slave and cyclic) at this moment?
> 
> I am just more focused on dma_slave just because it fits well to provide RDMA
> over RapidIO fabric.
> 
> If everybody agrees, I can go ahead and make changes to all three at once.
For now we need at least slave and cyclic, so pls go ahead and make these 
changes.
For interleaved, we might need it sooner [1], but I would think it would
need few more changes to the API, so it can be rolled as part of those
changes.

-- 
~Vinod

[1]: https://lkml.org/lkml/2012/1/30/48

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/2] powerpc: Abstract common define of signal multiplex control for qe

2012-01-30 Thread fanzc

On 01/30/2012 08:32 PM, Tabi Timur-B04825 wrote:

fanzc wrote:

Signed-off-by: Fanzc

Please fix this.  There are only two e's in freescale.  In addition,
please use your full name.


Hi Timur,

Do you mean that I need to move the define to another file or create a new
file?

No, I mean your signed-off-by should be this:

Signed-off-by: Zhicheng Fan


I got it, thanks

--
Zhicheng Fan

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: Question about GPIO Lib

2012-01-30 Thread Bruce_Leonard
Bill,

Bill Gatliff  wrote on 01/27/2012 10:42:57 AM:

> 
> On Fri, Jan 27, 2012 at 5:31 AM,   wrote:
> >
> > The problem is we've got a number of other things hooked up to the 
GPIO
> > pins that it would be very bad if someone from user space played with
> > them, like our FPGA configuration pin.  Some one toggles that and our 
box
> > goes stupid.  So what I'm wondering is if there's a way, preferably 
via
> > the device tree, to limit the GPIOs that GPIO Lib exposes to user 
space?
> 
> Sounds like you DON'T want to merely export that GPIO pin to userspace.
> 

Well, yes I do want to just export to userspace, I just want to restrict 
the pins that get exported to only those that are defined in the device 
tree.  I don't want or need to access any of the exported pins from kernel 
space and I don't want user space to access any pin not explicitly called 
out in the device tree.  I want it to behave like gpio-leds only with 
input as well as output capabilities.

> If you have anything in kernel space doing a gpio_request() on that
> pin, it won't be exportable to userspace anyway.  Regardless, you are
> probably better off implementing a DEVICE_ATTR that, in its store()
> method, treads lightly on said pin.  And then do a gpio_request() in
> kernel space so that users can't ever see the pin directly.
> 
> Just my $0.02.
> 

If I understand this correctly you're basically saying that gpiolib is a 
waste of time and I should just write my own driver?

Bruce

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling

2012-01-30 Thread Scott Wood
On 01/30/2012 04:15 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2012-01-30 at 15:47 -0600, Scott Wood wrote:
> 
>> Only the first one will happen in a context where we want to store.  The
>> issue is if we get another higher priority interrupt when we enable, and
>> that enables interrupts and we get the doorbell that wants to run the
>> saved irq.  If we get priorities out of order we'll EOI the wrong interrupt.
> 
> Hrm, ok, what about in handle_masked we just "save" it onto some kind of
> PACA local stack ? Then on enable, before actually turning EE back, we
> see if there's something there and we hit do_IRQ() if there is. Your
> get_irq() would preferably pop things out of that little stack.
> 
> Any hole in that scheme ?

If we never enable EE, there's no need for a stack -- we disable EE on
the first interrupt and can leave it in EPR.  It's similar to my
original patch, but with the exception hack replaced with a call to
do_IRQ().  The quality of the regs you pass (if any) may suffer, which
is why I did the exception hack, but I can live with that if you can.

>> IIRC we now never enable interrupts while servicing one (are individual
>> handlers banned from doing this too?), 
> 
> No I think they still can.

OK.  Another option could be to use the doorbell and store EPR
somewhere, but make sure if we get a real interrupt and there's a
pending interrupt stored, we clear it out and process both in proper
order.  When the doorbell eventually fires it's a nop.  Testing this
would require some effort, though.  Better to stick with the simple
scheme where we never enable EE with a pending interrupt.

>>> However the main thing is that this significantly improves the quality
>>> of the samples obtained from performance interrupts which can now act as
>>> pseudo-NMI up to a certain point.
>>
>> Which is compensation for the hardware not doing it right with a proper
>> critical interrupt or equivalent, but yeah, that's a benefit.
> 
> Right, server has no concept really of critical interrupts.

Would be nice if the embedded version used critical, though (or could be
configured to do so).

-Scott

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling

2012-01-30 Thread Benjamin Herrenschmidt
On Mon, 2012-01-30 at 15:47 -0600, Scott Wood wrote:

> Only the first one will happen in a context where we want to store.  The
> issue is if we get another higher priority interrupt when we enable, and
> that enables interrupts and we get the doorbell that wants to run the
> saved irq.  If we get priorities out of order we'll EOI the wrong interrupt.

Hrm, ok, what about in handle_masked we just "save" it onto some kind of
PACA local stack ? Then on enable, before actually turning EE back, we
see if there's something there and we hit do_IRQ() if there is. Your
get_irq() would preferably pop things out of that little stack.

Any hole in that scheme ?

I'm thinking about reworking the lazy EE code, so maybe leave me a
couple of days. You did find a real bug on PS3 I believe and potentially
with Anton's dec replay stuff as well, when the enable is implicit in
the exception return path.

I'm thinking about breaking down that into a lower level function
returning whether we need a DEC replay, IRQ replay or nothing, and call
it in two contexts:

 - From arch...restore(), if we need to replay, we then tail-call an asm
helper which will generate an irq stack frame and call do_IRQ() or
timer_interrupt().

 - From exception restore, if we need to replay, we re-use the existing
stack frame, change the TRAP value and move on to
do_IRQ/timer_interrupt.

I may not have time to do that this week (hint hint, if you have
time ... :-) but that's what's on my mind atm.

> IIRC we now never enable interrupts while servicing one (are individual
> handlers banned from doing this too?), 

No I think they still can.

> in which case it shouldn't be an issue. 

> I'm a bit hesitant to rely on that, but oh well.  Beats having
> to add CTPR support to the hypervisor just for this.  We could throw a
> WARN_ONCE if we see a stored interrupt when we take an external
> interrupt exception.
>
> >> and book3s decrementers 
> > 
> > Book3s decrementer is level sensitive based on the sign bit of the
> > decrementer (a bit odd but heh) at least on 64-bit processors.
> 
> So what's up with "On server, re-trigger the decrementer if it went
> negative since some processors only trigger on edge transitions of the
> sign bit" in arch_local_irq_restore()?

Ask Anton or Paulus, at least on P7 it's level :-) I think maybe old
Power3 had it different.

> >> and other hypervisors... 
> > 
> > I wouldn't take the PS3 HV and legacy iseries HV as good design
> > examples :-) The latter was working around limited HW functionality at
> > the time as well. 
> 
> Just pointing out we're not the first. :-)

Yeah yeah ok :-) I hope you do see my point however of not wanting to
get into an ifdef mess all over again... If we can get that stuff
reasonably efficiently NOP'ed out, that would do, tho I suppose one of
your concerns is the generation of a stack frame for re-enabling.

One possibility would be to inline the part that tests if the hw irq
happened, and use asm to branch out of line to something that will then
make up a stack frame separately. It's a bit gross but would remove the
cost of creating stack frames in callers.

> >> and you force
> >> all functions that enable interrupts to create a stack frame to deal
> >> with this.
> > 
> > Right, but overall this is still more efficient performance wise on most
> > processors than whacking the MSR.
> 
> Laurentiu ran lmbench on e5500 with/without lazy EE and the results were
> mixed.  No large differences either way, but probably at least as many
> tests were slower with lazy EE as were faster with lazy EE.  Or possibly
> there was no significant difference and it was just noise from one run
> to another (I'm not sure how many times he ran it or what the variation
> was).
> 
> He did claim a noticeable increase in networking performance with
> external proxy enabled.

Hrm, any decent networking HW should mitigate interrupts and mostly rely
on polling... you must have been doing something wrong :-)

> I guess hard-EE is worse on some other chips?

Hard EE is bad on server chips afaik, tho mtmsrd x,1 does mitigate the
damage.

> > However the main thing is that this significantly improves the quality
> > of the samples obtained from performance interrupts which can now act as
> > pseudo-NMI up to a certain point.
> 
> Which is compensation for the hardware not doing it right with a proper
> critical interrupt or equivalent, but yeah, that's a benefit.

Right, server has no concept really of critical interrupts.

> >> What is the compelling reason for forcing lazy EE on us?  Why is it tied
> >> to 64-bit?
> > 
> > Because that's where we historically implemented it and because iSeries
> > more/less required to begin with. And I don't want to have a split
> > scheme, especially not a compile time one.
> 
> We can probably live with it in this case -- the patch to disable lazy
> EE was largely an artifact of my not having time to try a new approach,
> and other people here wanting some fix sooner.
> 
> In

Re: [PATCH] powerpc/booke64: Configurable lazy interrupt disabling

2012-01-30 Thread Scott Wood
On 01/23/2012 02:50 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2012-01-23 at 13:21 -0600, Scott Wood wrote:
>> Perhaps the issues with a higher priority interrupt intervening can be
>> addressed by messing around with current task priority at the MPIC (with
>> an hcall introduced for the hv case, since currently task priority is
>> not exposed to the guest).  I haven't had time to revisit this, and
>> don't expect to soon.  If someone else wants to try, fine.  In the
>> meantime, lazy EE is causing problems.
> 
> Or by storing pending interrupts in an array.

Only the first one will happen in a context where we want to store.  The
issue is if we get another higher priority interrupt when we enable, and
that enables interrupts and we get the doorbell that wants to run the
saved irq.  If we get priorities out of order we'll EOI the wrong interrupt.

IIRC we now never enable interrupts while servicing one (are individual
handlers banned from doing this too?), in which case it shouldn't be an
issue.  I'm a bit hesitant to rely on that, but oh well.  Beats having
to add CTPR support to the hypervisor just for this.  We could throw a
WARN_ONCE if we see a stored interrupt when we take an external
interrupt exception.

>> and book3s decrementers 
> 
> Book3s decrementer is level sensitive based on the sign bit of the
> decrementer (a bit odd but heh) at least on 64-bit processors.

So what's up with "On server, re-trigger the decrementer if it went
negative since some processors only trigger on edge transitions of the
sign bit" in arch_local_irq_restore()?

>> and other hypervisors... 
> 
> I wouldn't take the PS3 HV and legacy iseries HV as good design
> examples :-) The latter was working around limited HW functionality at
> the time as well. 

Just pointing out we're not the first. :-)

>> and you force
>> all functions that enable interrupts to create a stack frame to deal
>> with this.
> 
> Right, but overall this is still more efficient performance wise on most
> processors than whacking the MSR.

Laurentiu ran lmbench on e5500 with/without lazy EE and the results were
mixed.  No large differences either way, but probably at least as many
tests were slower with lazy EE as were faster with lazy EE.  Or possibly
there was no significant difference and it was just noise from one run
to another (I'm not sure how many times he ran it or what the variation
was).

He did claim a noticeable increase in networking performance with
external proxy enabled.

I guess hard-EE is worse on some other chips?

> However the main thing is that this significantly improves the quality
> of the samples obtained from performance interrupts which can now act as
> pseudo-NMI up to a certain point.

Which is compensation for the hardware not doing it right with a proper
critical interrupt or equivalent, but yeah, that's a benefit.

>> What is the compelling reason for forcing lazy EE on us?  Why is it tied
>> to 64-bit?
> 
> Because that's where we historically implemented it and because iSeries
> more/less required to begin with. And I don't want to have a split
> scheme, especially not a compile time one.

We can probably live with it in this case -- the patch to disable lazy
EE was largely an artifact of my not having time to try a new approach,
and other people here wanting some fix sooner.

In general, though, I hope that the history of previously having 64-bit
to yourself doesn't mean that our 64-bit chips are treated as second-class
citizens, having to live with design decisions oriented around the chips
that got there first, with a mandate that there be no special kernel
builds, even just for optimization[1].  No, I don't want to go back to
one kernel per board, but some build-time configuration is reasonable on
embedded IMHO, as long as the possibilities are limited.  We're already
running a different build from book3s.

If the issue is just that you think making this particular feature
configurable would be a mess, fine (though I think it would have been
manageable).

-Scott

[1] The hypervisor's issues with guest IACK should be fixable with an
hv-internal CTPR hack if anyone cares enough, but there would be a
performance cost to not using external proxy.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC] powerpc: Make SPARSE_IRQ required

2012-01-30 Thread Grant Likely
All IRQs on powerpc are managed via irq_domain anyway, there isn't really
any advantage to turning SPARSE_IRQ off, and it's the direction we want
to take the kernel design anyway.  This patch makes powerpc always use
SPARSE_IRQ.

On pseries_defconfig, SPARSE_IRQ adds only about 0x300 bytes to the
.text sections, and removes about 0x2 from the data section for the
static irq_desc table.

Signed-off-by: Grant Likely 
Cc: Rob Herring 
Cc: Ben Herrenschmidt 
Cc: Thomas Gleixner 
---
 arch/powerpc/Kconfig  |2 +-
 arch/powerpc/kernel/irq.c |4 
 2 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 303703d..1aa840d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -133,7 +133,7 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select HAVE_GENERIC_HARDIRQS
-   select HAVE_SPARSE_IRQ
+   select SPARSE_IRQ
select IRQ_PER_CPU
select IRQ_DOMAIN
select GENERIC_IRQ_SHOW
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index e3673ff..282fb39 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -93,10 +93,6 @@ extern int tau_interrupts(int);
 
 #ifdef CONFIG_PPC64
 
-#ifndef CONFIG_SPARSE_IRQ
-EXPORT_SYMBOL(irq_desc);
-#endif
-
 int distribute_irqs = 1;
 
 static inline notrace unsigned long get_hard_enabled(void)
-- 
1.7.5.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v3 22/25] irq_domain/x86: Convert x86 (embedded) to use common irq_domain

2012-01-30 Thread Grant Likely
On Sat, Jan 28, 2012 at 05:44:05PM +0100, Sebastian Andrzej Siewior wrote:
> * Grant Likely | 2012-01-27 14:36:16 [-0700]:
> 
> >This patch removes the x86-specific definition of irq_domain and replaces
> >it with the common implementation.
> 
> I pulled your devicetree/next tree. After this patch I get:
> 
> |Hierarchical RCU implementation.
> |NR_IRQS:2304 nr_irqs:256 16
> |[ cut here ]
> |WARNING: at 
> /home/bigeasy/work/shiva/git/linux-2.6-tip/kernel/irq/irqdomain.c:114 
> irq_domain_add_legacy+0x75/0x150()
> |Modules linked in:
> |Pid: 0, comm: swapper/0 Not tainted 3.3.0-rc1+ #65
> |Call Trace:
> | [] ? printk+0x18/0x1a
> | [] warn_slowpath_common+0x6d/0xa0
> | [] ? irq_domain_add_legacy+0x75/0x150
> | [] ? irq_domain_add_legacy+0x75/0x150
> | [] warn_slowpath_null+0x1d/0x20
> | [] irq_domain_add_legacy+0x75/0x150
> | [] x86_add_irq_domains+0x96/0xd6
> | [] init_IRQ+0x8/0x33
> | [] start_kernel+0x191/0x2e1
> | [] ? loglevel+0x2b/0x2b
> | [] i386_start_kernel+0x81/0x86
> |---[ end trace 4eaa2a86a8e2da22 ]---
> |[ cut here ]
> |kernel BUG at 
> /home/bigeasy/work/shiva/git/linux-2.6-tip/arch/x86/kernel/devicetree.c:367!
> 
> The warning is coming from this piece in irq_domain_add_legacy()
> |for (i = 0; i < size; i++) {
> | int irq = first_irq + i;
> | struct irq_data *irq_data = irq_get_irq_data(irq);
> | 
> | if (WARN_ON(!irq_data || irq_data->domain)) {
> 
> irq_data is NULL here.
> 
> | mutex_unlock(&irq_domain_mutex);
> | of_node_put(domain->of_node);
> | kfree(domain);
> | return NULL;
> | }
> | }
> | 
> 
> This is not always the case. arch_early_irq_init() in [0] sets up the
> first 16 entries. The remaining few (there is a total of 24 irqs for
> first ioapic and a second ioapic) are not initialized. This happens
> later via ->xlate, ioapic_xlate() => io_apic_setup_irq_pin() =>
> alloc_irq_and_cfg_at() calls irq_set_chip_data() on demand.
> 
> [0] arch/x86/kernel/apic/io_apic.c

Ugh.  This isn't easy.  The legacy mapping really needs all the
irq_desc structures to be allocated.  You could call irq_alloc_descs()
before calling irq_domain_add_legacy(), but that causes all the
irq_descs to be allocated (regardless of whether they are used), and
it will break io_apic_setup_irq_pin() which also wants to call
irq_alloc_desc().

Ideally irq_domain support would be rolled directly into ioapic.
That's more work though, and a greater chance of breaking x86 on
non-embedded.  Looking at the ioapic code, it seems to me that it
could be simplified quite a bit by switching to irq_domain instead of
using the custom irq_cfg linked list.  Faster too since it could use
the irq_data->hwirq to go from linux irq to the hw irq number.  It
doesn't look like it would be even that hard, but of course the devil
is in the details and I don't have sufficient time right now to dig
into the guts of the ioapic.  Maybe after connect, but it would help
if you can find time to look at it.

If integrated into the ioapic code, then the irq_domain linear map is
probably the type of irq_domain to use.

g.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [RFC] dmaengine/dma_slave: add context parameter to prep_slave_sg callback

2012-01-30 Thread Bounine, Alexandre
On Monday, January 30, 2012 at 4:31 AM, Vinod Koul wrote:
> 
> On Thu, 2012-01-26 at 16:22 -0500, Alexandre Bounine wrote:
> > As we agreed during our discussion about adding DMA Engine support for 
> > RapidIO
> > subsystem, RapidIO and similar clients may benefit from adding an extra 
> > context
> > parameter to device_prep_slave_sg() callback.
> > See https://lkml.org/lkml/2011/10/24/275 for more details.
> >
> > Adding the context parameter will allow to pass client/target specific
> > information associated with an individual data transfer request.
> >
> > In the case of RapidIO support this additional information consists of 
> > target
> > destination ID and its buffer address (which is not mapped into the local 
> > CPU
> > memory space). Because a single RapidIO-capable DMA channel may queue data
> > transfer requests to different target devices, the per-request configuration
> > is required.
> >
> > The proposed change eliminates need for new subsystem-specific API.
> > Existing DMA_SLAVE clients will ignore the new parameter.
> >
> > This RFC only demonstrates the API change and does not include corresponding
> > changes to existing DMA_SLAVE clients. Complete set of patches will be 
> > provided
> > after (if) this API change is accepted.
>
> This looks good to me. But was thinking if we need to add this new
> parameter for other slave calls (circular, interleaved, memcpy...)
> 

I agree that cyclic and interleaved calls may benefit from adding that 
parameter as well.
Benefits to the cyclic call are straightforward - same as dma_slave.
Adding a context parameter to the interleaved transfers may be more of a 
future-proofing option
than an immediate need. Memcopy and other calls that deal with local memory 
transfers
probably should be left untouched.

What if we limit modifications to:
1) three calls (slave, cyclic and interleaved) OR
2) two (slave and cyclic) at this moment?

I am just more focused on dma_slave just because it fits well to provide RDMA
over RapidIO fabric.

If everybody agrees, I can go ahead and make changes to all three at once.

Alex.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 06/13] PCI, powerpc: Register busn_res for root buses

2012-01-30 Thread Bjorn Helgaas
On Fri, Jan 27, 2012 at 6:49 PM, Yinghai Lu  wrote:
> Signed-off-by: Yinghai Lu 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
>  arch/powerpc/kernel/pci-common.c |    7 ++-
>  1 files changed, 6 insertions(+), 1 deletions(-)
>
> diff --git a/arch/powerpc/kernel/pci-common.c 
> b/arch/powerpc/kernel/pci-common.c
> index cce98d7..501f29b 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -1732,6 +1732,8 @@ void __devinit pcibios_scan_phb(struct pci_controller 
> *hose)
>        bus->secondary = hose->first_busno;
>        hose->bus = bus;
>
> +       pci_bus_insert_busn_res(bus, hose->first_busno, hose->last_busno);

The pci_create_root_bus() call is a few lines above this.  So this is
a case of "create the root bus" followed by "fix something that's
wrong with the bus we just created."  I'm trying to get rid of that
pattern because it's just an opportunity for bugs.  I'd rather create
the root bus with all the information it needs up front.

And pci_bus_insert_busn_res() is not really architecture-specific, so
I'd like this better if that call were done in the PCI core somewhere.

>        /* Get probe mode and perform scan */
>        mode = PCI_PROBE_NORMAL;
>        if (node && ppc_md.pci_probe_mode)
> @@ -1742,8 +1744,11 @@ void __devinit pcibios_scan_phb(struct pci_controller 
> *hose)
>                of_scan_bus(node, bus);
>        }
>
> -       if (mode == PCI_PROBE_NORMAL)
> +       if (mode == PCI_PROBE_NORMAL) {
> +               pci_bus_update_busn_res_end(bus, 255);
>                hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
> +               pci_bus_update_busn_res_end(bus, bus->subordinate);
> +       }
>
>        /* Platform gets a chance to do some global fixups before
>         * we proceed to resource allocation
> --
> 1.7.7
>
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 2/2] powerpc: Abstract common define of signal multiplex control for qe

2012-01-30 Thread Tabi Timur-B04825
fanzc wrote:
>>
>>> Signed-off-by: Fanzc
>> Please fix this.  There are only two e's in freescale.  In addition,
>> please use your full name.
>>
> Hi Timur,
>
>You mean that need to remove the define to other file or create new
> file?

No, I mean your signed-off-by should be this:

Signed-off-by: Zhicheng Fan 

-- 
Timur Tabi
Linux kernel developer at Freescale
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC] dmaengine/dma_slave: add context parameter to prep_slave_sg callback

2012-01-30 Thread Vinod Koul
On Thu, 2012-01-26 at 16:22 -0500, Alexandre Bounine wrote:
> As we agreed during our discussion about adding DMA Engine support for RapidIO
> subsystem, RapidIO and similar clients may benefit from adding an extra 
> context
> parameter to device_prep_slave_sg() callback.
> See https://lkml.org/lkml/2011/10/24/275 for more details.
> 
> Adding the context parameter will allow to pass client/target specific
> information associated with an individual data transfer request.
> 
> In the case of RapidIO support this additional information consists of target
> destination ID and its buffer address (which is not mapped into the local CPU
> memory space). Because a single RapidIO-capable DMA channel may queue data
> transfer requests to different target devices, the per-request configuration
> is required.
> 
> The proposed change eliminates need for new subsystem-specific API.
> Existing DMA_SLAVE clients will ignore the new parameter.
> 
> This RFC only demonstrates the API change and does not include corresponding
> changes to existing DMA_SLAVE clients. Complete set of patches will be 
> provided
> after (if) this API change is accepted.
This looks good to me. But was thinking if we need to add this new
parameter for other slave calls (circular, interleaved, memcpy...)

> 
> Signed-off-by: Alexandre Bounine 
> Cc: Jassi Brar 
> Cc: Russell King  
> Cc: Kumar Gala 
> Cc: Matt Porter 
> Cc: Li Yang 
> ---
>  include/linux/dmaengine.h |7 ---
>  1 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 679b349..79d71bb 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -575,7 +575,7 @@ struct dma_device {
>   struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
>   struct dma_chan *chan, struct scatterlist *sgl,
>   unsigned int sg_len, enum dma_transfer_direction direction,
> - unsigned long flags);
> + unsigned long flags, void *context);
>   struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
>   struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
>   size_t period_len, enum dma_transfer_direction direction);
> @@ -607,12 +607,13 @@ static inline int dmaengine_slave_config(struct 
> dma_chan *chan,
>  
>  static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
>   struct dma_chan *chan, void *buf, size_t len,
> - enum dma_transfer_direction dir, unsigned long flags)
> + enum dma_transfer_direction dir, unsigned long flags, void *context)
>  {
>   struct scatterlist sg;
>   sg_init_one(&sg, buf, len);
>  
> - return chan->device->device_prep_slave_sg(chan, &sg, 1, dir, flags);
> + return chan->device->device_prep_slave_sg(chan, &sg, 1, dir, flags,
> +   context);
>  }
>  
>  static inline int dmaengine_terminate_all(struct dma_chan *chan)


-- 
~Vinod

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev