[PATCH v2] powerpc: mpc5200: Add a3m071 board support

2012-12-06 Thread Stefan Roese
This patch adds the MPC5200B based a3m071 board.

Signed-off-by: Stefan Roese s...@denx.de
Cc: Anatolij Gustschin ag...@denx.de
---
v2:
- Remove cdm@200 DT node as it's not used
- Disable i2c controller in dts as it's unused on this board

 arch/powerpc/boot/dts/a3m071.dts | 144 +++
 arch/powerpc/platforms/52xx/mpc5200_simple.c |   1 +
 2 files changed, 145 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/a3m071.dts

diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
new file mode 100644
index 000..877a28c
--- /dev/null
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -0,0 +1,144 @@
+/*
+ * a3m071 board Device Tree Source
+ *
+ * Copyright 2012 Stefan Roese s...@denx.de
+ *
+ * Copyright (C) 2011 DENX Software Engineering GmbH
+ * Heiko Schocher h...@denx.de
+ *
+ * Copyright (C) 2007 Semihalf
+ * Marian Balakowicz m...@semihalf.com
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ mpc5200b.dtsi
+
+/ {
+	model = "anonymous,a3m071";
+	compatible = "anonymous,a3m071";
+
+	soc5200@f000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "fsl,mpc5200b-immr";
+		ranges = <0 0xf000 0xc000>;
+		reg = <0xf000 0x0100>;
+		bus-frequency = <0>; /* From boot loader */
+		system-frequency = <0>; /* From boot loader */
+
+		timer@600 {
+			fsl,has-wdt;
+		};
+
+		spi@f00 {
+			status = "disabled";
+		};
+
+		usb: usb@1000 {
+			status = "disabled";
+		};
+
+		psc@2000 {
+			compatible = "fsl,mpc5200b-psc-uart", "fsl,mpc5200-psc-uart";
+			reg = <0x2000 0x100>;
+			interrupts = <2 1 0>;
+		};
+
+		psc@2200 {
+			status = "disabled";
+		};
+
+		psc@2400 {
+			status = "disabled";
+		};
+
+		psc@2600 {
+			status = "disabled";
+		};
+
+		psc@2800 {
+			status = "disabled";
+		};
+
+		psc@2c00 {		// PSC6
+			compatible = "fsl,mpc5200b-psc-uart", "fsl,mpc5200-psc-uart";
+			reg = <0x2c00 0x100>;
+			interrupts = <2 4 0>;
+		};
+
+		ethernet@3000 {
+			phy-handle = <&phy0>;
+		};
+
+		mdio@3000 {
+			phy0: ethernet-phy@3 {
+				reg = <0x03>;
+			};
+		};
+
+		ata@3a00 {
+			status = "disabled";
+		};
+
+		i2c@3d00 {
+			status = "disabled";
+		};
+
+		i2c@3d40 {
+			status = "disabled";
+		};
+	};
+
+	localbus {
+		compatible = "fsl,mpc5200b-lpb", "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		ranges = <0 0 0xfc00 0x0200
+			  3 0 0xe900 0x0008
+			  5 0 0xe800 0x0001>;
+
+		flash@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0 0x0 0x0200>;
+			compatible = "cfi-flash";
+			bank-width = <2>;
+			partition@0x0 {
+				label = "u-boot";
+				reg = <0x 0x0004>;
+				read-only;
+			};
+			partition@0x0004 {
+				label = "env";
+				reg = <0x0004 0x0002>;
+			};
+			partition@0x0006 {
+				label = "dtb";
+				reg = <0x0006 0x0002>;
+			};
+			partition@0x0008 {
+				label = "kernel";
+				reg = <0x0008 0x0050>;
+			};
+			partition@0x0058 {
+				label = "root";
+				reg = <0x0058 0x00A8>;
+			};
+		};
+
+		fpga@3,0 {
+			compatible = "anonymous,a3m071-fpga";
+			reg = <3 0x0 0x0008
+			       5 0x0 0x0001>;
+

Re: [linuxppc-release][PATCH] powerpc/pci-hotplug: fix init issue of rescanned pci device

2012-12-06 Thread Chen Yuanquan-B41889

On 12/06/2012 05:30 AM, Bjorn Helgaas wrote:

On Wed, Dec 5, 2012 at 2:29 AM, Chen Yuanquan-B41889
b41...@freescale.com wrote:

On 12/05/2012 04:26 PM, Benjamin Herrenschmidt wrote:

On Wed, 2012-12-05 at 16:20 +0800, Chen Yuanquan-B41889 wrote:

On 12/05/2012 03:17 PM, Benjamin Herrenschmidt wrote:

On Wed, 2012-12-05 at 10:31 +0800, Yuanquan Chen wrote:

On the powerpc arch, some fixup work for PCI/PCI-e devices is only done
during the first scan at boot time. For PCI/PCI-e devices rescanned after
the Linux OS has booted, the fixup work won't be done, which leads to
dma_set_mask errors or IRQ-related issues in the rescanned device's driver.
So do the same fixup work for the rescanned device to avoid this issue.

Hrm, the patch is a bit gross. First the code shouldn't be copy/pasted
that way but factored out.

Please, at least format your email properly so I can try to understand
without needing aspirin.


There's a check if (!bus->is_added) before the call to
pcibios_fixup_bus in pci_scan_child_bus, so for the rescanned device
the fixup won't execute, which leads to a fatal error in the driver of the
rescanned device on Freescale powerpc; there is no such issue on the x86 arch.

First, none of that invalidates my statement that you shouldn't
duplicate a whole block of code like this. Even if your approach is
correct (which is debated separately), at the very least you should
factor the code out into a common function between the two copies.
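
For illustration only, the factored-out helper being asked for might look
roughly like this, reusing the calls that appear in the patch hunk further
down (the name and placement are hypothetical, not part of the submission):

/* Sketch only: one possible shape for a shared per-device fixup helper,
 * built from the calls used in the patch below. */
static void pcibios_setup_one_device(struct pci_dev *dev)
{
	/* Fixup NUMA node as it may not be set up yet by the generic code */
	set_dev_node(&dev->dev, pcibus_to_node(dev->bus));

	/* Hook up default DMA ops */
	set_dma_ops(&dev->dev, pci_dma_ops);
	set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);

	/* Additional platform DMA/iommu setup */
	if (ppc_md.pci_dma_dev_setup)
		ppc_md.pci_dma_dev_setup(dev);

	/* Read default IRQs and fixup if necessary */
	pci_read_irq_line(dev);
}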


If we remove that check and let it call pcibios_fixup_bus directly,
the error won't occur for the rescanned device. But that's generic code and
not the proper place to change, so the pcibios_fixup_bus work is copied into
pcibios_enable_device.


I'm surprised also that is_added is false when pcibios_enable_device()
gets called ... that looks strange to me. At what point is that enable
happening in the hotplug sequence ?

All devices are rescanned, and then pci_enable_devices and
pci_bus_add_devices are called.

Where ? How ? What is the sequence happening ? In any case, I think if
we need a proper fixup done per-device like that after scan we ought to
create a new hook at the generic level rather than that sort of hack.


echo 1 > rescan to trigger dev_rescan_store:

dev_rescan_store -> pci_rescan_bus -> pci_scan_child_bus,
pci_assign_unassigned_bus_resources,
pci_enable_bridges, pci_bus_add_devices

pci_enable_bridges -> pci_enable_device -> __pci_enable_device_flags -> do_pci_enable_device ->
pcibios_enable_device

pci_bus_add_devices -> pci_bus_add_device -> dev->is_added = 1

Yeah, it's general fixup code for every rescanned PCI/PCI-e device on
powerpc at runtime. So if we want to call it in a ppc_md member, we need to
wrap it as a function and assign it in every ppc_md; that isn't proper for
the generic code.

Regards,
yuanquan



The patch code will be called by pci_enable_devices. The dev->is_added
is set in pci_bus_add_device
which is called by pci_bus_add_devices. So dev->is_added is false when
checking it in pcibios_enable_device
for the rescanned device.

Who calls pci_enable_device() in the rescan case ? Why isn't it left to
the driver ? I don't think we can rely on that behaviour not to change.


How do you trigger the rescan anyway ?

Use the interface under /sys:
echo 1 > /sys/bus/pci/devices/xxx/remove

then echo 1 to the rescan file of the PCI device which is the bus of the removed device:
echo 1 > /sys/bus/pci/devices//rescan
The removed device will be scanned and its driver module will be loaded
automatically.

Yeah, these code paths are known to be fishy. I think the problem is at the
generic abstraction level and that's where it needs to be fixed.

Cheers,
Ben.


Regards,
yuanquan

I think the problem needs to be solved at a higher level; I'm adding
linux-pci & Bjorn to the CC list.

Cheers,
Ben.


Signed-off-by: Yuanquan Chen b41...@freescale.com
---
arch/powerpc/kernel/pci-common.c |   20 
1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/kernel/pci-common.c
b/arch/powerpc/kernel/pci-common.c
index 7f94f76..f0fb070 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1496,6 +1496,26 @@ int pcibios_enable_device(struct pci_dev *dev,
int mask)
 if (ppc_md.pcibios_enable_device_hook(dev))
 return -EINVAL;
+	if (!dev->is_added) {
+		/*
+		 * Fixup NUMA node as it may not be setup yet by the generic
+		 * code and is needed by the DMA init
+		 */
+		set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
+
+		/* Hook up default DMA ops */
+		set_dma_ops(&dev->dev, pci_dma_ops);
+		set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+
+		/* Additional platform DMA/iommu setup */
+		if (ppc_md.pci_dma_dev_setup)
+			ppc_md.pci_dma_dev_setup(dev);
+
+		/* Read default IRQs and fixup if necessary */
+		pci_read_irq_line(dev);
+		if 

Re: [stable] [PATCH] powerpc/ptrace: Fix build with gcc 4.6

2012-12-06 Thread Michael Ellerman


Michael Ellerman mich...@ellerman.id.au wrote:

On Thu, 2011-11-17 at 13:31 +1100, Michael Neuling wrote:
 From: Benjamin Herrenschmidt b...@kernel.crashing.org
 
 powerpc/ptrace: Fix build with gcc 4.6
 
 gcc (rightfully) complains that we are accessing beyond the
 end of the fpr array (we do, to access the fpscr).

This patch is still missing from the 3.0 stable series.

Do we need to resend ?

And resent to the correct stable address.

cheers

-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity.

Re: Understanding how kernel updates MMU hash table

2012-12-06 Thread Benjamin Herrenschmidt
On Wed, 2012-12-05 at 23:57 -0800, Pegasus11 wrote:
 Hi Ben.
 
 Got it..no more quoting replies...

Quoting is fine ... as long as you quote the bits you reply to, not
your actual reply part :)

 You mentioned the MMU looking into a hash table if it misses a translation
 entry in the TLB. This means that there is a hardware TLB for sure. 

Sure, nobody sane would design a CPU without one nowadays :-)

 By your words, I understand that the hash table is an in-memory cache of
 translations meaning it is implemented in software.

Well, it's populated by software and read by HW. I.e. on x86, the MMU
will walk a radix tree of page tables; on powerpc it will walk an in-memory
hash table. The main difference is that on x86 there is usually a tree per
process, while the powerpc hash table tends to be global.

 So whenever the MMU wishes to translate a virtual address, it first checks 
 the TLB and if it
 isn't found there, it looks for it in the hash table. Now this seems fine to
 me when looked at from the perspective of the MMU. Now when I look at it
 from the kernel's perspective, I am a bit confused.
 
 So when we (the kernel) encounter a virtual address, we walk the page tables
 and if we find that there is no valid entry for this address, we page fault
 which causes an exception right?

Hrm ... not sure what we mean by the kernel. There are two different
paths here, but let's focus on the usual case... the processor encounters
an address, whether it's trying to fetch an instruction, or having done
that, is performing a load or a store. This will use what we call in
powerpc lingua an effective address. This gets in turn turned into a
virtual address after an SLB lookup.

I refer you to the architecture here, it's a bit tricky but basically
the principle is that the virtual address space is *somewhat* the
effective address space along with the process id. Except that on
powerpc, we do that per-segment (we divide the address space into
segments) so each segment has its top bits transformed into something
larger called the VSID.
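
As a purely illustrative sketch (64-bit mode, 256MB segments, 4K pages,
field widths simplified; not taken from the kernel), the split described
above looks roughly like this:

/* Illustration only: how an effective address breaks down before the SLB
 * swaps the segment number (ESID) for the larger VSID. */
#include <stdint.h>

struct ea_parts { uint64_t esid, page, offset; };

static struct ea_parts split_ea(uint64_t ea)
{
	struct ea_parts p;
	p.esid   = ea >> 28;            /* 256MB segment number (ESID)       */
	p.page   = (ea >> 12) & 0xffff; /* 4K page within the segment        */
	p.offset = ea & 0xfff;          /* byte within the page              */
	return p;  /* SLB: ESID -> VSID; virtual address = VSID|page|offset  */
}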

In any case, this results in a virtual address which is then looked up
in the TLB (I'm ignoring the ERAT here which is the 1-st level TLB but
let's not complicate things even more). If that misses, the CPU looks up
in the hash table. If that misses, it causes an exception (0x300 for
data accesses, 0x400 for instruction accesses).

There, Linux will usually go into hash_page which looks for the Linux
PTE. If the PTE is absent (or has any other reason to be unusable such
as being read-only for a write access), we get to do_page_fault.

Else, we populate the hash table with a translation, set the HASHPTE bit
in the PTE, and retry the access.
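
A toy model of that flow, purely for illustration (this is not the real
kernel code; the real paths are hash_page() and do_page_fault()):

/* Illustration only: on a hash miss, either refill the hash table from the
 * Linux PTE and retry the access, or fall through to the high-level fault
 * handler. */
#include <stdbool.h>
#include <stdio.h>

struct toy_pte { bool present; bool writable; bool hashpte; };

static const char *handle_hash_miss(struct toy_pte *pte, bool is_write)
{
	if (!pte || !pte->present || (is_write && !pte->writable))
		return "do_page_fault";            /* high-level fault handling */

	pte->hashpte = true;                       /* remember it is hashed in  */
	return "hash refilled, retry the access";  /* populate hash, then retry */
}

int main(void)
{
	struct toy_pte p = { .present = true, .writable = false };
	printf("write: %s\n", handle_hash_miss(&p, true));   /* read-only page */
	printf("read:  %s\n", handle_hash_miss(&p, false));  /* refill + retry */
	return 0;
}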

  And this exception then takes us to the
 exception handler which I guess is 'do_page_fault'. On checking this
 function I see that it gets the PGD, allocates a PMD, allocates a PTE and
 then it calls handle_pte_fault. The comment banner for handle_pte_fault
 reads:
 
 1638 /* These routines also need to handle stuff like marking pages dirty
 1639 * and/or accessed for architectures that don't do it in hardware (most
 1640 * RISC architectures).  The early dirtying is also good on the i386.
 1641 *
 1642 * There is also a hook called update_mmu_cache() that architectures
 1643 * with external mmu caches can use to update those (ie the Sparc or
 1644 * PowerPC hashed page tables that act as extended TLBs)
 .
 */

Yes, when we go to do_page_fault() because the PTE wasn't populated in
the first place, we have a hook to pre-fill the hash table instead of
taking a fault again which will fill it the second time around. It's
just a shortcut.

 It is from such comments that I inferred that the hash tables were being
 used as extended TLBs. However, the above also implies (at least to me) that
 these caches are in hardware, as they've used the word 'extended'. Pardon me
 if I am being nitpicky but these things are confusing me a bit. So to clear
 this confusion, there are three things I would like to know.
 1. Is the MMU cache implemented in hardware or software? I trust you on it
 being software but it would be great if you could address my concern in the
 above paragraph.

The TLB is a piece of HW. (there are really three in fact, the I-ERAT, the
D-ERAT and the TLB ;-)

The Hash Table is a piece of RAM (pointed to by the SDR1 register) set up
and populated by the OS but read by the HW. Just like the page tables
on x86.
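
To make "hashed" concrete, here is a very simplified flavour of how a bucket
(PTE group) gets picked; the real computation lives in hpt_hash() in the
powerpc mm code, and the masking below is a simplified assumption:

/* Simplified illustration: the (VSID, page-within-segment) pair is hashed
 * to select a PTE group (PTEG) in the in-memory hash table. */
#include <stdint.h>

static uint64_t pteg_index(uint64_t vsid, uint64_t page_in_segment,
			   uint64_t htab_hash_mask)
{
	return (vsid ^ page_in_segment) & htab_hash_mask;   /* primary hash */
}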

 2. The kernel, it looks from the do_page_fault sequence, is updating its
 internal page table first and then it goes on to update the mmu cache. So
 this only means it is satisfying the requirement of someone else, perhaps
 the MMU here. 

update_mmu_cache() is just a shortcut.

As I explained above, we populate the hash table lazily on fault.
However, when taking an actual high level page fault (do_page_fault), we
*know* the hash doesn't have an appropriate translation, so rather than
just filling up the linux PTE and then 

[PATCH 13/20] ALSA: sound/ps3: remove __dev* attributes

2012-12-06 Thread Bill Pemberton
CONFIG_HOTPLUG is going away as an option.  As a result the __dev*
markings will be going away.

Remove use of __devinit, __devexit_p, __devinitdata, __devinitconst,
and __devexit.

Signed-off-by: Bill Pemberton wf...@virginia.edu
Cc: Geoff Levand ge...@infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: cbe-oss-...@lists.ozlabs.org
---
 sound/ppc/snd_ps3.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c
index 9b18b52..8c7dcbe 100644
--- a/sound/ppc/snd_ps3.c
+++ b/sound/ppc/snd_ps3.c
@@ -786,7 +786,7 @@ static struct snd_pcm_ops snd_ps3_pcm_spdif_ops = {
 };
 
 
-static int __devinit snd_ps3_map_mmio(void)
+static int snd_ps3_map_mmio(void)
 {
the_card.mapped_mmio_vaddr =
	ioremap(the_card.ps3_dev->m_region->bus_addr,
@@ -808,7 +808,7 @@ static void snd_ps3_unmap_mmio(void)
the_card.mapped_mmio_vaddr = NULL;
 }
 
-static int __devinit snd_ps3_allocate_irq(void)
+static int snd_ps3_allocate_irq(void)
 {
int ret;
u64 lpar_addr, lpar_size;
@@ -866,7 +866,7 @@ static void snd_ps3_free_irq(void)
ps3_irq_plug_destroy(the_card.irq_no);
 }
 
-static void __devinit snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
+static void snd_ps3_audio_set_base_addr(uint64_t ioaddr_start)
 {
uint64_t val;
int ret;
@@ -882,7 +882,7 @@ static void __devinit snd_ps3_audio_set_base_addr(uint64_t 
ioaddr_start)
ret);
 }
 
-static void __devinit snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
+static void snd_ps3_audio_fixup(struct snd_ps3_card_info *card)
 {
/*
 * avsetting driver seems to never change the followings
@@ -906,7 +906,7 @@ static void __devinit snd_ps3_audio_fixup(struct 
snd_ps3_card_info *card)
   PS3_AUDIO_AO_3WMCTRL_ASOPLRCK_DEFAULT);
 }
 
-static int __devinit snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
+static int snd_ps3_init_avsetting(struct snd_ps3_card_info *card)
 {
int ret;
	pr_debug("%s: start\n", __func__);
@@ -928,7 +928,7 @@ static int __devinit snd_ps3_init_avsetting(struct 
snd_ps3_card_info *card)
return ret;
 }
 
-static int __devinit snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
+static int snd_ps3_driver_probe(struct ps3_system_bus_device *dev)
 {
int i, ret;
u64 lpar_addr, lpar_size;
-- 
1.8.0.1



Re: [PATCH 13/20] ALSA: sound/ps3: remove __dev* attributes

2012-12-06 Thread Geoff Levand
On Thu, 2012-12-06 at 12:35 -0500, Bill Pemberton wrote:
 CONFIG_HOTPLUG is going away as an option.  As result the __dev*
 markings will be going away.
 
 Remove use of __devinit, __devexit_p, __devinitdata, __devinitconst,
 and __devexit.
 
 Signed-off-by: Bill Pemberton wf...@virginia.edu
 Cc: Geoff Levand ge...@infradead.org
 Cc: linuxppc-dev@lists.ozlabs.org
 Cc: cbe-oss-...@lists.ozlabs.org
 ---
  sound/ppc/snd_ps3.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

Looks OK for PS3.

Acked-by: Geoff Levand ge...@infradead.org



Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-06 Thread Tang Chen

Hi Wu,

I met some problems when I was digging into the code. It would be very
kind of you if you could help me with that. :)

If I misunderstood your code, please tell me.
Please see below. :)

On 12/03/2012 10:23 AM, Jianguo Wu wrote:

Signed-off-by: Jianguo Wuwujian...@huawei.com
Signed-off-by: Jiang Liujiang@huawei.com
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  231 +++
  mm/sparse.c |3 +-
  3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
	MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include <asm/tlbflush.h>
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+	magic = (unsigned long) page->lru.next;
+	if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+		put_page_bootmem(page);
+
+		zone = page_zone(page);
+		zone_span_writelock(zone);
+		zone->present_pages++;
+		zone_span_writeunlock(zone);
+		totalram_pages++;
+	} else
+		free_pages((unsigned long)page_address(page), order);


Here, I think SECTION_INFO and MIX_SECTION_INFO pages are all allocated
by bootmem, so I wrote this function this way.

I'm not sure if the order parameter is necessary here. It will always be 0
in your code. Is this OK with you ?

static void free_pagetable(struct page *page)
{
struct zone *zone;
bool bootmem = false;
unsigned long magic;

/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
bootmem = true;
}

magic = (unsigned long) page->lru.next;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
put_page_bootmem(page);
else
__free_page(page);

/*
 * SECTION_INFO pages and MIX_SECTION_INFO pages
 * are all allocated by bootmem.
 */
if (bootmem) {
zone = page_zone(page);
zone_span_writelock(zone);
zone->present_pages++;
zone_span_writeunlock(zone);
totalram_pages++;
}
}

(snip)


+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long end)
+{
+	pte_t *pte;
+	unsigned long next;
+	void *page_addr;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; pte++, addr += PAGE_SIZE) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (pte_none(*pte))


Here, you checked xxx_none() in your vmemmap_xxx_remove(), but you used
!xxx_present() in your x86_64 patches. Is it OK if I only check
!xxx_present() ?


+			continue;
+		if (IS_ALIGNED(addr, PAGE_SIZE) &&
+		    IS_ALIGNED(next, PAGE_SIZE)) {
+			vmemmap_free_pages(pte_page(*pte), 0);
+			spin_lock(&init_mm.page_table_lock);
+			pte_clear(&init_mm, addr, pte);
+			spin_unlock(&init_mm.page_table_lock);
+		} else {
+			/*
+			 * Removed page structs are filled with 0xFD.
+			 */
+			memset((void *)addr, PAGE_INUSE, next - addr);
+			page_addr = page_address(pte_page(*pte));
+
+			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+				spin_lock(&init_mm.page_table_lock);
+				pte_clear(&init_mm, addr, pte);
+				spin_unlock(&init_mm.page_table_lock);


Here, since we clear pte, we should also free the page, right ?


+   }
+   }
+   }
+
+   

[RFC] Add IBM Blue Gene/Q Platform

2012-12-06 Thread Jimi Xenidis
Rather than flood the mailing list with the patches, I've arranged for a git 
repo to hold the changesets.
You can find the repo here:
  https://github.com/jimix/linux-bgq

They are against GregKH's linux-stable.git long-term 3.4.y (y=22) branch.
The first 9 (6e58088f..) affect common code and the rest are BGQ specific.

Here are the summary logs:

$ git log --reverse linux-stable/linux-3.4.y..
commit 5a8edb2bdd914597693eed299119ff4c2e6d31f2
Author: Jimi Xenidis ji...@pobox.com
Date:   Fri Nov 9 09:26:00 2012 -0600

powerpc: Fix cputable #ifdef where CONFIG_PPC_A2 is used for 
CONFIG_PPC_BOOK3E_64

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit ea51920d7035c8d23801d6de46261e7d0a537dfd
Author: Jimi Xenidis ji...@pobox.com
Date:   Fri Nov 9 08:58:27 2012 -0600

powerpc/book3e: Remove config for PPC_A2_DD2 since there is no reference to 
it

This must have been leftover from early DD1 days which is not
present in any current kernel code.

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit 08151401a5db4ff0d441a1b7bf8ad92bd92b14c5
Author: Jimi Xenidis ji...@pobox.com
Date:   Mon Nov 5 09:38:01 2012 -0600

powerpc/dcr: Some native DCR fixes

The following fixes have been made:
 - dcr_read/write_native() must use the indexed version of the
   m[ft]dcrx since the non-indexed version only allows a 10-bit
   numerical space, but the C interface allows a full 32-bits.
 - C bindings for m[ft]dcrx, and the table versions, should use
   unsigned long so that they are 64/32 bit neutral.
 - The table versions (__m[ft]cdr) should obtain the table address
   with LOAD_REG_ADDR(), this will also make it 64/32bit neutral.

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit c8320a5daaceed03992d763302020834ea8e17dd
Author: Jimi Xenidis ji...@pobox.com
Date:   Mon Nov 5 09:12:00 2012 -0600

powerpc/dcr: Add 64-bit DCR access methods.

This patch adds the ability to make 64-bit Device Control Register
(DCR) accesses.

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit a763b3f8453b3bd83d7dded8c6644939863af430
Author: Jimi Xenidis ji...@pobox.com
Date:   Thu Nov 29 12:49:24 2012 -0500

powerpc/boot: Add a spin_threads hook to platform_ops

It is useful for the boot program to arrange for all secondary cpus
and threads to enter the kernel in a kexec fashion.  This hook makes
it possible.

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit 391e43393380b514d4d02a42d059619542c7597b
Author: Jimi Xenidis ji...@pobox.com
Date:   Thu Nov 29 13:01:23 2012 -0500

powerpc/kexec: Add kexec hold support for Book3e processors

This patch add two items:
1) Book3e requires that GPR4 survive the hold process, so we make
   sure that happens.
2) Book3e has no real mode, and the hold code exploits this.  Since
   these processors ares always translated, we arrange for the kexeced
   threads to enter the hold code using the normal kernel linear mapping.

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit f6e3c1f706cb6922349d639a74ff6c50acc8b9f8
Author: Jimi Xenidis ji...@pobox.com
Date:   Wed Dec 5 13:41:25 2012 -0500

powerpc: Remove unecessary VSX symbols

The symbol THREAD_VSR0 is defined to be the same as THREAD_FPR0.  Its
presence causes build issues with more complex configurations.

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit 4e817bb42ec8e3d3689877528dd97c4286a870eb
Author: Jimi Xenidis ji...@pobox.com
Date:   Tue Nov 20 10:10:52 2012 -0600

Blue Gene/Q wicked optimizing compiler does not know the rfdi instruction 
yet

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit 2071aa58b2f3b33d97c94e3a127f7c5d4ffaeb34
Author: Jimi Xenidis ji...@pobox.com
Date:   Tue Nov 20 10:14:22 2012 -0600

Blue Gene/Q wicked optimizing compiler does not know the mfdcrx instruction 
yet

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit 6e58088fabedbb2d724637b539ba180c03ed8b68
Author: Jimi Xenidis ji...@pobox.com
Date:   Wed Oct 31 16:33:21 2012 -0500

powerpc/book3e: IBM Blue Gene/Q Boot

This patch specifically deals with the initial program load
environment so that a boot image (dtbImage.bgq) can be loaded by the
BGQ management tools.  The boot code is a little odd because it has to
deal with the following issues:
 - Linux boot image wrappers are 32-bit programs
 - BGQ Tools only load 64bit ELF programs
 - BGQ Firmware information is typically loaded at an address > 4G
 - BGQ FW information contains 64-bit ABI function pointers (which are
   actually function descriptors) to access firmware methods
 - BGQ FW methods must be called in 64-bit mode

Includes code contributed from:
  Andrew Tauferner atau...@us.ibm.com
  Todd Inglett tingl...@us.ibm.com
  Eric Van Hensbergen eri...@gmail.com

Signed-off-by: Jimi Xenidis ji...@pobox.com

commit 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-06 Thread Jianguo Wu
Hi Tang,

On 2012/12/7 9:42, Tang Chen wrote:

 Hi Wu,
 
 I met some problems when I was digging into the code. It's very
 kind of you if you could help me with that. :)
 
 If I misunderstood your code, please tell me.
 Please see below. :)
 
 On 12/03/2012 10:23 AM, Jianguo Wu wrote:
 Signed-off-by: Jianguo Wuwujian...@huawei.com
 Signed-off-by: Jiang Liujiang@huawei.com
 ---
   include/linux/mm.h  |1 +
   mm/sparse-vmemmap.c |  231 
 +++
   mm/sparse.c |3 +-
   3 files changed, 234 insertions(+), 1 deletions(-)

 diff --git a/include/linux/mm.h b/include/linux/mm.h
 index 5657670..1f26af5 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
 long pages, int node);
   void vmemmap_populate_print_last(void);
   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
 unsigned long size);
 +void vmemmap_free(struct page *memmap, unsigned long nr_pages);

   enum mf_flags {
    MF_COUNT_INCREASED = 1 << 0,
 diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
 index 1b7e22a..748732d 100644
 --- a/mm/sparse-vmemmap.c
 +++ b/mm/sparse-vmemmap.c
 @@ -29,6 +29,10 @@
   #includeasm/pgalloc.h
   #includeasm/pgtable.h

 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +#includeasm/tlbflush.h
 +#endif
 +
   /*
* Allocate a block of memory to be used to back the virtual memory map
* or to back the page tables that are used to create the mapping.
 @@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
 **map_map,
   vmemmap_buf_end = NULL;
   }
   }
 +
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +
 +#define PAGE_INUSE 0xFD
 +
 +static void vmemmap_free_pages(struct page *page, int order)
 +{
 +struct zone *zone;
 +unsigned long magic;
 +
 +magic = (unsigned long) page->lru.next;
 +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 +put_page_bootmem(page);
 +
 +zone = page_zone(page);
 +zone_span_writelock(zone);
 +zone->present_pages++;
 +zone_span_writeunlock(zone);
 +totalram_pages++;
 +} else
 +free_pages((unsigned long)page_address(page), order);
 
 Here, I think SECTION_INFO and MIX_SECTION_INFO pages are all allocated
 by bootmem, so I put this function this way.
 
 I'm not sure if parameter order is necessary here. It will always be 0
 in your code. Is this OK to you ?
 

parameter order is necessary in cpu_has_pse case:
vmemmap_pmd_remove
free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE))

 static void free_pagetable(struct page *page)
 {
 struct zone *zone;
 bool bootmem = false;
 unsigned long magic;
 
 /* bootmem page has reserved flag */
 if (PageReserved(page)) {
 __ClearPageReserved(page);
 bootmem = true;
 }
 
 magic = (unsigned long) page->lru.next;
 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
 put_page_bootmem(page);
 else
 __free_page(page);
 
 /*
  * SECTION_INFO pages and MIX_SECTION_INFO pages
  * are all allocated by bootmem.
  */
 if (bootmem) {
 zone = page_zone(page);
 zone_span_writelock(zone);
 zone->present_pages++;
 zone_span_writeunlock(zone);
 totalram_pages++;
 }
 }
 
 (snip)
 
 +
 +static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned 
 long end)
 +{
 +pte_t *pte;
 +unsigned long next;
 +void *page_addr;
 +
 +pte = pte_offset_kernel(pmd, addr);
 +for (; addr < end; pte++, addr += PAGE_SIZE) {
 +next = (addr + PAGE_SIZE) & PAGE_MASK;
 +if (next > end)
 +next = end;
 +
 +if (pte_none(*pte))
 
 Here, you checked xxx_none() in your vmemmap_xxx_remove(), but you used
 !xxx_present() in your x86_64 patches. Is it OK if I only check
 !xxx_present() ?

It is Ok.

 
 +continue;
 +if (IS_ALIGNED(addr, PAGE_SIZE) &&
 +IS_ALIGNED(next, PAGE_SIZE)) {
 +vmemmap_free_pages(pte_page(*pte), 0);
 +spin_lock(&init_mm.page_table_lock);
 +pte_clear(&init_mm, addr, pte);
 +spin_unlock(&init_mm.page_table_lock);
 +} else {
 +/*
 + * Removed page structs are filled with 0xFD.
 + */
 +memset((void *)addr, PAGE_INUSE, next - addr);
 +page_addr = page_address(pte_page(*pte));
 +
 +if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
 +spin_lock(&init_mm.page_table_lock);
 +pte_clear(&init_mm, addr, pte);
 +spin_unlock(&init_mm.page_table_lock);
 
 Here, since we clear pte, we should also free the page, right ?
 

Right, I forgot here, sorry.

 +}
 + 

Re: [RFC] Add IBM Blue Gene/Q Platform

2012-12-06 Thread Michael Neuling
 commit f6e3c1f706cb6922349d639a74ff6c50acc8b9f8
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Wed Dec 5 13:41:25 2012 -0500
 
 powerpc: Remove unecessary VSX symbols
 
 The symbol THREAD_VSR0 is defined to be the same as THREAD_FPR0.  Its
 presence causes build issues with more complex configurations.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 

Can you explain what these complex configurations are?

Mikey


Re: [RFC] Add IBM Blue Gene/Q Platform

2012-12-06 Thread Michael Neuling
 commit 279c0615917b959a652e81f4ad0d886e2d426d85
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Wed Dec 5 13:43:22 2012 -0500
 
 powerpc/book3e: IBM Blue Gene/Q Quad Processing eXtention (QPX)
 
 This enables kernel support for the QPX extention and is intended for
 processors that support it, usually an IBM Blue Gene processor.
 Turning it on does not effect other processors but it does add code
 and will quadruple the per thread save and restore area for the FPU
 (hense the name).  If you have enabled VSX it will only double the
 space.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com

Can you give a diagram of how the QPX registers are laid out?

+#if defined(CONFIG_PPC_QPX)
+#define TS_FPRWIDTH 4
+#elif defined(CONFIG_VSX)

Are they 256 bits wide?


+#define QVLFDXA(QRT,RA,RB) \
+	.long (0x7c00048f | ((QRT) << 21) | ((RA) << 16) | ((RB) << 11))

Put this in ppc-opcode.h.

+#if defined(CONFIG_VSX) || defined(CONFIG_PPC_QPX)
+   /* they are the same MSR bit */

OMG!


+BEGIN_FTR_SECTION  \
+   SAVE_32VSRS(n,c,base);  \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX);\
+BEGIN_FTR_SECTION  \
+   SAVE_32QRS(n,c,base);   \
+END_FTR_SECTION_IFSET(CPU_FTR_QPX);

I don't think we want to do this.  We are going to end up with 64
NOPS here somewhere.

I'd like to see this patch broken into different parts.

Also, have you boot tested this change on a VSX enabled box?

Mikey


Re: [RFC] Add IBM Blue Gene/Q Platform

2012-12-06 Thread Michael Neuling
Michael Neuling mi...@neuling.org wrote:

  commit 279c0615917b959a652e81f4ad0d886e2d426d85
  Author: Jimi Xenidis ji...@pobox.com
  Date:   Wed Dec 5 13:43:22 2012 -0500
  
  powerpc/book3e: IBM Blue Gene/Q Quad Processing eXtention (QPX)
  
  This enables kernel support for the QPX extention and is intended for
  processors that support it, usually an IBM Blue Gene processor.
  Turning it on does not effect other processors but it does add code
  and will quadruple the per thread save and restore area for the FPU
  (hense the name).  If you have enabled VSX it will only double the
  space.
  
  Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 Can you give a diagram of how the QPX registers are laid out?
 
 +#if defined(CONFIG_PPC_QPX)
 +#define TS_FPRWIDTH 4
 +#elif defined(CONFIG_VSX)
 
 Are they 256 bits wide?
 
 
 +#define QVLFDXA(QRT,RA,RB)   \
 +	.long (0x7c00048f | ((QRT) << 21) | ((RA) << 16) | ((RB) << 11))
 
 Put this in ppc-opcode.h.
 
 +#if defined(CONFIG_VSX) || defined(CONFIG_PPC_QPX)
 + /* they are the same MSR bit */
 
 OMG!
 
 
 +BEGIN_FTR_SECTION\
 + SAVE_32VSRS(n,c,base);  \
 +END_FTR_SECTION_IFSET(CPU_FTR_VSX);  \
 +BEGIN_FTR_SECTION\
 + SAVE_32QRS(n,c,base);   \
 +END_FTR_SECTION_IFSET(CPU_FTR_QPX);  
 
 I don't think we want to do this.  We are going to end up with 64
 NOPS here somewhere.
 
 I'd like to see this patch broken into different parts.
 
 Also, have you boot tested this change on a VSX enabled box?

Also, this is going to clash with the transactional memory patches.

Mikey


Re: [RFC] Add IBM Blue Gene/Q Platform

2012-12-06 Thread Michael Neuling
Jimi Xenidis ji...@pobox.com wrote:

 Rather than flood the mailing list with the patches, I've arranged for a git 
 repo to hold the changesets.
 You can find the repo here:
   https://github.com/jimix/linux-bgq
 
 They are against GregKH's linux-stable.git long-term 3.4.y (y=22) branch.
 The first 9 (6e58088f..) affect common code and the rest are BGQ specific.

Do you actually want this upstream?  I assume no.

Mikey

 
 Here is a are the summary logs:
 
 $ git log --reverse linux-stable/linux-3.4.y..
 commit 5a8edb2bdd914597693eed299119ff4c2e6d31f2
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Fri Nov 9 09:26:00 2012 -0600
 
 powerpc: Fix cputable #ifdef where CONFIG_PPC_A2 is used for 
 CONFIG_PPC_BOOK3E_64
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit ea51920d7035c8d23801d6de46261e7d0a537dfd
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Fri Nov 9 08:58:27 2012 -0600
 
 powerpc/book3e: Remove config for PPC_A2_DD2 since there is no reference 
 to it
 
 This must have been leftover from early DD1 days which is not
 present in any current kernel code.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit 08151401a5db4ff0d441a1b7bf8ad92bd92b14c5
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Mon Nov 5 09:38:01 2012 -0600
 
 powerpc/dcr: Some native DCR fixes
 
 The following fixes have been made:
  - dcr_read/write_native() must use the indexed version of the
m[ft]dcrx since the non-indexed version only allows a 10-bit
numerical space, but the C interface allows a full 32-bits.
  - C bindings for m[ft]dcrx, and the table versions, should use
unsigned long so that they are 64/32 bit neutral.
  - The table versions (__m[ft]cdr) should obtain the table address
with LOAD_REG_ADDR(), this will also make it 64/32bit neutral.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit c8320a5daaceed03992d763302020834ea8e17dd
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Mon Nov 5 09:12:00 2012 -0600
 
 powerpc/dcr: Add 64-bit DCR access methods.
 
 This patch adds the ability to make 64-bit Device Control Register
 (DCR) accesses.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit a763b3f8453b3bd83d7dded8c6644939863af430
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Thu Nov 29 12:49:24 2012 -0500
 
 powerpc/boot: Add a spin_threads hook to platform_ops
 
 It is useful for the boot program to arrange for all secondary cpus
 and threads to enter the kernel in a kexec fashion.  This hook makes
 it possible.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit 391e43393380b514d4d02a42d059619542c7597b
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Thu Nov 29 13:01:23 2012 -0500
 
 powerpc/kexec: Add kexec hold support for Book3e processors
 
 This patch add two items:
 1) Book3e requires that GPR4 survive the hold process, so we make
sure that happens.
 2) Book3e has no real mode, and the hold code exploits this.  Since
these processors ares always translated, we arrange for the kexeced
threads to enter the hold code using the normal kernel linear mapping.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit f6e3c1f706cb6922349d639a74ff6c50acc8b9f8
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Wed Dec 5 13:41:25 2012 -0500
 
 powerpc: Remove unecessary VSX symbols
 
 The symbol THREAD_VSR0 is defined to be the same as THREAD_FPR0.  Its
 presence causes build issues with more complex configurations.
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit 4e817bb42ec8e3d3689877528dd97c4286a870eb
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Tue Nov 20 10:10:52 2012 -0600
 
 Blue Gene/Q wicked optimizing compiler does not know the rfdi instruction 
 yet
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit 2071aa58b2f3b33d97c94e3a127f7c5d4ffaeb34
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Tue Nov 20 10:14:22 2012 -0600
 
 Blue Gene/Q wicked optimizing compiler does not know the mfdcrx 
 instruction yet
 
 Signed-off-by: Jimi Xenidis ji...@pobox.com
 
 commit 6e58088fabedbb2d724637b539ba180c03ed8b68
 Author: Jimi Xenidis ji...@pobox.com
 Date:   Wed Oct 31 16:33:21 2012 -0500
 
 powerpc/book3e: IBM Blue Gene/Q Boot
 
 This patch specifically deals with the initial program load
 environment so that a boot image (dtbImage.bgq) can be loaded by the
 BGQ management tools.  The boot code is a little odd because it has to
 deal with the following issues:
  - Linux boot image wrappers are 32-bit programs
  - BGQ Tools only load 64bit ELF programs
  - BGQ Firmware information is typically loaded at an address > 4G
  - BGQ FW information contains 64-bit ABI function pointers (which are
actually function descriptors) to access firmware methods
  - BGQ FW methods must be called in 64-bit mode
  

Re: [Patch v4 09/12] memory-hotplug: remove page table of x86_64 architecture

2012-12-06 Thread Tang Chen

On 11/27/2012 06:00 PM, Wen Congyang wrote:

For hot removing memory, we should remove the page table for the memory.
So the patch searches the page table for the removed memory, and clears the
page table.


(snip)


+void __meminit
+kernel_physical_mapping_remove(unsigned long start, unsigned long end)
+{
+   unsigned long next;
+   bool pgd_changed = false;
+
+   start = (unsigned long)__va(start);
+   end = (unsigned long)__va(end);


Hi Wu,

Here, you expect start and end are physical addresses. But in
phys_xxx_remove() function, I think using virtual addresses is just
fine. Functions like pmd_addr_end() and pud_index() only calculate
an offset.

So, would you please tell me if we have to use physical addresses here ?

Thanks. :)


+
+	for (; start < end; start = next) {
+   pgd_t *pgd = pgd_offset_k(start);
+   pud_t *pud;
+
+   next = pgd_addr_end(start, end);
+
+   if (!pgd_present(*pgd))
+   continue;
+
+   pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+   phys_pud_remove(pud, __pa(start), __pa(next));
+   if (free_pud_table(pud, pgd))
+   pgd_changed = true;
+   unmap_low_page(pud);
+   }
+
+   if (pgd_changed)
+   sync_global_pgds(start, end - 1);
+
+   flush_tlb_all();
+}
+
  #ifdef CONFIG_MEMORY_HOTREMOVE
  int __ref arch_remove_memory(u64 start, u64 size)
  {
@@ -692,6 +921,8 @@ int __ref arch_remove_memory(u64 start, u64 size)
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);

+   kernel_physical_mapping_remove(start, start + size);
+
return ret;
  }
  #endif





Re: [Patch v4 09/12] memory-hotplug: remove page table of x86_64 architecture

2012-12-06 Thread Jianguo Wu
On 2012/12/7 14:43, Tang Chen wrote:

 On 11/27/2012 06:00 PM, Wen Congyang wrote:
 For hot removing memory, we should remove the page table for the memory.
 So the patch searches the page table for the removed memory, and clears the
 page table.
 
 (snip)
 
 +void __meminit
 +kernel_physical_mapping_remove(unsigned long start, unsigned long end)
 +{
 +unsigned long next;
 +bool pgd_changed = false;
 +
 +start = (unsigned long)__va(start);
 +end = (unsigned long)__va(end);
 
 Hi Wu,
 
 Here, you expect start and end are physical addresses. But in
 phys_xxx_remove() function, I think using virtual addresses is just
 fine. Functions like pmd_addr_end() and pud_index() only calculate
 an offset.


Hi Tang,

 

Virtual addresses will work fine; I used physical addresses in order to
keep consistent with phys_pud[pmd/pte]_init(), so I think we should keep this.

Thanks,
Jianguo Wu

 So, would you please tell me if we have to use physical addresses here ?
 
 Thanks. :)
 
 +
 +for (; start < end; start = next) {
 +pgd_t *pgd = pgd_offset_k(start);
 +pud_t *pud;
 +
 +next = pgd_addr_end(start, end);
 +
 +if (!pgd_present(*pgd))
 +continue;
 +
 +pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
 +phys_pud_remove(pud, __pa(start), __pa(next));
 +if (free_pud_table(pud, pgd))
 +pgd_changed = true;
 +unmap_low_page(pud);
 +}
 +
 +if (pgd_changed)
 +sync_global_pgds(start, end - 1);
 +
 +flush_tlb_all();
 +}
 +
   #ifdef CONFIG_MEMORY_HOTREMOVE
   int __ref arch_remove_memory(u64 start, u64 size)
   {
 @@ -692,6 +921,8 @@ int __ref arch_remove_memory(u64 start, u64 size)
   ret = __remove_pages(zone, start_pfn, nr_pages);
   WARN_ON_ONCE(ret);

 +kernel_physical_mapping_remove(start, start + size);
 +
   return ret;
   }
   #endif
 
 
 
 .
 





[PATCH] vfio powerpc: implemented IOMMU driver for VFIO

2012-12-06 Thread Alexey Kardashevskiy
VFIO implements platform independent stuff such as
a PCI driver, BAR access (via read/write on a file descriptor
or direct mapping when possible) and IRQ signaling.

The platform dependent part includes IOMMU initialization
and handling. This patch implements an IOMMU driver for VFIO
which does mapping/unmapping pages for the guest IO and
provides information about DMA window (required by a POWERPC
guest).

The counterpart in QEMU is required to support this functionality.

Cc: David Gibson da...@gibson.dropbear.id.au
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
 drivers/vfio/Kconfig|6 +
 drivers/vfio/Makefile   |1 +
 drivers/vfio/vfio_iommu_spapr_tce.c |  348 +++
 include/linux/vfio.h|   30 +++
 4 files changed, 385 insertions(+)
 create mode 100644 drivers/vfio/vfio_iommu_spapr_tce.c

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 7cd5dec..b464687 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1
depends on VFIO
default n
 
+config VFIO_IOMMU_SPAPR_TCE
+   tristate
	depends on VFIO && SPAPR_TCE_IOMMU
+   default n
+
 menuconfig VFIO
	tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
select VFIO_IOMMU_TYPE1 if X86
+   select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV
help
  VFIO provides a framework for secure userspace device drivers.
  See Documentation/vfio.txt for more details.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 2398d4a..72bfabc 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
+obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
new file mode 100644
index 000..b0f81fe
--- /dev/null
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -0,0 +1,348 @@
+/*
+ * VFIO: IOMMU DMA mapping support for TCE on POWER
+ *
+ * Copyright (C) 2012 IBM Corp.  All rights reserved.
+ * Author: Alexey Kardashevskiy a...@ozlabs.ru
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio_iommu_type1.c:
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ * Author: Alex Williamson alex.william...@redhat.com
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/err.h>
+#include <linux/vfio.h>
+#include <asm/iommu.h>
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "a...@ozlabs.ru"
+#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"
+
+static void tce_iommu_detach_group(void *iommu_data,
+   struct iommu_group *iommu_group);
+
+/*
+ * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
+ */
+
+/*
+ * This code handles mapping and unmapping of user data buffers
+ * into DMA'ble space using the IOMMU
+ */
+
+#define NPAGE_TO_SIZE(npage)   ((size_t)(npage) << PAGE_SHIFT)
+
+struct vwork {
+   struct mm_struct*mm;
+   longnpage;
+   struct work_struct  work;
+};
+
+/* delayed decrement/increment for locked_vm */
+static void lock_acct_bg(struct work_struct *work)
+{
+   struct vwork *vwork = container_of(work, struct vwork, work);
+   struct mm_struct *mm;
+
+	mm = vwork->mm;
+	down_write(&mm->mmap_sem);
+	mm->locked_vm += vwork->npage;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	kfree(vwork);
+}
+
+static void lock_acct(long npage)
+{
+   struct vwork *vwork;
+   struct mm_struct *mm;
+
+	if (!current->mm)
+		return; /* process exited */
+
+	if (down_write_trylock(&current->mm->mmap_sem)) {
+		current->mm->locked_vm += npage;
+		up_write(&current->mm->mmap_sem);
+		return;
+	}
+
+	/*
+	 * Couldn't get mmap_sem lock, so must setup to update
+	 * mm->locked_vm later. If locked_vm were atomic, we
+	 * wouldn't need this silliness
+	 */
+	vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL);
+	if (!vwork)
+		return;
+	mm = get_task_mm(current);
+	if (!mm) {
+		kfree(vwork);
+		return;
+	}
+	INIT_WORK(&vwork->work, lock_acct_bg);
+	vwork->mm = mm;
+	vwork->npage = npage;
+	schedule_work(&vwork->work);
+}
+
+/*
+ * The container descriptor supports only a single group per container.
+ * Required by the API as the container is not supplied with the IOMMU group
+ * at the moment of initialization.
+ */
+struct tce_container {
+   struct mutex lock;
+   struct iommu_table *tbl;
+};
+
+static void 

[PATCH] vfio powerpc: enabled on powernv platform

2012-12-06 Thread Alexey Kardashevskiy
This patch initializes IOMMU groups based on the IOMMU
configuration discovered during the PCI scan on POWERNV
(POWER non virtualized) platform. The IOMMU groups are
to be used later by VFIO driver (PCI pass through).

It also implements an API for mapping/unmapping pages for
guest PCI drivers and providing DMA window properties.
This API is going to be used later by QEMU-VFIO to handle
h_put_tce hypercalls from the KVM guest.

Although this driver has been tested only on the POWERNV
platform, it should work on any platform which supports
TCE tables.

To enable VFIO on POWER, enable SPAPR_TCE_IOMMU config
option and configure VFIO as required.

Cc: David Gibson da...@gibson.dropbear.id.au
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
 arch/powerpc/include/asm/iommu.h |   10 ++
 arch/powerpc/kernel/iommu.c  |  214 ++
 arch/powerpc/platforms/powernv/pci.c |  134 +
 drivers/iommu/Kconfig|8 ++
 4 files changed, 366 insertions(+)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index cbfe678..be3b11b 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -76,6 +76,9 @@ struct iommu_table {
struct iommu_pool large_pool;
struct iommu_pool pools[IOMMU_NR_POOLS];
unsigned long *it_map;   /* A simple allocation bitmap for now */
+#ifdef CONFIG_IOMMU_API
+   struct iommu_group *it_group;
+#endif
 };
 
 struct scatterlist;
@@ -147,5 +150,12 @@ static inline void iommu_restore(void)
 }
 #endif
 
+extern void iommu_reset_table(struct iommu_table *tbl, bool release);
+extern long iommu_clear_tces(struct iommu_table *tbl, unsigned long entry,
+   unsigned long pages);
+extern long iommu_put_tces(struct iommu_table *tbl, unsigned long entry,
+   uint64_t tce, enum dma_data_direction direction,
+   unsigned long pages);
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ff5a6ce..123431a 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -44,6 +44,7 @@
 #include <asm/kdump.h>
 #include <asm/fadump.h>
 #include <asm/vio.h>
+#include <asm/tce.h>
 
 #define DBG(...)
 
@@ -856,3 +857,216 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t 
size,
free_pages((unsigned long)vaddr, get_order(size));
}
 }
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * SPAPR TCE API
+ */
+
+/*
+ * iommu_reset_table is called when it started/stopped being used
+ */
+void iommu_reset_table(struct iommu_table *tbl, bool release)
+{
+	/*
+	 * Page at 0 is marked as used in iommu_init_table,
+	 * so here we clear it when called with release=false...
+	 */
+	if (!release && (tbl->it_offset == 0))
+		clear_bit(0, tbl->it_map);
+
+	iommu_clear_tces(tbl, tbl->it_offset, tbl->it_size);
+
+	memset(tbl->it_map, 0, (tbl->it_size + 7) >> 3);
+
+	/*
+	 * ... or restore when release=true
+	 */
+	if (release && (tbl->it_offset == 0))
+		set_bit(0, tbl->it_map);
+}
+EXPORT_SYMBOL_GPL(iommu_reset_table);
+
+/*
+ * Returns the number of used IOMMU pages (4K) within
+ * the same system page (4K or 64K).
+ * bitmap_weight is not used as it does not support bigendian maps.
+ * offset is an IOMMU page number relative to DMA window start.
+ */
+static int syspage_weight(unsigned long *map, unsigned long offset)
+{
+	int ret = 0, nbits = PAGE_SIZE/IOMMU_PAGE_SIZE;
+
+	/* Aligns TCE entry number to system page boundary */
+	offset &= PAGE_MASK >> IOMMU_PAGE_SHIFT;
+
+	/* Count used 4K pages */
+	while (nbits) {
+		if (test_bit(offset, map))
+			++ret;
+		--nbits;
+		++offset;
+	}
+
+   return ret;
+}
+
+static void tce_flush(struct iommu_table *tbl)
+{
+   /* Flush/invalidate TLB caches if necessary */
+   if (ppc_md.tce_flush)
+   ppc_md.tce_flush(tbl);
+
+   /* Make sure updates are seen by hardware */
+   mb();
+}
+
+/*
+ * iommu_clear_tces clears tces and returned the number of system pages
+ * which it called put_page() on
+ */
+static long clear_tces_nolock(struct iommu_table *tbl, unsigned long entry,
+   unsigned long pages)
+{
+   int i, retpages = 0, clr;
+   unsigned long oldtce, oldweight;
+   struct page *page;
+
+	for (i = 0; i < pages; ++i) {
+		if (!test_bit(entry + i - tbl->it_offset, tbl->it_map))
+			continue;
+
+		oldtce = ppc_md.tce_get(tbl, entry + i);
+		ppc_md.tce_free(tbl, entry + i, 1);
+
+		oldweight = syspage_weight(tbl->it_map,
+				entry + i - tbl->it_offset);
+		clr = __test_and_clear_bit(entry + i - tbl->it_offset,
+				tbl->it_map);
+
+ 

[PATCH 0/6] powerpc: SMT priority (PPR) save and restore

2012-12-06 Thread Haren Myneni
Ben, 

This patch-set is created against your tree (next branch) and fixes the build
failure that you pointed out.

Changes from the previous version:
- Changes for PPR save/restore in denorm_exception_hv, 
data_access_slb_relon_pSeries and instruction_access_slb_relon_pSeries 
  exception vectors (P8) code.
- Fix build failure with ppc64e_defconfig
- Macro name changes (HMT_MEDIUM_PPR_DISCARD and HMT_MEDIUM_PPR_SAVE) and other 
fixes as Michael Neuling suggested

[PATCH 0/6] powerpc: SMT priority (PPR) save and restore

On P7/P8 systems, users can define SMT priority levels 2, 3 and 4 for
processes so that some can run at a higher priority than the others.
In the current kernel, the default priority is set to 4, which prevents
processes from using a higher priority. Also, the kernel boosts the priority
to 4 during exceptions without saving the user-defined priority when the
task enters the kernel. So we lose the process PPR value and cannot restore
it when the task exits the kernel.

This patch-set implements saving and restoring the user-defined PPR value
for all tasks.

With the null_syscall testcase (http://ozlabs.org/~anton/junkcode/null_syscall.c),
this feature takes around 10 extra CPU cycles on average over 25 samples.
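
For context only, a sketch of the priority hints this is about: the
encodings below are the architected "or Rx,Rx,Rx" priority nops, while PPR
itself is the SPR this series saves and restores across exceptions
(illustrative, not part of the patch set):

/* Illustration only: SMT priority hints a thread can issue on POWER.
 * The patch set's job is to preserve the resulting PPR value across
 * kernel entry/exit instead of unconditionally boosting it. */
static inline void smt_priority_low(void)        { asm volatile("or 1,1,1"); }
static inline void smt_priority_medium_low(void) { asm volatile("or 6,6,6"); }
static inline void smt_priority_medium(void)     { asm volatile("or 2,2,2"); }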
 
Haren Myneni (6):
  powerpc: Move branch instruction from ACCOUNT_CPU_USER_ENTRY to caller
  powerpc: Enable PPR save/restore
  powerpc: Increase exceptions arrays in paca struct to save PPR
  powerpc: Define ppr in thread_struct
  powerpc: Macros for saving/restore PPR
  powerpc: Implement PPR save/restore

 arch/powerpc/include/asm/cputable.h  |7 +++-
 arch/powerpc/include/asm/exception-64s.h |   59 +-
 arch/powerpc/include/asm/paca.h  |6 ++--
 arch/powerpc/include/asm/ppc_asm.h   |   27 +-
 arch/powerpc/include/asm/processor.h |   12 ++
 arch/powerpc/include/asm/reg.h   |1 +
 arch/powerpc/kernel/asm-offsets.c|1 +
 arch/powerpc/kernel/entry_64.S   |6 +++-
 arch/powerpc/kernel/exceptions-64e.S |3 +-
 arch/powerpc/kernel/exceptions-64s.S |   23 +++-
 arch/powerpc/kernel/process.c|2 +
 11 files changed, 119 insertions(+), 28 deletions(-)





[PATCH 1/6] powerpc: Move branch instruction from ACCOUNT_CPU_USER_ENTRY to caller

2012-12-06 Thread Haren Myneni
[PATCH 1/6] powerpc: Move branch instruction from ACCOUNT_CPU_USER_ENTRY to 
caller

The first instruction in ACCOUNT_CPU_USER_ENTRY is 'beq', which checks for
exceptions coming from kernel mode. The PPR value will be saved immediately
after ACCOUNT_CPU_USER_ENTRY, and it too is only needed for user-level
exceptions. So this branch instruction is moved into the caller code.

Signed-off-by: Haren Myneni ha...@us.ibm.com
---
 arch/powerpc/include/asm/exception-64s.h |3 ++-
 arch/powerpc/include/asm/ppc_asm.h   |2 --
 arch/powerpc/kernel/entry_64.S   |3 ++-
 arch/powerpc/kernel/exceptions-64e.S |3 ++-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index ad708dd..697de09 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -224,8 +224,9 @@ do_kvm_##n: 
\
std r10,0(r1);  /* make stack chain pointer */ \
std r0,GPR0(r1);/* save r0 in stackframe*/ \
std r10,GPR1(r1);   /* save r1 in stackframe*/ \
+   beq 4f; /* if from kernel mode  */ \
ACCOUNT_CPU_USER_ENTRY(r9, r10);   \
-   std r2,GPR2(r1);/* save r2 in stackframe*/ \
+4: std r2,GPR2(r1);/* save r2 in stackframe*/ \
SAVE_4GPRS(3, r1);  /* save r3 - r6 in stackframe   */ \
SAVE_2GPRS(7, r1);  /* save r7, r8 in stackframe*/ \
ld  r9,area+EX_R9(r13); /* move r9, r10 to stackframe   */ \
diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index ea2a86e..376e36d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -30,7 +30,6 @@
 #define ACCOUNT_STOLEN_TIME
 #else
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb) \
-   beq 2f; /* if from kernel mode */   \
MFTB(ra);   /* get timebase */  \
ld  rb,PACA_STARTTIME_USER(r13);\
std ra,PACA_STARTTIME(r13); \
@@ -38,7 +37,6 @@
ld  ra,PACA_USER_TIME(r13); \
add ra,ra,rb;   /* add on to user time */   \
std ra,PACA_USER_TIME(r13); \
-2:
 
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)  \
MFTB(ra);   /* get timebase */  \
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ad7..4e78247 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -62,8 +62,9 @@ system_call_common:
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
+   beq 2f  /* if from kernel mode */
ACCOUNT_CPU_USER_ENTRY(r10, r11)
-   std r2,GPR2(r1)
+2: std r2,GPR2(r1)
std r3,GPR3(r1)
mfcrr2
std r4,GPR4(r1)
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 4684e33..ae54553 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -159,8 +159,9 @@ exc_##n##_common:   
\
std r9,GPR9(r1);/* save r9 in stackframe */ \
std r10,_NIP(r1);   /* save SRR0 to stackframe */   \
std r11,_MSR(r1);   /* save SRR1 to stackframe */   \
+   beq 2f; /* if from kernel mode */   \
ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */  \
-   ld  r3,excf+EX_R10(r13);/* get back r10 */  \
+2: ld  r3,excf+EX_R10(r13);/* get back r10 */  \
ld  r4,excf+EX_R11(r13);/* get back r11 */  \
mfspr   r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 */ \
std r12,GPR12(r1);  /* save r12 in stackframe */\
-- 
1.7.1





[PATCH 2/6] powerpc: Enable PPR save/restore

2012-12-06 Thread Haren Myneni

The SMT thread status register (PPR) is used to set the thread priority. This
patch enables the PPR save/restore feature (CPU_FTR_HAS_PPR) on POWER7 and
POWER8 systems.
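
Once the bit is in the CPU feature mask, kernel C code can gate PPR handling at
run time with cpu_has_feature(); a minimal kernel-internal sketch of that
pattern (the helper name is invented here; the same check appears in
copy_thread() in patch 4/6):

#include <asm/cputable.h>	/* CPU_FTR_HAS_PPR, cpu_has_feature() */
#include <asm/processor.h>	/* struct thread_struct, INIT_PPR (patch 4/6) */

/* Hypothetical helper: only touch the PPR field on CPUs that advertise it. */
static void maybe_init_ppr(struct thread_struct *t)
{
        if (cpu_has_feature(CPU_FTR_HAS_PPR))
                t->ppr = INIT_PPR;	/* default priority 3, see patch 4/6 */
}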

Signed-off-by: Haren Myneni ha...@us.ibm.com
---
 arch/powerpc/include/asm/cputable.h |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index 76f81bd..241d65d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -171,6 +171,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_POPCNTD
LONG_ASM_CONST(0x0800)
 #define CPU_FTR_ICSWX  LONG_ASM_CONST(0x1000)
 #define CPU_FTR_VMX_COPY   LONG_ASM_CONST(0x2000)
+#define CPU_FTR_HAS_PPR			LONG_ASM_CONST(0x4000)
 
 #ifndef __ASSEMBLY__
 
@@ -400,7 +401,8 @@ extern const char *powerpc_base_platform;
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
-   CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
+   CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+   CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR)
 #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -408,7 +410,8 @@ extern const char *powerpc_base_platform;
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO  | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
-   CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
+   CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+   CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR)
 #define CPU_FTRS_CELL  (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-- 
1.7.1





[PATCH 3/6] powerpc: Increase exceptions arrays in paca struct to save PPR

2012-12-06 Thread Haren Myneni

Use the paca to save the user defined PPR value in the first level exception
vector.
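
The array growth from 11 to 12 entries follows from the new save slot: EX_PPR
is byte offset 88, which is the 12th u64 in each area. A standalone sketch of
that arithmetic (plain C11, not kernel code):

#include <assert.h>
#include <stdint.h>

#define EX_PPR		88	/* byte offset of the PPR slot (patch 3/6) */
#define EX_AREA_ENTRIES	12	/* exgen/exmc/exslb size after this patch */

/* 88 / 8 = slot index 11, so an 11-entry array (indices 0..10) is too small. */
static_assert(EX_PPR / sizeof(uint64_t) == 11, "EX_PPR lands in slot 11");
static_assert(EX_AREA_ENTRIES * sizeof(uint64_t) > EX_PPR, "area must cover EX_PPR");

int main(void) { return 0; }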

Signed-off-by: Haren Myneni ha...@us.ibm.com
---
 arch/powerpc/include/asm/exception-64s.h |1 +
 arch/powerpc/include/asm/paca.h  |6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 697de09..3b24ca9 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -47,6 +47,7 @@
 #define EX_R3  64
 #define EX_LR  72
 #define EX_CFAR80
+#define EX_PPR 88  /* SMT thread status register (priority) */
 
 #ifdef CONFIG_RELOCATABLE
 #define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e9e7a69..c47d687 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -93,9 +93,9 @@ struct paca_struct {
 * Now, starting in cacheline 2, the exception save areas
 */
/* used for most interrupts/exceptions */
-   u64 exgen[11] __attribute__((aligned(0x80)));
-   u64 exmc[11];   /* used for machine checks */
-   u64 exslb[11];  /* used for SLB/segment table misses
+   u64 exgen[12] __attribute__((aligned(0x80)));
+   u64 exmc[12];   /* used for machine checks */
+   u64 exslb[12];  /* used for SLB/segment table misses
 * on the linear mapping */
/* SLB related definitions */
u16 vmalloc_sllp;
-- 
1.7.1





[PATCH 4/6] powerpc: Define ppr in thread_struct

2012-12-06 Thread Haren Myneni

The ppr field in thread_struct is used to save the PPR value and to restore it
before the process exits the kernel.

This patch sets the default priority to 3 when tasks are created, so that
users can use 4 for higher priority tasks.
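
For reference, the INIT_PPR encoding used below can be checked in isolation:
PPR bits 11:13 (IBM numbering, bit 0 being the most significant) are bits 52:50
counted from the least significant end, so priority 3 becomes 3ULL << 50. A
standalone sketch of that arithmetic:

#include <stdio.h>
#include <inttypes.h>

#define PPR_PRIORITY	3
#define INIT_PPR	((uint64_t)PPR_PRIORITY << 50)

int main(void)
{
        /* IBM bit 13 of a 64-bit register is bit 63 - 13 = 50 from the LSB,
         * so the 3-bit priority field occupies bits 50..52. */
        printf("INIT_PPR = 0x%016" PRIx64 "\n", INIT_PPR);	/* 0x000c000000000000 */
        return 0;
}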

Signed-off-by: Haren Myneni ha...@us.ibm.com
---
 arch/powerpc/include/asm/processor.h |   12 
 arch/powerpc/kernel/asm-offsets.c|1 +
 arch/powerpc/kernel/process.c|2 ++
 3 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 8750204..37f87f0 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -18,6 +18,16 @@
 #define TS_FPRWIDTH 1
 #endif
 
+#ifdef CONFIG_PPC64
+/* Default SMT priority is set to 3. Use bits 11-13 to save priority. */
+#define PPR_PRIORITY 3
+#ifdef __ASSEMBLY__
+#define INIT_PPR (PPR_PRIORITY << 50)
+#else
+#define INIT_PPR ((u64)PPR_PRIORITY << 50)
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PPC64 */
+
 #ifndef __ASSEMBLY__
 #include <linux/compiler.h>
 #include <linux/cache.h>
@@ -245,6 +255,7 @@ struct thread_struct {
 #ifdef CONFIG_PPC64
unsigned long   dscr;
int dscr_inherit;
+   unsigned long   ppr;/* used to save/restore SMT priority */
 #endif
 };
 
@@ -278,6 +289,7 @@ struct thread_struct {
.fpr = {{0}}, \
.fpscr = { .val = 0, }, \
.fpexc_mode = 0, \
+   .ppr = INIT_PPR, \
 }
 #endif
 
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 7523539..41f65ec 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -77,6 +77,7 @@ int main(void)
DEFINE(NMI_MASK, NMI_MASK);
DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, 
dscr_inherit));
+   DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
 #else
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ba48233..2563acc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -814,6 +814,8 @@ int copy_thread(unsigned long clone_flags, unsigned long 
usp,
	p->thread.dscr_inherit = current->thread.dscr_inherit;
	p->thread.dscr = current->thread.dscr;
}
+   if (cpu_has_feature(CPU_FTR_HAS_PPR))
+		p->thread.ppr = INIT_PPR;
 #endif
/*
 * The PPC64 ABI makes use of a TOC to contain function 
-- 
1.7.1





[PATCH 5/6] powerpc: Macros for saving/restore PPR

2012-12-06 Thread Haren Myneni

Several macros are defined for saving and restoring the user defined PPR value.
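
At bottom these macros are mfspr/mtspr accesses to SPR 0x380 (SPRN_PPR) plus
the usual HMT_* priority sequences. A minimal C sketch with inline assembly,
assuming a powerpc64 GCC toolchain (the helper names are invented; the real
macros below stay in assembly so they can run before a stack frame exists):

/* powerpc64-only sketch; SPR 0x380 is the PPR (SPRN_PPR). */
static inline unsigned long ppr_read(void)
{
        unsigned long val;

        asm volatile("mfspr %0,0x380" : "=r" (val));
        return val;
}

static inline void ppr_write(unsigned long val)
{
        asm volatile("mtspr 0x380,%0" : : "r" (val));
}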

Signed-off-by: Haren Myneni ha...@us.ibm.com
---
 arch/powerpc/include/asm/exception-64s.h |   37 ++
 arch/powerpc/include/asm/ppc_asm.h   |   25 
 arch/powerpc/include/asm/reg.h   |1 +
 3 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 3b24ca9..090fcd1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -108,6 +108,43 @@
 #define RESTORE_LR(reg, area)
 #endif
 
+/*
+ * PPR save/restore macros used in exceptions_64s.S  
+ * Used for P7 or later processors
+ */
+#define SAVE_PPR(area, ra, rb) \
+BEGIN_FTR_SECTION_NESTED(940)  \
+   ld  ra,PACACURRENT(r13);\
+   ld  rb,area+EX_PPR(r13);/* Read PPR from paca */\
+   std rb,TASKTHREADPPR(ra);   \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
+
+#define RESTORE_PPR_PACA(area, ra) \
+BEGIN_FTR_SECTION_NESTED(941)  \
+   ld  ra,area+EX_PPR(r13);\
+   mtspr   SPRN_PPR,ra;\
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
+
+/*
+ * Increase the priority on systems where PPR save/restore is not
+ * implemented/ supported.
+ */
+#define HMT_MEDIUM_PPR_DISCARD \
+BEGIN_FTR_SECTION_NESTED(942)  \
+   HMT_MEDIUM; \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942)  /*non P7*/  
+
+/*
+ * Save PPR in paca whenever some register is available to use.
+ * Then increase the priority.
+ */
+#define HMT_MEDIUM_PPR_SAVE(area, ra)  \
+BEGIN_FTR_SECTION_NESTED(943)  \
+   mfspr   ra,SPRN_PPR;\
+   std ra,area+EX_PPR(r13);\
+   HMT_MEDIUM; \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,943) 
+
 #define __EXCEPTION_PROLOG_1(area, extra, vec) \
GET_PACA(r13);  \
std r9,area+EX_R9(r13); /* save r9 - r12 */ \
diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 376e36d..c2d0e58 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -389,6 +389,31 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
FTR_SECTION_ELSE_NESTED(848);   \
mtocrf (FXM), RS;   \
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848)
+
+/*
+ * PPR restore macros used in entry_64.S
+ * Used for P7 or later processors
+ */
+#define HMT_MEDIUM_LOW_HAS_PPR \
+BEGIN_FTR_SECTION_NESTED(944)  \
+   HMT_MEDIUM_LOW; \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,944)
+
+#define SET_DEFAULT_THREAD_PPR(ra, rb) \
+BEGIN_FTR_SECTION_NESTED(945)  \
+   lis ra,INIT_PPR@highest;/* default ppr=3 */ \
+   ld  rb,PACACURRENT(r13);\
+	sldi	ra,ra,32;	/* bits 11-13 are used for ppr */	\
+   std ra,TASKTHREADPPR(rb);   \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
+
+#define RESTORE_PPR(ra, rb)\
+BEGIN_FTR_SECTION_NESTED(946)  \
+   ld  ra,PACACURRENT(r13);\
+   ld  rb,TASKTHREADPPR(ra);   \
+   mtspr   SPRN_PPR,rb;/* Restore PPR */   \
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
+
 #endif
 
 /*
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1b853f7..d395426 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -289,6 +289,7 @@
 #define SPRN_DBAT6U0x23C   /* Data BAT 6 Upper Register */
 #define SPRN_DBAT7L0x23F   /* Data BAT 7 Lower Register */
 #define SPRN_DBAT7U0x23E   /* Data BAT 7 Upper Register */
+#define SPRN_PPR   0x380   /* SMT Thread status Register */
 
 #define SPRN_DEC   0x016   

[PATCH 6/6] powerpc: Implement PPR save/restore

2012-12-06 Thread Haren Myneni

When a task enters kernel space, the user defined priority (PPR) is saved
into the PACA at the beginning of the first level exception vector and then
copied from the PACA into thread_struct in the second level vector. The PPR
is restored from thread_struct before the task exits kernel space.

P7/P8 temporarily raise the thread priority to a higher level on an exception
until the code executes one of the HMT_* calls, but they do not modify the PPR
register. So we save the PPR value as soon as a register is available to use
and then call HMT_MEDIUM to increase the priority. This feature is supported
on P7 or later processors.

We save/restore the PPR for all exception vectors except system call entry;
glibc saves and restores it around system calls. So the default PPR value (3)
is set on system call exit when the task returns to user space.
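
Putting the series together, the life cycle of one PPR value looks roughly like
the following self-contained C sketch (the data structures and helpers are
simplified stand-ins invented for this illustration, not kernel code):

#include <stdio.h>
#include <inttypes.h>

/* Simplified stand-ins for the hardware SPR, the paca slot and thread.ppr. */
static uint64_t hw_ppr = 0x0010000000000000ull;	/* pretend user-set priority */
static uint64_t paca_ex_ppr;			/* area + EX_PPR slot in the paca */
static struct { uint64_t ppr; } thread;		/* thread_struct.ppr */

static uint64_t ppr_read(void)    { return hw_ppr; }	/* mfspr SPRN_PPR */
static void ppr_write(uint64_t v) { hw_ppr = v; }	/* mtspr SPRN_PPR */

int main(void)
{
        /* First level vector: HMT_MEDIUM_PPR_SAVE stashes the PPR in the paca
         * before HMT_MEDIUM raises the priority for the kernel path. */
        paca_ex_ppr = ppr_read();

        /* Second level vector: SAVE_PPR copies it into thread.ppr. */
        thread.ppr = paca_ex_ppr;

        /* ... exception handling runs at medium priority ... */

        /* Return to user space: RESTORE_PPR writes the saved value back. */
        ppr_write(thread.ppr);

        printf("restored PPR = 0x%016" PRIx64 "\n", hw_ppr);
        return 0;
}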

Signed-off-by: Haren Myneni ha...@us.ibm.com
---
 arch/powerpc/include/asm/exception-64s.h |   18 ++
 arch/powerpc/kernel/entry_64.S   |3 +++
 arch/powerpc/kernel/exceptions-64s.S |   23 +--
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 090fcd1..c235867 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -147,8 +147,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,943)
 
 #define __EXCEPTION_PROLOG_1(area, extra, vec) \
GET_PACA(r13);  \
-   std r9,area+EX_R9(r13); /* save r9 - r12 */ \
-   std r10,area+EX_R10(r13);   \
+   std r9,area+EX_R9(r13); /* save r9 */   \
+   HMT_MEDIUM_PPR_SAVE(area, r9);  \
+   std r10,area+EX_R10(r13);   /* save r10 - r12 */\
BEGIN_FTR_SECTION_NESTED(66);   \
mfspr   r10,SPRN_CFAR;  \
std r10,area+EX_CFAR(r13);  \
@@ -264,6 +265,7 @@ do_kvm_##n: 
\
std r10,GPR1(r1);   /* save r1 in stackframe*/ \
beq 4f; /* if from kernel mode  */ \
ACCOUNT_CPU_USER_ENTRY(r9, r10);   \
+   SAVE_PPR(area, r9, r10);   \
 4: std r2,GPR2(r1);/* save r2 in stackframe*/ \
SAVE_4GPRS(3, r1);  /* save r3 - r6 in stackframe   */ \
SAVE_2GPRS(7, r1);  /* save r7, r8 in stackframe*/ \
@@ -305,7 +307,7 @@ do_kvm_##n: 
\
. = loc;\
.globl label##_pSeries; \
 label##_pSeries:   \
-   HMT_MEDIUM; \
+   HMT_MEDIUM_PPR_DISCARD; \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,\
 EXC_STD, KVMTEST_PR, vec)
@@ -314,7 +316,7 @@ label##_pSeries:\
. = loc;\
.globl label##_hv;  \
 label##_hv:\
-   HMT_MEDIUM; \
+   HMT_MEDIUM_PPR_DISCARD; \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,\
 EXC_HV, KVMTEST, vec)
@@ -323,7 +325,7 @@ label##_hv: \
. = loc;\
.globl label##_relon_pSeries;   \
 label##_relon_pSeries: \
-   HMT_MEDIUM; \
+   HMT_MEDIUM_PPR_DISCARD; \
/* No guest interrupts come through here */ \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
@@ -333,7 +335,7 @@ label##_relon_pSeries:  
\
. = loc;\
.globl label##_relon_hv;\
 label##_relon_hv:  \
-   HMT_MEDIUM; \
+   HMT_MEDIUM_PPR_DISCARD; \
/* No guest interrupts come through here */ \
SET_SCRATCH0(r13);  /* save r13