Re: [PATCH 05/15] powerpc/powernv: Split out set MSI IRQ chip code

2014-09-19 Thread Gavin Shan
On Thu, Sep 18, 2014 at 06:26:50PM +1000, Michael Neuling wrote:
From: Ian Munsie imun...@au1.ibm.com

Some of the MSI IRQ code in pnv_pci_ioda_msi_setup() is generically useful so
split it out.

This will be used by some of the cxl PCIe code later.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
Signed-off-by: Michael Neuling mi...@neuling.org
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 43 ++-
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index df241b1..194f90a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1306,14 +1306,36 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d)
   icp_native_eoi(d);
 }
 
+
+static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
+{
+  struct irq_data *idata;
+  struct irq_chip *ichip;
+
+  /*
+   * Change the IRQ chip for the MSI interrupts on PHB3.
+   * The corresponding IRQ chip should be populated for
+   * the first time.
+   */
+  if (phb-type == PNV_PHB_IODA2) {
+  if (!phb-ioda.irq_chip_init) {
+  idata = irq_get_irq_data(virq);
+  ichip = irq_data_get_irq_chip(idata);
+  phb-ioda.irq_chip_init = 1;
+  phb-ioda.irq_chip = *ichip;
+  phb-ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
+  }
+
+  irq_set_chip(virq, phb-ioda.irq_chip);
+  }
+}
+

Nitpick: checking the PHB type and bailing out early would avoid the nested code :)

if (phb->type != PNV_PHB_IODA2)
        return;

 static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 unsigned int hwirq, unsigned int virq,
 unsigned int is_64, struct msi_msg *msg)
 {
   struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
   struct pci_dn *pdn = pci_get_pdn(dev);
-  struct irq_data *idata;
-  struct irq_chip *ichip;
   unsigned int xive_num = hwirq - phb-msi_base;
   __be32 data;
   int rc;
@@ -1365,22 +1387,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, 
struct pci_dev *dev,
   }
   msg-data = be32_to_cpu(data);
 
-  /*
-   * Change the IRQ chip for the MSI interrupts on PHB3.
-   * The corresponding IRQ chip should be populated for
-   * the first time.
-   */
-  if (phb-type == PNV_PHB_IODA2) {
-  if (!phb-ioda.irq_chip_init) {
-  idata = irq_get_irq_data(virq);
-  ichip = irq_data_get_irq_chip(idata);
-  phb-ioda.irq_chip_init = 1;
-  phb-ioda.irq_chip = *ichip;
-  phb-ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
-  }
-
-  irq_set_chip(virq, phb-ioda.irq_chip);
-  }
+  set_msi_irq_chip(phb, virq);
 
   pr_devel(%s: %s-bit MSI on hwirq %x (xive #%d),
 address=%x_%08x data=%x PE# %d\n,

Thanks,
Gavin

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 07/15] powerpc/powerpc: Add new PCIe functions for allocating cxl interrupts

2014-09-19 Thread Gavin Shan
On Thu, Sep 18, 2014 at 06:26:52PM +1000, Michael Neuling wrote:
From: Ian Munsie imun...@au1.ibm.com

This adds a number of functions for allocating IRQs under powernv PCIe for cxl.

Signed-off-by: Ian Munsie imun...@au1.ibm.com
Signed-off-by: Michael Neuling mi...@neuling.org
---
 arch/powerpc/include/asm/pnv-pci.h|  27 +
 arch/powerpc/platforms/powernv/pci-ioda.c | 186 ++
 2 files changed, 213 insertions(+)
 create mode 100644 arch/powerpc/include/asm/pnv-pci.h

diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
new file mode 100644
index 000..71717b5
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PNV_PCI_H
+#define _ASM_PNV_PCI_H
+
+#include linux/pci.h
+#include misc/cxl.h
+
+int pnv_phb_to_cxl(struct pci_dev *dev);
+int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
+ unsigned int virq);
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+ struct pci_dev *dev, int num);
+void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
+struct pci_dev *dev);
+int pnv_cxl_get_irq_count(struct pci_dev *dev);
+
+#endif
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 194f90a..80919f8 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -38,6 +38,8 @@
 #include asm/debug.h
 #include asm/firmware.h
 
+#include misc/cxl.h
+
 #include powernv.h
 #include pci.h
 
@@ -503,6 +505,163 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct 
pci_dev *dev)
   return NULL;
   return phb-ioda.pe_array[pdn-pe_number];
 }
+
+struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev)
+{
+  struct device_node *np;
+  struct property *prop = NULL;
+
+  np = of_node_get(pci_device_to_OF_node(dev));
+
+  /* Scan up the tree looking for the PHB node */
+  while (np) {
+  if ((prop = of_find_property(np, ibm,opal-phbid, NULL)))
+  break;
+  np = of_get_next_parent(np);
+  }
+
+  if (!prop) {
+  of_node_put(np);
+  return NULL;
+  }
+
+  return np;
+}
+EXPORT_SYMBOL(pnv_pci_to_phb_node);

Nitpick: I'm not sure this is the best way. struct pci_controller::dn should
always have a valid ibm,opal-phbid property, so I guess the code could look
like this:

struct pci_controller *hose = pci_bus_to_host(dev->bus);

return hose->dn;

+
+#ifdef CONFIG_CXL_BASE
+int pnv_phb_to_cxl(struct pci_dev *dev)
+{
+  struct device_node *np;
+  struct pnv_ioda_pe *pe;
+  const u64 *prop64;
+  u64 phb_id;
+  int rc;
+
+  dev_info(dev-dev, switch PHB to CXL\n);
+
+  if (!(np = pnv_pci_to_phb_node(dev)))
+  return -ENODEV;
+
+  prop64 = of_get_property(np, ibm,opal-phbid, NULL);
+
+  phb_id = be64_to_cpup(prop64);
+  dev_info(dev-dev, PHB-ID  : 0x%016llx\n, phb_id);
+

The PHB ID is already available in struct pnv_phb::opal_id, so
I guess we needn't grab it from the device tree again :)

+  if (!(pe = pnv_ioda_get_pe(dev))) {
+  rc = -ENODEV;
+  goto out;
+  }
+  dev_info(dev-dev,  pe : %i\n, pe-pe_number);

Perhaps you can reuse pe_info() here.

+
+  if ((rc = opal_pci_set_phb_cxl_mode(phb_id, 1, pe-pe_number)))
+  dev_err(dev-dev, opal_pci_set_phb_cxl_mode failed: %i\n, 
rc);
+
+out:
+  of_node_put(np);
+  return rc;
+}
+EXPORT_SYMBOL(pnv_phb_to_cxl);
+
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num)
+{
+  struct pci_controller *hose = pci_bus_to_host(dev-bus);
+  struct pnv_phb *phb = hose-private_data;
+  int hwirq = msi_bitmap_alloc_hwirqs(phb-msi_bmp, num);
+
+  if (hwirq  0) {
+  dev_warn(dev-dev, Failed to find a free MSI\n);
+  return -ENOSPC;
+  }
+
+  return phb-msi_base + hwirq;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs);
+
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num)
+{
+  struct pci_controller *hose = pci_bus_to_host(dev-bus);
+  struct pnv_phb *phb = hose-private_data;
+
+  msi_bitmap_free_hwirqs(phb-msi_bmp, hwirq - phb-msi_base, num);
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirqs);
+
+
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+ struct pci_dev *dev, int num)
+{
+  struct pci_controller *hose = pci_bus_to_host(dev-bus);
+

Re: [PATCH v3 03/21] powerpc/8xx: exception InstructionAccess does not exist on MPC8xx

2014-09-19 Thread leroy christophe


Le 18/09/2014 22:02, Joakim Tjernlund a écrit :

christophe leroy christophe.le...@c-s.fr wrote on 2014/09/18 21:11:01:


Le 18/09/2014 20:12, Joakim Tjernlund a écrit :

leroy christophe christophe.le...@c-s.fr wrote on 2014/09/18

18:42:14:

Le 18/09/2014 17:15, Joakim Tjernlund a écrit :

Christophe Leroy christophe.le...@c-s.fr wrote on 2014/09/17

18:36:57:

Exception InstructionAccess does not exist on MPC8xx. No need to

branch

there from somewhere else.

Handling can be done directly in InstructionTLBError Exception.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

. = 0x1200
DataStoreTLBMiss:
@@ -477,7 +475,11 @@ DataStoreTLBMiss:
 */
. = 0x1300
InstructionTLBError:
-bInstructionAccess
+EXCEPTION_PROLOG
+mrr4,r12
+mrr5,r9
+/* 0x400 is InstructionAccess exception, needed by

bad_page_fault()

*/

+EXC_XFER_LITE(0x400, handle_page_fault)

You should have the code in TLBMiss and have the TLBError branch there

as

that is the common case.

As far as I remember, I tried it already but we don't have enough space
in TLBmiss for that. We can only have 40 instructions.

Do your other optimizations first, then you might have space :)




Even with the optimisation first, there is no chance to fit ITLBError 
instructions after ITLBMiss and before DTLBMiss.
After optimisation, TLBMiss goes from 0x1100 to 0x1174. TLBError goes 
from 0x1300 to 0x13b4. DTLBMiss is at 0x1200

And this is without CPU6 ERRATA. So this is hopeless I believe.

Christophe
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 02/21] powerpc/8xx: Use SCRATCH0 and SCRATCH1 also for TLB handlers

2014-09-19 Thread Christophe Leroy
SCRATCH0 and SCRATCH1 are only used in exception prologs, where no other
exception can happen. There is therefore no need to preserve them across
TLB handlers; we can use them there as in other exceptions. One of the
advantages is that they do not suffer from the CPU6 errata, unlike the M_TW
register.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |  104 --
 1 files changed, 36 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 1329c5a..3af6db1 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -104,12 +104,15 @@ turn_on_mmu:
  * task's thread_struct.
  */
 #define EXCEPTION_PROLOG   \
-   mtspr   SPRN_SPRG_SCRATCH0,r10; \
-   mtspr   SPRN_SPRG_SCRATCH1,r11; \
-   mfcrr10;\
+   EXCEPTION_PROLOG_0; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
 
+#define EXCEPTION_PROLOG_0 \
+   mtspr   SPRN_SPRG_SCRATCH0,r10; \
+   mtspr   SPRN_SPRG_SCRATCH1,r11; \
+   mfcrr10
+
 #define EXCEPTION_PROLOG_1 \
mfspr   r11,SPRN_SRR1;  /* check whether user or kernel */ \
andi.   r11,r11,MSR_PR; \
@@ -145,6 +148,14 @@ turn_on_mmu:
SAVE_2GPRS(7, r11)
 
 /*
+ * Exception exit code.
+ */
+#define EXCEPTION_EPILOG_0 \
+   mtcrr10;\
+   mfspr   r10,SPRN_SPRG_SCRATCH0; \
+   mfspr   r11,SPRN_SPRG_SCRATCH1
+
+/*
  * Note: code which follows this uses cr0.eq (set if from kernel),
  * r11, r12 (SRR0), and r9 (SRR1).
  *
@@ -293,16 +304,8 @@ InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
 #endif
-   DO_8xx_CPU6(0x3f80, r3)
-   mtspr   SPRN_M_TW, r10  /* Save a couple of working registers */
-   mfcrr10
-#ifdef CONFIG_8xx_CPU6
-   stw r10, 0(r0)
-   stw r11, 4(r0)
-#else
-   mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG_SCRATCH2, r11
-#endif
+   EXCEPTION_PROLOG_0
+   mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
addir11, r10, 0x1000
@@ -359,18 +362,11 @@ InstructionTLBMiss:
mtspr   SPRN_MI_RPN, r10/* Update TLB entry */
 
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-   mfspr   r10, SPRN_DAR
-   mtcrr10
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG_SCRATCH2
-#else
-   lwz r11, 0(r0)
-   mtcrr11
-   lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
-   mfspr   r10, SPRN_M_TW
+   mfspr   r10, SPRN_SPRG_SCRATCH2
+   EXCEPTION_EPILOG_0
rfi
 2:
mfspr   r11, SPRN_SRR1
@@ -381,19 +377,11 @@ InstructionTLBMiss:
mtspr   SPRN_SRR1, r11
 
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-   mfspr   r10, SPRN_DAR
-   mtcrr10
-   li  r11, 0x00f0
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG_SCRATCH2
-#else
-   lwz r11, 0(r0)
-   mtcrr11
-   lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
-   mfspr   r10, SPRN_M_TW
+   mfspr   r10, SPRN_SPRG_SCRATCH2
+   EXCEPTION_EPILOG_0
b   InstructionAccess
 
. = 0x1200
@@ -401,16 +389,8 @@ DataStoreTLBMiss:
 #ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
 #endif
-   DO_8xx_CPU6(0x3f80, r3)
-   mtspr   SPRN_M_TW, r10  /* Save a couple of working registers */
-   mfcrr10
-#ifdef CONFIG_8xx_CPU6
-   stw r10, 0(r0)
-   stw r11, 4(r0)
-#else
-   mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG_SCRATCH2, r11
-#endif
+   EXCEPTION_PROLOG_0
+   mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_M_TWB /* Get level 1 table entry address */
 
/* If we are faulting a kernel address, we have to use the
@@ -483,19 +463,12 @@ DataStoreTLBMiss:
mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
 
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-   mfspr   r10, SPRN_DAR
-   mtcrr10
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG_SCRATCH2
-#else
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   lwz r11, 0(r0)
-   mtcrr11
-   lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
-   mfspr   r10, SPRN_M_TW
+   mtspr   SPRN_DAR, r11   /* Tag DAR */
+   mfspr   r10, SPRN_SPRG_SCRATCH2
+   EXCEPTION_EPILOG_0
rfi
 
 /* This is an instruction TLB error on the MPC8xx.  This could be due
@@ -519,23 +492,18 @@ DataTLBError:
 #ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
 #endif
-   DO_8xx_CPU6(0x3f80, r3)
-   mtspr   SPRN_M_TW, r10  /* Save a couple of working registers */
-   mfcrr10
-   stw r10, 0(r0)
-   

[PATCH v4 00/21] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages

2014-09-19 Thread Christophe Leroy
This patchset:
1) provides several MMU TLB handling optimisations on MPC8xx.
2) adds support of 16k pages on MPC8xx.
All changes have been successfully tested on a custom board equipped with MPC885

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr
Tested-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- Patch number 10 removed, it was implementing a 16 bit alignment of the PGDIR.
It is not worth potentially wasting up to 64k of memory just for removing one
instruction (ori).
- Preserve r11 while calculating the level 2 address, therefore no more need to
save r11 into CR.

Changes in v3:
- A few fixes following review from Joakim Tjernlund
- Removed the major hack which was saving registers in memory for CPU6 errata
- Invalidating non present TLB entries earlier (in head_8xx instead of fault.c)

Changes in v4:
- Resubmitting patches 3 and 7 after new comments from Joakim and feedback
from Scott. This has an impact on patches 5, 14, 19 and 21.
- Fixed patch 20 that didn't apply

 arch/powerpc/Kconfig |   2 +-
 arch/powerpc/include/asm/mmu-8xx.h   |   2 +
 arch/powerpc/include/asm/pgtable-ppc32.h |  20 ++
 arch/powerpc/include/asm/pte-8xx.h   |   7 +-
 arch/powerpc/include/asm/reg.h   |   3 +-
 arch/powerpc/kernel/head_8xx.S   | 366 -
 arch/powerpc/mm/fault.c  |   7 -
 7 files changed, 201 insertions(+), 206 deletions(-)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 01/21] powerpc/8xx: Declare SPRG2 as a SCRATCH register

2014-09-19 Thread Christophe Leroy
Since commit 469d62be9263b92f2c3329540cbb1c076111f4f3, SPRG2 is used as a
scratch register just like SPRG0 and SPRG1. So declare it as such and fix
the comment, which has not been valid since that commit.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/include/asm/reg.h |3 ++-
 arch/powerpc/kernel/head_8xx.S |   10 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index cb9c174..b6a7d62 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -888,7 +888,7 @@
  * 32-bit 8xx:
  * - SPRG0 scratch for exception vectors
  * - SPRG1 scratch for exception vectors
- * - SPRG2 apparently unused but initialized
+ * - SPRG2 scratch for exception vectors
  *
  */
 #ifdef CONFIG_PPC64
@@ -994,6 +994,7 @@
 #ifdef CONFIG_8xx
 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2
 #endif
 
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 55d12fb..1329c5a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -301,7 +301,7 @@ InstructionTLBMiss:
stw r11, 4(r0)
 #else
mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG2, r11
+   mtspr   SPRN_SPRG_SCRATCH2, r11
 #endif
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
@@ -363,7 +363,7 @@ InstructionTLBMiss:
mfspr   r10, SPRN_DAR
mtcrr10
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG2
+   mfspr   r11, SPRN_SPRG_SCRATCH2
 #else
lwz r11, 0(r0)
mtcrr11
@@ -386,7 +386,7 @@ InstructionTLBMiss:
mtcrr10
li  r11, 0x00f0
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG2
+   mfspr   r11, SPRN_SPRG_SCRATCH2
 #else
lwz r11, 0(r0)
mtcrr11
@@ -409,7 +409,7 @@ DataStoreTLBMiss:
stw r11, 4(r0)
 #else
mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG2, r11
+   mtspr   SPRN_SPRG_SCRATCH2, r11
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table entry address */
 
@@ -487,7 +487,7 @@ DataStoreTLBMiss:
mfspr   r10, SPRN_DAR
mtcrr10
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG2
+   mfspr   r11, SPRN_SPRG_SCRATCH2
 #else
mtspr   SPRN_DAR, r11   /* Tag DAR */
lwz r11, 0(r0)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 03/21] powerpc/8xx: exception InstructionAccess does not exist on MPC8xx

2014-09-19 Thread Christophe Leroy
Exception InstructionAccess does not exist on MPC8xx. No need to branch there 
from somewhere else. 
Handling can be done directly in InstructionTLBError Exception.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- arch/powerpc/mm/fault.c uses the vector number, so make sure it understands
the new ones.

Changes in v4:
- Removing the change to mm/fault.c, faking the exception id to make mm/fault
think it comes from InstructionAccess.

 arch/powerpc/kernel/head_8xx.S | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 3af6db1..ec1958f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -234,15 +234,10 @@ DataAccess:
EXC_XFER_LITE(0x300, handle_page_fault)
 
 /* Instruction access exception.
- * This is never generated by the MPC8xx.  We jump to it for other
- * translation errors.
+ * This is never generated by the MPC8xx.
  */
. = 0x400
 InstructionAccess:
-   EXCEPTION_PROLOG
-   mr  r4,r12
-   mr  r5,r9
-   EXC_XFER_LITE(0x400, handle_page_fault)
 
 /* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
@@ -382,7 +377,7 @@ InstructionTLBMiss:
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
-   b   InstructionAccess
+   b   InstructionTLBError
 
. = 0x1200
 DataStoreTLBMiss:
@@ -477,7 +472,11 @@ DataStoreTLBMiss:
  */
. = 0x1300
 InstructionTLBError:
-   b   InstructionAccess
+   EXCEPTION_PROLOG
+   mr  r4,r12
+   mr  r5,r9
+   /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+   EXC_XFER_LITE(0x400, handle_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We can catch that
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 04/21] powerpc/8xx: Remove loading of r10 at end of FixupDAR

2014-09-19 Thread Christophe Leroy
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, r10 is not used anymore
after FixupDAR. There is therefore no need to set it up with the value of DAR.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |7 +++
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index fbe5d10..e59e39e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -495,7 +495,7 @@ DataTLBError:
mfspr   r10, SPRN_DAR
cmpwi   cr0, r10, 0x00f0
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
-DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of 
DAR */
+DARFixed:/* Return from dcbx instruction bug workaround */
 #ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
@@ -524,7 +524,7 @@ DARFixed:/* Return from dcbx instruction bug workaround, 
r10 holds value of DAR
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi 
instructions
  * by decoding the registers used by the dcbx instruction and adding them.
- * DAR is set to the calculated address and r10 also holds the EA on exit.
+ * DAR is set to the calculated address.
  */
  /* define if you don't want to use self modifying code */
 #define NO_SELF_MODIFYING_CODE
@@ -564,8 +564,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq+142f
cmpwi   cr0, r10, 1964  /* Is icbi? */
beq+142f
-141:   mfspr   r10, SPRN_DAR   /* r10 must hold DAR at exit */
-   b   DARFixed/* Nope, go back to normal TLB processing */
+141:   b   DARFixed/* Nope, go back to normal TLB processing */
 
 144:   mfspr   r10, SPRN_DSISR
rlwinm  r10, r10,0,7,5  /* Clear store bit for buggy dcbst insn */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 10/21] powerpc/8xx: Duplicate two insns instead of branching

2014-09-19 Thread Christophe Leroy
Branching takes two cycles on MPC8xx. Let's duplicate the two instructions
and avoid the branching.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5037420..4a49ff3 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -638,9 +638,11 @@ modified_instr:
 
/* special handling for r10,r11 since these are modified already */
 153:   mfspr   r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */
-   b   155f
+   add r10, r10, r11   /* add it */
+   mfctr   r11 /* restore r11 */
+   b   151b
 154:   mfspr   r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */
-155:   add r10, r10, r11   /* add it */
+   add r10, r10, r11   /* add it */
mfctr   r11 /* restore r11 */
b   151b
 #endif
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 05/21] powerpc/8xx: Fix comment about DIRTY update

2014-09-19 Thread Christophe Leroy
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, dirty updates are not
handled here anymore. So we fix the comment.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index e4086d6..10054b8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -479,12 +479,8 @@ InstructionTLBError:
EXC_XFER_LITE(0x400, handle_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
- * many reasons, including a dirty update to a pte.  We can catch that
- * one here, but anything else is an error.  First, we track down the
- * Linux pte.  If it is valid, write access is allowed, but the
- * page dirty bit is not set, we will set it and reload the TLB.  For
- * any other case, we bail out to a higher level function that can
- * handle it.
+ * many reasons, including a dirty update to a pte.  We bail out to
+ * a higher level function that can handle it.
  */
. = 0x1400
 DataTLBError:
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 07/21] powerpc/8xx: DataAccess exception not generated by MPC8xx

2014-09-19 Thread Christophe Leroy
DataAccess exception is never generated by MPC8xx so do the job directly where
it is used to avoid an unnecessary branching.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- arch/powerpc/mm/fault.c uses the vector number, so make sure it understands
the new ones.

Changes in v4:
- Removing the change to mm/fault.c, faking the exception id to make mm/fault
think it comes from DataAccess.

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a535576..570c1ee 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -219,19 +219,10 @@ MachineCheck:
EXC_XFER_STD(0x200, machine_check_exception)
 
 /* Data access exception.
- * This is never generated by the MPC8xx.  We jump to it for other
- * translation errors.
+ * This is never generated by the MPC8xx.
  */
. = 0x300
 DataAccess:
-   EXCEPTION_PROLOG
-   mfspr   r10,SPRN_DSISR
-   stw r10,_DSISR(r11)
-   mr  r5,r10
-   mfspr   r4,SPRN_DAR
-   li  r10,0x00f0
-   mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
-   EXC_XFER_LITE(0x300, handle_page_fault)
 
 /* Instruction access exception.
  * This is never generated by the MPC8xx.
@@ -491,7 +482,15 @@ DataTLBError:
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_EPILOG_0
-   b   DataAccess
+   EXCEPTION_PROLOG
+   mfspr   r10,SPRN_DSISR
+   stw r10,_DSISR(r11)
+   mr  r5,r10
+   mfspr   r4,SPRN_DAR
+   li  r10,0x00f0
+   mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
+   /* 0x300 is DataAccess exception, needed by bad_page_fault() */
+   EXC_XFER_LITE(0x300, handle_page_fault)
 
EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 12/21] powerpc/8xx: Don't use MD_TWC for walk

2014-09-19 Thread Christophe Leroy
MD_TWC can only be used properly with 4k pages.
So let's calculate the level 2 table index ourselves.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- No need to save r11 in cr, we can do without modifying r11 in DataStoreTLBMiss

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |   28 
 1 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ad15070..0f571f5 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -297,8 +297,6 @@ InstructionTLBMiss:
addir11, r10, -0x1000
tlbie   r11
 #endif
-   DO_8xx_CPU6(0x3780, r3)
-   mtspr   SPRN_MD_EPN, r10/* Have to use MD_EPN for walk, MI_EPN 
can't */
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
@@ -326,10 +324,9 @@ InstructionTLBMiss:
ori r11,r11,1   /* Set valid bit */
DO_8xx_CPU6(0x2b80, r3)
mtspr   SPRN_MI_TWC, r11/* Set segment attributes */
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
-   mfspr   r11, SPRN_MD_TWC/* and get the pte address */
-   lwz r10, 0(r11) /* Get the pte */
+   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   lwzxr10, r10, r11   /* Get the pte */
 
 #ifdef CONFIG_SWAP
andi.   r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
@@ -395,12 +392,13 @@ DataStoreTLBMiss:
 
/* We have a pte table, so load fetch the pte from the table.
 */
-   ori r11, r11, 1 /* Set valid bit in physical L2 page */
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
-   mfspr   r10, SPRN_MD_TWC/* and get the pte address */
+   mfspr   r10, SPRN_MD_EPN/* Get address of fault */
+   /* Extract level 2 index */
+   rlwinm  r10, r10, 22, 20, 29
+   rlwimi  r10, r11, 0, 0, 19  /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
 
+   ori r11, r11, 1 /* Set valid bit in physical L2 page */
/* Insert the Guarded flag into the TWC from the Linux PTE.
 * It is bit 27 of both the Linux PTE and the TWC (at least
 * I got that right :-).  It will be better when we can put
@@ -524,18 +522,16 @@ FixupDAR:/* Entry point for dcbx workaround. */
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
andis.  r11, r10, 0x8000/* Address = 0x8000 */
-   DO_8xx_CPU6(0x3780, r3)
-   mtspr   SPRN_MD_EPN, r10
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
beq-3f  /* Branch if user space */
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3: rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
lwzxr11, r10, r11   /* Get the level 1 entry */
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
-   mfspr   r11, SPRN_MD_TWC/* and get the pte address */
-   lwz r11, 0(r11) /* Get the pte */
+   rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
+   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   lwzxr11, r10, r11   /* Get the pte */
 #ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)   /* restore r3 from memory */
 #endif
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 14/21] powerpc/8xx: Const for TLB RPN forced value

2014-09-19 Thread Christophe Leroy
Value 0x00f0 is used to force bits in the TLB level 2 entry. This value is linked
to the page size and will vary when we change the page size. Let's define a
constant for it so that it is set in only one place.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index e8d8263..d777e3c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -40,6 +40,13 @@
 #else
 #define DO_8xx_CPU6(val, reg)
 #endif
+
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN0x00f0
+
__HEAD
 _ENTRY(_stext);
 _ENTRY(_start);
@@ -211,7 +218,7 @@ MachineCheck:
EXCEPTION_PROLOG
mfspr r4,SPRN_DAR
stw r4,_DAR(r11)
-   li r5,0x00f0
+   li r5,RPN_PATTERN
mtspr SPRN_DAR,r5   /* Tag DAR, to be used in DTLB Error */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
@@ -239,7 +246,7 @@ Alignment:
EXCEPTION_PROLOG
mfspr   r4,SPRN_DAR
stw r4,_DAR(r11)
-   li  r5,0x00f0
+   li  r5,RPN_PATTERN
mtspr   SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
mfspr   r5,SPRN_DSISR
stw r5,_DSISR(r11)
@@ -343,7 +350,7 @@ InstructionTLBMiss:
 * set.  All other Linux PTE bits control the behavior
 * of the MMU.
 */
-   li  r11, 0x00f0
+   li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
DO_8xx_CPU6(0x2d80, r3)
mtspr   SPRN_MI_RPN, r10/* Update TLB entry */
@@ -447,7 +454,7 @@ DataStoreTLBMiss:
 * set.  All other Linux PTE bits control the behavior
 * of the MMU.
 */
-2: li  r11, 0x00f0
+2: li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
DO_8xx_CPU6(0x3d80, r3)
mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
@@ -482,7 +489,7 @@ DataTLBError:
EXCEPTION_PROLOG_0
 
mfspr   r11, SPRN_DAR
-   cmpwi   cr0, r11, 0x00f0
+   cmpwi   cr0, r11, RPN_PATTERN
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_PROLOG_1
@@ -491,7 +498,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
stw r10,_DSISR(r11)
mr  r5,r10
mfspr   r4,SPRN_DAR
-   li  r10,0x00f0
+   li  r10,RPN_PATTERN
mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 19/21] powerpc/8xx: Don't restore regs to save them again.

2014-09-19 Thread Christophe Leroy
There is no need to restore the r10, r11 and cr registers at the end of the ITLBmiss
handler, as they are saved again to the same place in the ITLBError handler we are
jumping to.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index f372984..bcf0a43 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -383,8 +383,7 @@ InstructionTLBMiss:
lwz r3, 8(r0)
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
-   EXCEPTION_EPILOG_0
-   b   InstructionTLBError
+   b   InstructionTLBError1
 
. = 0x1200
 DataStoreTLBMiss:
@@ -473,7 +472,10 @@ DataStoreTLBMiss:
  */
. = 0x1300
 InstructionTLBError:
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG_0
+InstructionTLBError1:
+   EXCEPTION_PROLOG_1
+   EXCEPTION_PROLOG_2
mr  r4,r12
mr  r5,r9
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 21/21] powerpc/8xx: Invalidate non present TLB as early as possible

2014-09-19 Thread Christophe Leroy
8xx sometimes needs to load invalid/non-present TLBs in
its DTLB asm handler.

These must be invalidated separately, as linux mm doesn't.

Commit 5efab4a02c89c252fb4cce097aafde5f8208dbfe was invalidating them in
arch/powerpc/mm/fault.c. 
This patch does the invalidation earlier in order to free the TLB as soon as
possible. This also has the advantage of removing some 8xx specific code from
fault.c

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v3:
- New

Changes in v4:
- None (but impacted by changes in patch 3 and 7)

 arch/powerpc/kernel/head_8xx.S | 15 ++-
 arch/powerpc/mm/fault.c|  7 ---
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4964adf..3d4b8ee 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -475,8 +475,11 @@ InstructionTLBError1:
EXCEPTION_PROLOG_2
mr  r4,r12
mr  r5,r9
+   andis.  r10,r5,0x4000
+   beq+1f
+   tlbie   r4
/* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
-   EXC_XFER_LITE(0x400, handle_page_fault)
+1: EXC_XFER_LITE(0x400, handle_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We bail out to
@@ -492,11 +495,13 @@ DataTLBError:
 DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_PROLOG_1
EXCEPTION_PROLOG_2
-   mfspr   r10,SPRN_DSISR
-   stw r10,_DSISR(r11)
-   mr  r5,r10
+   mfspr   r5,SPRN_DSISR
+   stw r5,_DSISR(r11)
mfspr   r4,SPRN_DAR
-   li  r10,RPN_PATTERN
+   andis.  r10,r5,0x4000
+   beq+1f
+   tlbie   r4
+1: li  r10,RPN_PATTERN
mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7..9885aad 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -43,7 +43,6 @@
 #include asm/tlbflush.h
 #include asm/siginfo.h
 #include asm/debug.h
-#include mm/mmu_decl.h
 
 #include icswx.h
 
@@ -368,12 +367,6 @@ good_area:
goto bad_area;
 #endif /* CONFIG_6xx */
 #if defined(CONFIG_8xx)
-   /* 8xx sometimes need to load a invalid/non-present TLBs.
-* These must be invalidated separately as linux mm don't.
-*/
-   if (error_code  0x4000) /* no translation? */
-   _tlbil_va(address, 0, 0, 0);
-
 /* The MPC8xx seems to always set 0x8000, which is
  * undefined.  Of those that can be set, this is the only
  * one which seems bad.
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 16/21] powerpc/8xx: Better readability of ERRATA CPU6 handling

2014-09-19 Thread Christophe Leroy
This patch hides the SPR address needed for CPU6 ERRATA handling inside the macro.
Then we don't have to worry about this address directly in the code.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |   29 -
 1 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4dd6be0..a7af26e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,12 +33,19 @@
 
 /* Macro to make the code more readable. */
 #ifdef CONFIG_8xx_CPU6
-#define DO_8xx_CPU6(val, reg)  \
-   li  reg, val;   \
-   stw reg, 12(r0);\
-   lwz reg, 12(r0);
+#define SPRN_MI_TWC_ADDR   0x2b80
+#define SPRN_MI_RPN_ADDR   0x2d80
+#define SPRN_MD_TWC_ADDR   0x3b80
+#define SPRN_MD_RPN_ADDR   0x3d80
+
+#define MTSPR_CPU6(spr, reg, treg) \
+   li  treg, spr##_ADDR;   \
+   stw treg, 12(r0);   \
+   lwz treg, 12(r0);   \
+   mtspr   spr, reg
 #else
-#define DO_8xx_CPU6(val, reg)
+#define MTSPR_CPU6(spr, reg, treg) \
+   mtspr   spr, reg
 #endif
 
 /*
@@ -334,8 +341,7 @@ InstructionTLBMiss:
 * for this segment.
 */
ori r11,r11,1   /* Set valid bit */
-   DO_8xx_CPU6(0x2b80, r3)
-   mtspr   SPRN_MI_TWC, r11/* Set segment attributes */
+   MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
/* Extract level 2 index */
rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
@@ -354,8 +360,7 @@ InstructionTLBMiss:
 */
li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
-   DO_8xx_CPU6(0x2d80, r3)
-   mtspr   SPRN_MI_RPN, r10/* Update TLB entry */
+   MTSPR_CPU6(SPRN_MI_RPN, r10, r3)/* Update TLB entry */
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
@@ -424,8 +429,7 @@ DataStoreTLBMiss:
 * It is bit 25 in the Linux PTE and bit 30 in the TWC
 */
rlwimi  r11, r10, 32-5, 30, 30
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11
+   MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
 
/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
 * We also need to know if the insn is a load/store, so:
@@ -458,8 +462,7 @@ DataStoreTLBMiss:
 */
 2: li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
-   DO_8xx_CPU6(0x3d80, r3)
-   mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
+   MTSPR_CPU6(SPRN_MD_RPN, r10, r3)/* Update TLB entry */
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 18/21] powerpc/8xx: _PMD_PRESENT already set in level 1 entries

2014-09-19 Thread Christophe Leroy
When a PMD entry is valid, _PMD_PRESENT is set. Therefore, forcing that bit
during TLB loading is useless.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 48d3de8..bb7c816 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -340,7 +340,6 @@ InstructionTLBMiss:
/* We have a pte table, so load the MI_TWC with the attributes
 * for this segment.
 */
-   ori r11,r11,1   /* Set valid bit */
MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
/* Extract level 2 index */
@@ -417,7 +416,6 @@ DataStoreTLBMiss:
rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
 
-   ori r11, r11, 1 /* Set valid bit in physical L2 page */
/* Insert the Guarded flag into the TWC from the Linux PTE.
 * It is bit 27 of both the Linux PTE and the TWC (at least
 * I got that right :-).  It will be better when we can put
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 17/21] powerpc/8xx: set PTE bit 22 off TLBmiss

2014-09-19 Thread Christophe Leroy
No need to re-set this bit at each TLB miss. Let's set it in the PTE.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- Removed PPC405 related macro from PPC8xx specific code
- PTE_NONE_MASK doesn't need PAGE_ACCESSED in Linux 2.6

Changes in v4:
- None

 arch/powerpc/include/asm/pgtable-ppc32.h | 20 
 arch/powerpc/include/asm/pte-8xx.h   |  7 +--
 arch/powerpc/kernel/head_8xx.S   | 10 ++
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h 
b/arch/powerpc/include/asm/pgtable-ppc32.h
index 47edde8..35a9b44 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -172,6 +172,25 @@ static inline unsigned long pte_update(pte_t *p,
 #ifdef PTE_ATOMIC_UPDATES
unsigned long old, tmp;
 
+#ifdef CONFIG_PPC_8xx
+   unsigned long tmp2;
+
+   __asm__ __volatile__(\
+1: lwarx   %0,0,%4\n\
+   andc%1,%0,%5\n\
+   or  %1,%1,%6\n\
+   /* 0x200 == Extended encoding, bit 22 */ \
+   /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \
+   rlwimi  %1,%1,32-2,0x200\n /* get _PAGE_USER */ \
+   rlwinm  %3,%1,32-1,0x200\n /* get _PAGE_RW */ \
+   or  %1,%3,%1\n\
+   xori%1,%1,0x200\n
+  stwcx.  %1,0,%4\n\
+   bne-1b
+   : =r (old), =r (tmp), =m (*p), =r (tmp2)
+   : r (p), r (clr), r (set), m (*p)
+   : cc );
+#else /* CONFIG_PPC_8xx */
__asm__ __volatile__(\
 1: lwarx   %0,0,%3\n\
andc%1,%0,%4\n\
@@ -182,6 +201,7 @@ static inline unsigned long pte_update(pte_t *p,
: =r (old), =r (tmp), =m (*p)
: r (p), r (clr), r (set), m (*p)
: cc );
+#endif /* CONFIG_PPC_8xx */
 #else /* PTE_ATOMIC_UPDATES */
unsigned long old = pte_val(*p);
*p = __pte((old  ~clr) | set);
diff --git a/arch/powerpc/include/asm/pte-8xx.h 
b/arch/powerpc/include/asm/pte-8xx.h
index d44826e..daa4616 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -48,19 +48,22 @@
  */
 #define _PAGE_RW   0x0400  /* lsb PP bits, inverted in HW */
 #define _PAGE_USER 0x0800  /* msb PP bits */
+/* set when neither _PAGE_USER nor _PAGE_RW are set */
+#define _PAGE_KNLRO0x0200
 
 #define _PMD_PRESENT   0x0001
 #define _PMD_BAD   0x0ff0
 #define _PMD_PAGE_MASK 0x000c
 #define _PMD_PAGE_8M   0x000c
 
-#define _PTE_NONE_MASK _PAGE_ACCESSED
+#define _PTE_NONE_MASK _PAGE_KNLRO
 
 /* Until my rework is finished, 8xx still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES 1
 
 /* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO(_PAGE_SHARED)
+#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_KNLRO)
+#define _PAGE_KERNEL_ROX   (_PAGE_EXEC | _PAGE_KNLRO)
 #define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a7af26e..48d3de8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -445,14 +445,8 @@ DataStoreTLBMiss:
and r11, r11, r10
rlwimi  r10, r11, 0, _PAGE_PRESENT
 #endif
-   /* Honour kernel RO, User NA */
-   /* 0x200 == Extended encoding, bit 22 */
-   rlwimi  r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
-   /* r11 =  (r10  _PAGE_RW)  1 */
-   rlwinm  r11, r10, 32-1, 0x200
-   or  r10, r11, r10
-   /* invert RW and 0x200 bits */
-   xorir10, r10, _PAGE_RW | 0x200
+   /* invert RW */
+   xorir10, r10, _PAGE_RW
 
/* The Linux PTE won't go exactly into the MMU TLB.
 * Software indicator bits 22 and 28 must be clear.
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 20/21] powerpc/8xx: Use DAR to save r3 for CPU6 ERRATA

2014-09-19 Thread Christophe Leroy
As we are no longer using DAR to save registers, it is now available for
saving the r3 register used for CPU6 ERRATA handling. Therefore we can
remove the major hack which was to use memory location 0 to save r3.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v3:
- New

Changes in v4:
- Fixed the patch as it didn't apply.

 arch/powerpc/kernel/head_8xx.S | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index e21f0b2..3e8e341 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -83,13 +83,6 @@ _ENTRY(_start);
  * 8M 1:1.  I also mapped an additional I/O space 1:1 so we can get to
  * the internal processor registers before MMU_init is called.
  *
- * The TLB code currently contains a major hack.  Since I use the condition
- * code register, I have to save and restore it.  I am out of registers, so
- * I just store it in memory location 0 (the TLB handlers are not reentrant).
- * To avoid making any decisions, I need to use the segment valid bit
- * in the first level table, but that would require many changes to the
- * Linux page directory/table functions that I don't want to do right now.
- *
  * -- Dan
  */
.globl  __start
@@ -304,7 +297,7 @@ SystemCall:
  */
 InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
-   stw r3, 8(r0)
+   mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
mtspr   SPRN_SPRG_SCRATCH2, r10
@@ -349,7 +342,10 @@ InstructionTLBMiss:
 #ifdef CONFIG_SWAP
andi.   r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
cmpwi   cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
+   li  r11, RPN_PATTERN
bne-cr0, 2f
+#else
+   li  r11, RPN_PATTERN
 #endif
/* The Linux PTE won't go exactly into the MMU TLB.
 * Software indicator bits 21 and 28 must be clear.
@@ -357,28 +353,29 @@ InstructionTLBMiss:
 * set.  All other Linux PTE bits control the behavior
 * of the MMU.
 */
-   li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
MTSPR_CPU6(SPRN_MI_RPN, r10, r3)/* Update TLB entry */
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)
+   mfspr   r3, SPRN_DAR
+   mtspr   SPRN_DAR, r11   /* Tag DAR */
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
rfi
 2:
-   mfspr   r11, SPRN_SRR1
+   mfspr   r10, SPRN_SRR1
/* clear all error bits as TLB Miss
 * sets a few unconditionally
*/
-   rlwinm  r11, r11, 0, 0x
-   mtspr   SPRN_SRR1, r11
+   rlwinm  r10, r10, 0, 0x
+   mtspr   SPRN_SRR1, r10
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)
+   mfspr   r3, SPRN_DAR
+   mtspr   SPRN_DAR, r11   /* Tag DAR */
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
b   InstructionTLBError1
@@ -386,7 +383,7 @@ InstructionTLBMiss:
. = 0x1200
 DataStoreTLBMiss:
 #ifdef CONFIG_8xx_CPU6
-   stw r3, 8(r0)
+   mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
mtspr   SPRN_SPRG_SCRATCH2, r10
@@ -457,7 +454,7 @@ DataStoreTLBMiss:
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)
+   mfspr   r3, SPRN_DAR
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
mfspr   r10, SPRN_SPRG_SCRATCH2
@@ -527,7 +524,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
 #define NO_SELF_MODIFYING_CODE
 FixupDAR:/* Entry point for dcbx workaround. */
 #ifdef CONFIG_8xx_CPU6
-   stw r3, 8(r0)
+   mtspr   SPRN_DAR, r3
 #endif
mtspr   SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
@@ -546,7 +543,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr11, r10, r11   /* Get the pte */
 #ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)   /* restore r3 from memory */
+   mfspr   r3, SPRN_DAR
 #endif
/* concat physical page address(r11) and page offset(r10) */
mfspr   r10, SPRN_SRR0
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 15/21] powerpc/8xx: Implement 16k pages

2014-09-19 Thread Christophe Leroy
This patch activates the handling of 16k pages on the MPC8xx.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/Kconfig   |2 +-
 arch/powerpc/include/asm/mmu-8xx.h |2 ++
 arch/powerpc/kernel/head_8xx.S |4 
 3 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5f44d3b..dc5f64e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -518,7 +518,7 @@ config PPC_4K_PAGES
bool 4k page size
 
 config PPC_16K_PAGES
-   bool 16k page size if 44x
+   bool 16k page size if 44x || PPC_8xx
 
 config PPC_64K_PAGES
bool 64k page size if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64
diff --git a/arch/powerpc/include/asm/mmu-8xx.h 
b/arch/powerpc/include/asm/mmu-8xx.h
index 3d11d3c..986b9e1 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -56,6 +56,7 @@
  * additional information from the MI_EPN, and MI_TWC registers.
  */
 #define SPRN_MI_RPN790
+#define MI_SPS16K  0x0008  /* Small page size (0 = 4k, 1 = 16k) */
 
 /* Define an RPN value for mapping kernel memory to large virtual
  * pages for boot initialization.  This has real page number of 0,
@@ -129,6 +130,7 @@
  * additional information from the MD_EPN, and MD_TWC registers.
  */
 #define SPRN_MD_RPN798
+#define MD_SPS16K  0x0008  /* Small page size (0 = 4k, 1 = 16k) */
 
 /* This is a temporary storage register that could be used to save
  * a processor working register during a tablewalk.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 8966262..4dd6be0 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -45,7 +45,11 @@
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
  */
+#ifdef CONFIG_PPC_16K_PAGES
+#define RPN_PATTERN(0x00f0 | MD_SPS16K)
+#else
 #define RPN_PATTERN0x00f0
+#endif
 
__HEAD
 _ENTRY(_stext);
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 11/21] powerpc/8xx: Use M_TW instead of M_TWB

2014-09-19 Thread Christophe Leroy
Use M_TW instead of M_TWB for storing Level 1 table address as M_TWB requires
4k aligned tables, which is only the case with 4k pages.
Consequently, we have to calculate the level 1 table index by ourselves.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |   48 ++---
 1 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4a49ff3..ad15070 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -276,8 +276,8 @@ SystemCall:
. = 0x1100
 /*
  * For the MPC8xx, this is a software tablewalk to load the instruction
- * TLB.  It is modelled after the example in the Motorola manual.  The task
- * switch loads the M_TWB register with the pointer to the first level table.
+ * TLB.  The task switch loads the M_TW register with the pointer to the first
+ * level table.
  * If we discover there is no second level table (value is zero) or if there
  * is an invalid pte, we load that into the TLB, which causes another fault
  * into the TLB Error interrupt where we can handle such problems.
@@ -299,7 +299,6 @@ InstructionTLBMiss:
 #endif
DO_8xx_CPU6(0x3780, r3)
mtspr   SPRN_MD_EPN, r10/* Have to use MD_EPN for walk, MI_EPN 
can't */
-   mfspr   r10, SPRN_M_TWB /* Get level 1 table entry address */
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
@@ -307,14 +306,17 @@ InstructionTLBMiss:
 #ifdef CONFIG_MODULES
/* Only modules will cause ITLB Misses as we always
 * pin the first 8MB of kernel memory */
-   andi.   r11, r10, 0x0800/* Address = 0x8000 */
+   andis.  r11, r10, 0x8000/* Address = 0x8000 */
+#endif
+   mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
+#ifdef CONFIG_MODULES
beq 3f
-   lis r11, swapper_pg_dir@h
-   ori r11, r11, swapper_pg_dir@l
-   rlwimi  r10, r11, 0, 2, 19
+   lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
+   ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
 #endif
-   lwz r11, 0(r10) /* Get the level 1 entry */
+   rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f  /* If zero, don't try to find a pte */
 
@@ -375,18 +377,19 @@ DataStoreTLBMiss:
 #endif
EXCEPTION_PROLOG_0
mtspr   SPRN_SPRG_SCRATCH2, r10
-   mfspr   r10, SPRN_M_TWB /* Get level 1 table entry address */
+   mfspr   r10, SPRN_MD_EPN
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
 */
-   andi.   r11, r10, 0x0800
+   andis.  r11, r10, 0x8000
+   mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
beq 3f
-   lis r11, swapper_pg_dir@h
-   ori r11, r11, swapper_pg_dir@l
-   rlwimi  r10, r11, 0, 2, 19
+   lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
+   ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
-   lwz r11, 0(r10) /* Get the level 1 entry */
+   rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f  /* If zero, don't try to find a pte */
 
@@ -523,12 +526,12 @@ FixupDAR:/* Entry point for dcbx workaround. */
andis.  r11, r10, 0x8000/* Address = 0x8000 */
DO_8xx_CPU6(0x3780, r3)
mtspr   SPRN_MD_EPN, r10
-   mfspr   r11, SPRN_M_TWB /* Get level 1 table entry address */
+   mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
beq-3f  /* Branch if user space */
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
-   rlwimi  r11, r10, 32-20, 0xffc /* r11 = r11~0xffc|(r1020)0xffc */
-3: lwz r11, 0(r11) /* Get the level 1 entry */
+3: rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   lwzxr11, r10, r11   /* Get the level 1 entry */
DO_8xx_CPU6(0x3b80, r3)
mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
mfspr   r11, SPRN_MD_TWC/* and get the pte address */
@@ -537,6 +540,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
lwz r3, 8(r0)   /* restore r3 from memory */
 #endif
/* concat physical page address(r11) and page offset(r10) */
+   mfspr   r10, SPRN_SRR0
rlwimi  r11, r10, 0, 20, 31
lwz r11,0(r11)
 /* Check if it really is a dcbx instruction. */
@@ -692,11 +696,11 @@ start_here:
 #ifdef CONFIG_8xx_CPU6
   

[PATCH v4 09/21] powerpc/8xx: Optimize verification in FixupDAR

2014-09-19 Thread Christophe Leroy
By XORing the upper part of the instruction code, we get a value that can
directly be verified with the second test and we can remove the first test.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index e5a250c..5037420 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -542,10 +542,8 @@ FixupDAR:/* Entry point for dcbx workaround. */
 /* Check if it really is a dcbx instruction. */
 /* dcbt and dcbtst does not generate DTLB Misses/Errors,
  * no need to include them here */
-   srwir10, r11, 26/* check if major OP code is 31 */
-   cmpwi   cr0, r10, 31
-   bne-141f
-   rlwinm  r10, r11, 0, 21, 30
+   xoris   r10, r11, 0x7c00/* check if major OP code is 31 */
+   rlwinm  r10, r10, 0, 21, 5
cmpwi   cr0, r10, 2028  /* Is dcbz? */
beq+142f
cmpwi   cr0, r10, 940   /* Is dcbi? */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 06/21] powerpc/8xx: No need to save r10 and r3 when not calling FixupDAR

2014-09-19 Thread Christophe Leroy
r10 and r3 are only used inside the FixupDAR function. So let's save them inside
that function only.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |   27 +--
 1 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 171c6ef..845abf8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -482,20 +482,12 @@ InstructionTLBError:
  */
. = 0x1400
 DataTLBError:
-#ifdef CONFIG_8xx_CPU6
-   stw r3, 8(r0)
-#endif
EXCEPTION_PROLOG_0
-   mtspr   SPRN_SPRG_SCRATCH2, r10
 
-   mfspr   r10, SPRN_DAR
-   cmpwi   cr0, r10, 0x00f0
+   mfspr   r11, SPRN_DAR
+   cmpwi   cr0, r11, 0x00f0
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-#ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)
-#endif
-   mfspr   r10,SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
b   DataAccess
 
@@ -525,6 +517,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  /* define if you don't want to use self modifying code */
 #define NO_SELF_MODIFYING_CODE
 FixupDAR:/* Entry point for dcbx workaround. */
+#ifdef CONFIG_8xx_CPU6
+   stw r3, 8(r0)
+#endif
+   mtspr   SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
andis.  r11, r10, 0x8000/* Address = 0x8000 */
@@ -540,6 +536,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
mfspr   r11, SPRN_MD_TWC/* and get the pte address */
lwz r11, 0(r11) /* Get the pte */
+#ifdef CONFIG_8xx_CPU6
+   lwz r3, 8(r0)   /* restore r3 from memory */
+#endif
/* concat physical page address(r11) and page offset(r10) */
rlwimi  r11, r10, 0, 20, 31
lwz r11,0(r11)
@@ -560,15 +559,13 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq+142f
cmpwi   cr0, r10, 1964  /* Is icbi? */
beq+142f
-141:   b   DARFixed/* Nope, go back to normal TLB processing */
+141:   mfspr   r10,SPRN_SPRG_SCRATCH2
+   b   DARFixed/* Nope, go back to normal TLB processing */
 
 144:   mfspr   r10, SPRN_DSISR
rlwinm  r10, r10,0,7,5  /* Clear store bit for buggy dcbst insn */
mtspr   SPRN_DSISR, r10
 142:   /* continue, it was a dcbx, dcbi instruction. */
-#ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)   /* restore r3 from memory */
-#endif
 #ifndef NO_SELF_MODIFYING_CODE
andis.  r10,r11,0x1f/* test if reg RA is r0 */
li  r10,modified_instr@l
@@ -587,6 +584,7 @@ modified_instr:
bne+143f
subfr10,r0,r10  /* r10=r10-r0, only if reg RA is r0 */
 143:   mtdar   r10 /* store faulting EA in DAR */
+   mfspr   r10,SPRN_SPRG_SCRATCH2
b   DARFixed/* Go back to normal TLB handling */
 #else
mfctr   r10
@@ -640,6 +638,7 @@ modified_instr:
mfdar   r11
mtctr   r11 /* restore ctr reg from DAR */
mtdar   r10 /* save fault EA to DAR */
+   mfspr   r10,SPRN_SPRG_SCRATCH2
b   DARFixed/* Go back to normal TLB handling */
 
/* special handling for r10,r11 since these are modified already */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 08/21] powerpc/8xx: No need to restore registers and save them again.

2014-09-19 Thread Christophe Leroy
In the DTLBError handler there is no need to restore the r10, r11 and cr registers
after fixing DAR, as they are saved again to the same place just after.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5f04d5f..e5a250c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -478,8 +478,8 @@ DataTLBError:
cmpwi   cr0, r11, 0x00f0
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-   EXCEPTION_EPILOG_0
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG_1
+   EXCEPTION_PROLOG_2
mfspr   r10,SPRN_DSISR
stw r10,_DSISR(r11)
mr  r5,r10
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 13/21] powerpc/8xx: Use PAGE size related consts

2014-09-19 Thread Christophe Leroy
For PAGE size related operations, use PAGE size consts in order to be able to
use different page sizes in the future.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
Changes in v2:
- None

Changes in v3:
- None

Changes in v4:
- None

 arch/powerpc/kernel/head_8xx.S |   30 ++
 1 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 0f571f5..dcaee9f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -292,9 +292,9 @@ InstructionTLBMiss:
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
-   addir11, r10, 0x1000
+   addir11, r10, PAGE_SIZE
tlbie   r11
-   addir11, r10, -0x1000
+   addir11, r10, -PAGE_SIZE
tlbie   r11
 #endif
 
@@ -313,7 +313,8 @@ InstructionTLBMiss:
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
 #endif
-   rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   /* Extract level 1 index */
+   rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2)  1), (PAGE_SHIFT - 2)  1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f  /* If zero, don't try to find a pte */
@@ -325,7 +326,8 @@ InstructionTLBMiss:
DO_8xx_CPU6(0x2b80, r3)
mtspr   SPRN_MI_TWC, r11/* Set segment attributes */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
-   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   /* Extract level 2 index */
+   rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr10, r10, r11   /* Get the pte */
 
 #ifdef CONFIG_SWAP
@@ -385,7 +387,8 @@ DataStoreTLBMiss:
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
-   rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   /* Extract level 1 index */
+   rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2)  1), (PAGE_SHIFT - 2)  1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f  /* If zero, don't try to find a pte */
@@ -394,8 +397,8 @@ DataStoreTLBMiss:
 */
mfspr   r10, SPRN_MD_EPN/* Get address of fault */
/* Extract level 2 index */
-   rlwinm  r10, r10, 22, 20, 29
-   rlwimi  r10, r11, 0, 0, 19  /* Add level 2 base */
+   rlwinm  r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
+   rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
 
ori r11, r11, 1 /* Set valid bit in physical L2 page */
@@ -526,18 +529,20 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq-3f  /* Branch if user space */
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
-3: rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   /* Extract level 1 index */
+3: rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2)  1), (PAGE_SHIFT - 2)  1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
-   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   /* Extract level 2 index */
+   rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr11, r10, r11   /* Get the pte */
 #ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)   /* restore r3 from memory */
 #endif
/* concat physical page address(r11) and page offset(r10) */
mfspr   r10, SPRN_SRR0
-   rlwimi  r11, r10, 0, 20, 31
+   rlwimi  r11, r10, 0, 32 - PAGE_SHIFT, 31
lwz r11,0(r11)
 /* Check if it really is a dcbx instruction. */
 /* dcbt and dcbtst does not generate DTLB Misses/Errors,
@@ -913,12 +918,13 @@ set_dec_cpu6:
.globl  sdata
 sdata:
.globl  empty_zero_page
+   .align  PAGE_SHIFT
 empty_zero_page:
-   .space  4096
+   .space  PAGE_SIZE
 
.globl  swapper_pg_dir
 swapper_pg_dir:
-   .space  4096
+   .space  PGD_TABLE_SIZE
 
 /* Room for two PTE table poiners, usually the kernel and current user
  * pointer to their respective root page table (pgdir).
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/pci: remove duplicate declaration of pci_bus_find_capability

2014-09-19 Thread Wei Yang
pci_bus_find_capability() is declared in pci.h, so it is not necessary to
declare it again.

This patch removes it.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/pci-common.c |1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index b2814e2..9cfa069 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1561,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8)
 EARLY_PCI_OP(write, word, u16)
 EARLY_PCI_OP(write, dword, u32)
 
-extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, 
int cap);
 int early_find_capability(struct pci_controller *hose, int bus, int devfn,
  int cap)
 {
-- 
1.7.9.5

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc/pci: remove duplicate declaration of pci_bus_find_capability

2014-09-19 Thread Gavin Shan
On Fri, Sep 19, 2014 at 05:25:22PM +0800, Wei Yang wrote:
pci_bus_find_capability() is declared in pci.h, so it is not necessary to
declare it again.

This patch removes it.

Signed-off-by: Wei Yang weiy...@linux.vnet.ibm.com

Reviewed-by: Gavin Shan gws...@linux.vnet.ibm.com

Thanks,
Gavin

---
 arch/powerpc/kernel/pci-common.c |1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci-common.c 
b/arch/powerpc/kernel/pci-common.c
index b2814e2..9cfa069 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1561,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8)
 EARLY_PCI_OP(write, word, u16)
 EARLY_PCI_OP(write, dword, u32)

-extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, 
int cap);
 int early_find_capability(struct pci_controller *hose, int bus, int devfn,
 int cap)
 {
-- 
1.7.9.5


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

powerpc32: rearrange instructions order in ip_fast_csum()

2014-09-19 Thread Christophe Leroy
On PPC_8xx, lwz has a 2-cycle latency, and branching also takes 2 cycles.
As the size of the header is at least 5 words, we can unroll the loop for the
first words to reduce the number of branches, and we can re-order the
instructions to limit load latency.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
 arch/powerpc/lib/checksum_32.S | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 6d67e05..5500704 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -26,13 +26,17 @@
 _GLOBAL(ip_fast_csum)
lwz r0,0(r3)
lwzur5,4(r3)
-   addic.  r4,r4,-2
+   addic.  r4,r4,-4
addcr0,r0,r5
mtctr   r4
blelr-
-1: lwzur4,4(r3)
-   adder0,r0,r4
+   lwzur5,4(r3)
+   lwzur4,4(r3)
+   adder0,r0,r5
+1: adder0,r0,r4
+   lwzur4,4(r3)
bdnz1b
+   adder0,r0,r4
addze   r0,r0   /* add in final carry */
rlwinm  r3,r0,16,0,31   /* fold two halves together */
add r3,r0,r3
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/2] powerpc32: Optimise some IP checksum functions.

2014-09-19 Thread Christophe Leroy
This patchset provides a few optimisations related to IP checksum functions.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr
Tested-by: Christophe Leroy christophe.le...@c-s.fr

---
 arch/powerpc/include/asm/checksum.h | 28 
 arch/powerpc/lib/checksum_32.S  | 16 
 2 files changed, 28 insertions(+), 16 deletions(-)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/2] powerpc32: put csum_tcpudp_magic inline

2014-09-19 Thread Christophe Leroy
csum_tcpudp_magic() is only a few instructions, and does not modify any
register other than the returned result. So it is not worth having it as a
separate function and suffering the cost of the function call and the saving
of volatile registers.
This patch makes it inline by using the already existing csum_tcpudp_nofold()
function.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
 arch/powerpc/include/asm/checksum.h | 15 +++
 arch/powerpc/lib/checksum_32.S  | 16 
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/checksum.h 
b/arch/powerpc/include/asm/checksum.h
index 8251a3b..cfe806a 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -19,6 +19,7 @@
 #else
 extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 
+#ifdef __powerpc64__
 /*
  * computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented
@@ -27,6 +28,7 @@ extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
unsigned short len,
unsigned short proto,
__wsum sum);
+#endif
 
 /*
  * computes the checksum of a memory block at buff, length len,
@@ -127,6 +129,19 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, 
__be32 daddr,
 #endif
 }
 
+#ifndef __powerpc64__
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+   unsigned short len,
+   unsigned short proto,
+   __wsum sum)
+{
+   return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+#endif
 #endif
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 7874e8a..6d67e05 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -41,22 +41,6 @@ _GLOBAL(ip_fast_csum)
blr
 
 /*
- * Compute checksum of TCP or UDP pseudo-header:
- *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
- */
-_GLOBAL(csum_tcpudp_magic)
-   rlwimi  r5,r6,16,0,15   /* put proto in upper half of len */
-   addcr0,r3,r4/* add 4 32-bit words together */
-   adder0,r0,r5
-   adder0,r0,r7
-   addze   r0,r0   /* add in final carry */
-   rlwinm  r3,r0,16,0,31   /* fold two halves together */
-   add r3,r0,r3
-   not r3,r3
-   srwir3,r3,16
-   blr
-
-/*
  * computes the checksum of a memory block at buff, length len,
  * and adds in sum (32-bit)
  *
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc32: add support for csum_add()

2014-09-19 Thread Christophe Leroy
The C version of csum_add() as defined in include/net/checksum.h gives the
following assembly:
   0:   7c 04 1a 14 add r0,r4,r3
   4:   7c 64 00 10 subfc   r3,r4,r0
   8:   7c 63 19 10 subfe   r3,r3,r3
   c:   7c 63 00 50 subfr3,r3,r0

include/net/checksum.h also offers the possibility to define an arch specific
function.
This patch provides a ppc32 specific csum_add() inline function.

Signed-off-by: Christophe Leroy christophe.le...@c-s.fr

---
 arch/powerpc/include/asm/checksum.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/checksum.h 
b/arch/powerpc/include/asm/checksum.h
index cfe806a..a13aa1f 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -141,6 +141,19 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, 
__be32 daddr,
 {
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
+
+#define HAVE_ARCH_CSUM_ADD
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+__asm__(\n\
+   addc %0,%0,%1 \n\
+   addze %0,%0 \n\
+   
+   : =r (csum)
+   : r (addend), 0(csum));
+   return csum;
+}
+
 #endif
 #endif
 #endif /* __KERNEL__ */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/2] powerpc32: add support for csum_add()

2014-09-19 Thread Joakim Tjernlund
Christophe Leroy christophe.le...@c-s.fr wrote on 2014/09/19 15:57:56:
 Subject: [PATCH 2/2] powerpc32: add support for csum_add()
 
 The C version of csum_add() as defined in include/net/checksum.h gives 
the
 following assembly:
0:   7c 04 1a 14 add r0,r4,r3
4:   7c 64 00 10 subfc   r3,r4,r0
8:   7c 63 19 10 subfe   r3,r3,r3
c:   7c 63 00 50 subfr3,r3,r0
 
 include/net/checksum.h also offers the possibility to define an arch 
specific
 function.
 This patch provides a ppc32 specific csum_add() inline function.
 
 Signed-off-by: Christophe Leroy christophe.le...@c-s.fr
 
 ---

Ouch, this is still so. Back in 2010 I reported this to gcc:
  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43892

Anyway, 
Acked-by: Joakim Tjernlund joakim.tjernl...@transmode.se
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/2] powerpc32: add support for csum_add()

2014-09-19 Thread Joakim Tjernlund
Christophe Leroy christophe.le...@c-s.fr wrote on 2014/09/19 15:57:56:

 +#define HAVE_ARCH_CSUM_ADD
 +static inline __wsum csum_add(__wsum csum, __wsum addend)
 +{
 +__asm__(\n\
 +   addc %0,%0,%1 \n\
 +   addze %0,%0 \n\
 +   
 +   : =r (csum)
 +   : r (addend), 0(csum));

Hmm, I wonder if this would not be better written as:
: "+r" (csum): "r" (addend))

  Jocke
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 03/15] powerpc/msi: Improve IRQ bitmap allocator

2014-09-19 Thread Scott Wood
On Thu, 2014-09-18 at 18:26 +1000, Michael Neuling wrote:
 From: Ian Munsie imun...@au1.ibm.com
 
 Currently msi_bitmap_alloc_hwirqs() will round up any IRQ allocation requests
 to the nearest power of 2.  eg. ask for 5 IRQs and you'll get 8.  This wastes 
 a
 lot of IRQs which can be a scarce resource.
 
 For cxl we can require multiple IRQs for every context that is attached to 
 the
 accelerator.  For AFU directed accelerators, there may be 1000s of contexts
 attached, hence we can easily run out of IRQs, especially if we are needlessly
 wasting them.
 
 This changes the msi_bitmap_alloc_hwirqs() to allocate only the required 
 number
 of IRQs, hence avoiding this wastage.
 
 Signed-off-by: Ian Munsie imun...@au1.ibm.com
 Signed-off-by: Michael Neuling mi...@neuling.org
 ---
  arch/powerpc/sysdev/msi_bitmap.c | 18 +-
  1 file changed, 13 insertions(+), 5 deletions(-)

This conflicts with (and partially duplicates)
http://patchwork.ozlabs.org/patch/381892/
which I have in my tree.  How should we handle it?

Laurentiu, from looking at the overlap between patches I see a problem
with your existing patch, regarding the out-of-irqs path and
msi_bitmap_free_hwirqs(), so one way or another that needs to get fixed
soon.

-Scott

 diff --git a/arch/powerpc/sysdev/msi_bitmap.c 
 b/arch/powerpc/sysdev/msi_bitmap.c
 index 2ff6302..e001559 100644
 --- a/arch/powerpc/sysdev/msi_bitmap.c
 +++ b/arch/powerpc/sysdev/msi_bitmap.c
 @@ -24,28 +24,36 @@ int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int 
 num)
* This is fast, but stricter than we need. We might want to add
* a fallback routine which does a linear search with no alignment.
*/
 - offset = bitmap_find_free_region(bmp-bitmap, bmp-irq_count, order);
 + offset = bitmap_find_next_zero_area(bmp-bitmap, bmp-irq_count, 0,
 + num, (1  order) - 1);
 + if (offset  bmp-irq_count)
 + goto err;
 + bitmap_set(bmp-bitmap, offset, num);
   spin_unlock_irqrestore(bmp-lock, flags);
  
   pr_debug(msi_bitmap: allocated 0x%x (2^%d) at offset 0x%x\n,
num, order, offset);
  
   return offset;
 +err:
 + spin_unlock_irqrestore(bmp-lock, flags);
 + return -ENOMEM;
  }
 +EXPORT_SYMBOL(msi_bitmap_alloc_hwirqs);
  
  void msi_bitmap_free_hwirqs(struct msi_bitmap *bmp, unsigned int offset,
   unsigned int num)
  {
   unsigned long flags;
 - int order = get_count_order(num);
  
 - pr_debug(msi_bitmap: freeing 0x%x (2^%d) at offset 0x%x\n,
 -  num, order, offset);
 + pr_debug(msi_bitmap: freeing 0x%x at offset 0x%x\n,
 +  num, offset);
  
   spin_lock_irqsave(bmp-lock, flags);
 - bitmap_release_region(bmp-bitmap, offset, order);
 + bitmap_clear(bmp-bitmap, offset, num);
   spin_unlock_irqrestore(bmp-lock, flags);
  }
 +EXPORT_SYMBOL(msi_bitmap_free_hwirqs);
  
  void msi_bitmap_reserve_hwirq(struct msi_bitmap *bmp, unsigned int hwirq)
  {


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 03/15] powerpc/msi: Improve IRQ bitmap allocator

2014-09-19 Thread Scott Wood
On Fri, 2014-09-19 at 15:16 -0500, Scott Wood wrote:
 On Thu, 2014-09-18 at 18:26 +1000, Michael Neuling wrote:
  From: Ian Munsie imun...@au1.ibm.com
  
  Currently msi_bitmap_alloc_hwirqs() will round up any IRQ allocation 
  requests
  to the nearest power of 2.  eg. ask for 5 IRQs and you'll get 8.  This 
  wastes a
  lot of IRQs which can be a scarce resource.
  
  For cxl we can require multiple IRQs for every context that is attached to 
  the
  accelerator.  For AFU directed accelerators, there may be 1000s of contexts
  attached, hence we can easily run out of IRQs, especially if we are 
  needlessly
  wasting them.
  
  This changes the msi_bitmap_alloc_hwirqs() to allocate only the required 
  number
  of IRQs, hence avoiding this wastage.
  
  Signed-off-by: Ian Munsie imun...@au1.ibm.com
  Signed-off-by: Michael Neuling mi...@neuling.org
  ---
   arch/powerpc/sysdev/msi_bitmap.c | 18 +-
   1 file changed, 13 insertions(+), 5 deletions(-)
 
 This conflicts with (and partially duplicates)
 http://patchwork.ozlabs.org/patch/381892/
 which I have in my tree.  How should we handle it?
 
 Laurentiu, from looking at the overlap between patches I see a problem
 with your existing patch, regarding the out-of-irqs path and
 msi_bitmap_free_hwirqs(), so one way or another that needs to get fixed
 soon.

Given the problems with Laurentiu's patch, perhaps it'd be best for me
to just revert that patch in my tree, and respin it after this patchset
has been merged.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v4 04/11] drivers: base: support cpu cache information interface to userspace via sysfs

2014-09-19 Thread Stephen Boyd
On 09/03/14 10:00, Sudeep Holla wrote:
 From: Sudeep Holla sudeep.ho...@arm.com

 This patch adds initial support for providing processor cache information
 to userspace through sysfs interface. This is based on already existing
 implementations(x86, ia64, s390 and powerpc) and hence the interface is
 intended to be fully compatible.

 The main purpose of this generic support is to avoid further code
 duplication to support new architectures and also to unify all the existing
 different implementations.

 This implementation maintains the hierarchy of cache objects which reflects
 the system's cache topology. Cache devices are instantiated as needed as
 CPUs come online. The cache information is replicated per-cpu even if the
 caches are shared. A per-cpu array of cache information is maintained, mainly
 for sysfs-related bookkeeping.

 It also implements the shared_cpu_map attribute, which is essential for
 enabling both kernel and user-space to discover the system's overall cache
 topology.

 This patch also adds the missing ABI documentation for the existing cacheinfo
 sysfs interface, which is already well defined and widely used.

 Signed-off-by: Sudeep Holla sudeep.ho...@arm.com
 Cc: Greg Kroah-Hartman gre...@linuxfoundation.org
 Cc: Stephen Boyd sb...@codeaurora.org
 Cc: linux-...@vger.kernel.org
 Cc: linux...@de.ibm.com
 Cc: linux-arm-ker...@lists.infradead.org
 Cc: linux-i...@vger.kernel.org
 Cc: linuxppc-dev@lists.ozlabs.org
 Cc: linux-s...@vger.kernel.org
 Cc: x...@kernel.org


Reviewed-by: Stephen Boyd sb...@codeaurora.org
Tested-by: Stephen Boyd sb...@codeaurora.org

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev