[PATCH] powerpc/fsl: add power_off support for fsl platform
From: Wang Dongsheng The QIXIS System Logic FPGA supports managing system power, so we use QIXIS to power off the Freescale SoC. Signed-off-by: Wang Dongsheng diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 1f309cc..e1a1eb5 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -81,6 +81,8 @@ void __init corenet_gen_setup_arch(void) pr_info("%s board\n", ppc_md.name); mpc85xx_qe_init(); + + ppc_md_fixup(); } static const struct of_device_id of_device_ids[] = { diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 99269c0..6de9f1b 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -215,6 +215,54 @@ void fsl_rstcr_restart(char *cmd) } #endif +#define QIXIS_PWR_CTL2 0x21 +#define QIXIS_PWR_CTL2_PWR 0x80 +static void fsl_power_off(void) +{ + struct device_node *pixis_node; + void __iomem *pixis; + u32 pwroff_offset, value; + + pixis_node = of_find_compatible_node(NULL, NULL, "fsl,fpga-qixis"); + if (!pixis_node) { + pr_err("%s: Missing pixis node\n", __func__); + return; + } + + pwroff_offset = QIXIS_PWR_CTL2; + value = QIXIS_PWR_CTL2_PWR; + + pixis = of_iomap(pixis_node, 0); + of_node_put(pixis_node); + if (!pixis) { + pr_err("%s: Could not map pixis registers\n", __func__); + return; + } + + local_irq_disable(); + + setbits8(pixis + pwroff_offset, value); + + iounmap(pixis); + + while (1) + ; +} + +void ppc_md_fixup(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,fpga-qixis"); + if (!np) + return; + + of_node_put(np); + + pm_power_off = fsl_power_off; + ppc_md.halt = fsl_power_off; +} + #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) struct platform_diu_data_ops diu_ops; EXPORT_SYMBOL(diu_ops); diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index 4c5a19e..ca90e24 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ 
b/arch/powerpc/sysdev/fsl_soc.h @@ -21,6 +21,8 @@ struct device_node; extern void fsl_rstcr_restart(char *cmd); +void ppc_md_fixup(void); + /* The different ports that the DIU can be connected to */ enum fsl_diu_monitor_port { FSL_DIU_PORT_DVI, /* DVI */ -- 2.1.0.27.g96db324 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v3 12/24] powerpc/iommu/powernv: Release replaced TCE
On Thu, Jan 29, 2015 at 08:21:53PM +1100, Alexey Kardashevskiy wrote: > At the moment writing new TCE value to the IOMMU table fails with EBUSY > if there is a valid entry already. However PAPR specification allows > the guest to write new TCE value without clearing it first. > > Another problem this patch is addressing is the use of pool locks for > external IOMMU users such as VFIO. The pool locks are to protect > DMA page allocator rather than entries and since the host kernel does > not control what pages are in use, there is no point in pool locks and > exchange()+put_page(oldtce) is sufficient to avoid possible races. > > This adds an exchange() callback to iommu_table_ops which does the same > thing as set() plus it returns replaced TCE(s) so the caller can release > the pages afterwards. > > This implements exchange() for IODA2 only. This adds a requirement > for a platform to have exchange() implemented so from now on IODA2 is > the only supported PHB for VFIO-SPAPR. > > This replaces iommu_tce_build() and iommu_clear_tce() with > a single iommu_tce_xchg(). [snip] > @@ -294,8 +303,9 @@ static long tce_iommu_build(struct tce_container > *container, > > hva = (unsigned long) page_address(page) + > (tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK); > + oldtce = 0; > > - ret = iommu_tce_build(tbl, entry + 1, hva, direction); > + ret = iommu_tce_xchg(tbl, entry + i, hva, &oldtce, direction); Is the change from entry + 1 to entry + i here an actual bug fix? If so please mention it in the patch description. Paul. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] mmc: sdhci-pltfm: remove the unneeded check of disabled device
Since commit cd1e65044d44 ("of/device: Don't register disabled devices"), the disabled device will not be registered at all. So we don't need to do the check again in the platform device driver. And the check in the current code is useless even if we really run into a disabled device. In this case, it just doesn't parse the dtb for the infos such as quirks or clock, but it will continue to try to init the disabled device after that check. So just remove it. Signed-off-by: Kevin Hao --- drivers/mmc/host/sdhci-pltfm.c | 54 -- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c index c5b01d6bb85d..b609c03a9ef8 100644 --- a/drivers/mmc/host/sdhci-pltfm.c +++ b/drivers/mmc/host/sdhci-pltfm.c @@ -75,43 +75,41 @@ void sdhci_get_of_property(struct platform_device *pdev) u32 bus_width; int size; - if (of_device_is_available(np)) { - if (of_get_property(np, "sdhci,auto-cmd12", NULL)) - host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; + if (of_get_property(np, "sdhci,auto-cmd12", NULL)) + host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; - if (of_get_property(np, "sdhci,1-bit-only", NULL) || - (of_property_read_u32(np, "bus-width", &bus_width) == 0 && - bus_width == 1)) - host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; + if (of_get_property(np, "sdhci,1-bit-only", NULL) || + (of_property_read_u32(np, "bus-width", &bus_width) == 0 && + bus_width == 1)) + host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; - if (sdhci_of_wp_inverted(np)) - host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT; + if (sdhci_of_wp_inverted(np)) + host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT; - if (of_get_property(np, "broken-cd", NULL)) - host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; + if (of_get_property(np, "broken-cd", NULL)) + host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; - if (of_get_property(np, "no-1-8-v", NULL)) - host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; + if (of_get_property(np, "no-1-8-v", NULL)) + 
host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; - if (of_device_is_compatible(np, "fsl,p2020-rev1-esdhc")) - host->quirks |= SDHCI_QUIRK_BROKEN_DMA; + if (of_device_is_compatible(np, "fsl,p2020-rev1-esdhc")) + host->quirks |= SDHCI_QUIRK_BROKEN_DMA; - if (of_device_is_compatible(np, "fsl,p2020-esdhc") || - of_device_is_compatible(np, "fsl,p1010-esdhc") || - of_device_is_compatible(np, "fsl,t4240-esdhc") || - of_device_is_compatible(np, "fsl,mpc8536-esdhc")) - host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + if (of_device_is_compatible(np, "fsl,p2020-esdhc") || + of_device_is_compatible(np, "fsl,p1010-esdhc") || + of_device_is_compatible(np, "fsl,t4240-esdhc") || + of_device_is_compatible(np, "fsl,mpc8536-esdhc")) + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; - clk = of_get_property(np, "clock-frequency", &size); - if (clk && size == sizeof(*clk) && *clk) - pltfm_host->clock = be32_to_cpup(clk); + clk = of_get_property(np, "clock-frequency", &size); + if (clk && size == sizeof(*clk) && *clk) + pltfm_host->clock = be32_to_cpup(clk); - if (of_find_property(np, "keep-power-in-suspend", NULL)) - host->mmc->pm_caps |= MMC_PM_KEEP_POWER; + if (of_find_property(np, "keep-power-in-suspend", NULL)) + host->mmc->pm_caps |= MMC_PM_KEEP_POWER; - if (of_find_property(np, "enable-sdio-wakeup", NULL)) - host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ; - } + if (of_find_property(np, "enable-sdio-wakeup", NULL)) + host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ; } #else void sdhci_get_of_property(struct platform_device *pdev) {} -- 1.9.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/powernv: make sure the IOV BAR will not exceed limit after shifting
On Tue, Feb 03, 2015 at 06:19:26PM -0600, Bjorn Helgaas wrote: >On Tue, Feb 03, 2015 at 03:01:43PM +0800, Wei Yang wrote: >> The actual IOV BAR range is determined by the start address and the actual >> size for vf_num VFs BAR. After shifting the IOV BAR, there would be a >> chance the actual end address exceed the limit and overlap with other >> devices. >> >> This patch adds a check to make sure after shifting, the range will not >> overlap with other devices. > >I folded this into the previous patch (the one that adds >pnv_pci_vf_resource_shift()). And I think that needs to be folded together >with the following one ("powerpc/powernv: Allocate VF PE") because this one >references pdn->vf_pes, which is added by "Allocate VF PE". > Yes. Both need this. >> Signed-off-by: Wei Yang >> --- >> arch/powerpc/platforms/powernv/pci-ioda.c | 53 >> ++--- >> 1 file changed, 48 insertions(+), 5 deletions(-) >> >> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c >> b/arch/powerpc/platforms/powernv/pci-ioda.c >> index 8456ae8..1a1e74b 100644 >> --- a/arch/powerpc/platforms/powernv/pci-ioda.c >> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c >> @@ -854,16 +854,18 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev >> *dev) >> } >> >> #ifdef CONFIG_PCI_IOV >> -static void pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) >> +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) >> { >> struct pci_dn *pdn = pci_get_pdn(dev); >> int i; >> struct resource *res; >> resource_size_t size; >> +u16 vf_num; >> >> if (!dev->is_physfn) >> -return; >> +return -EINVAL; >> >> +vf_num = pdn->vf_pes; > >I can't actually build this, but I don't think pdn->vf_pes is defined yet. > The pdn->vf_pes is defined in the next patch, it is not defined yet. I thought the incremental patch means a patch on top of the current patch set, so it is defined as the last patch. 
>> for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) { >> res = &dev->resource[i]; >> if (!res->flags || !res->parent) >> @@ -875,11 +877,49 @@ static void pnv_pci_vf_resource_shift(struct pci_dev >> *dev, int offset) >> dev_info(&dev->dev, " Shifting VF BAR %pR to\n", res); >> size = pci_iov_resource_size(dev, i); >> res->start += size*offset; >> - >> dev_info(&dev->dev, " %pR\n", res); >> + >> +/* >> + * The actual IOV BAR range is determined by the start address >> + * and the actual size for vf_num VFs BAR. The check here is >> + * to make sure after shifting, the range will not overlap >> + * with other device. >> + */ >> +if ((res->start + (size * vf_num)) > res->end) { >> +dev_err(&dev->dev, "VF BAR%d: %pR will conflict with" >> +" other device after shift\n"); > >sriov_init() sets up "res" with enough space to contain TotalVF copies >of the VF BAR. By the time we get here, that "res" is in the resource >tree, and you should be able to see it in /proc/iomem. > >For example, if TotalVFs is 128 and VF BAR0 is 1MB in size, the >resource size would be 128 * 1MB = 0x800_. If the VF BAR0 in the >SR-IOV Capability contains a base address of 0x8000_, the resource >would be: > > [mem 0x8000_-0x87ff_] > >We have to assume there's another resource starting immediately after >this one, i.e., at 0x8800_, and we have to make sure that when we >change this resource and turn on SR-IOV, we don't overlap with it. > >The shifted resource will start at 0x8000_ + 1MB * "offset". The >hardware will respond to a range whose size is 1MB * NumVFs (NumVFs >may be smaller than TotalVFs). > >If we enable 16 VFs and shift by 23, we set VF BAR0 to 0x8000_ + >1MB * 23 = 0x8170_, and the size is 1MB * 16 = 0x100_, so the >new resource will be: > > [mem 0x8170_-0x826f_] > >That's fine; it doesn't extend past the original end of 0x87ff_. 
>But if we enable those same 16 VFs with a shift of 120, we set VF BAR0 >to 0x8000_ + 1MB * 120 = 0x8780_, and the size stays the same, >so the new resource will be: > > [mem 0x8780_-0x887f_] > >and that's a problem because we have two devices responding at >0x8800_. > >Your test of "res->start + (size * vf_num)) > res->end" is not strict >enough to catch this problem. > Yep, you are right. >I think we need something like the patch below. I restructured it so >we don't have to back out any resource changes if we fail. > >This shifting strategy seems to imply that the closer NumVFs is to >TotalVFs, the less flexibility you have to assign PEs, e.g., if NumVFs >== TotalVFs, you wouldn't be able to shift at all. In this example, >you could shift by anything from 0 to 128 - 16 = 112, but if you >wanted NumVFs = 64, yo
RE: [PATCH v2 08/10] powerpc/corenet: Enable CLK_QORIQ
> -Original Message- > From: Emil Medve [mailto:emilian.me...@freescale.com] > Sent: Wednesday, January 21, 2015 6:04 PM > To: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; > mturque...@linaro.org; haoke...@gmail.com; Tang Yuantian-B29983 > Cc: Medve Emilian-EMMEDVE1 > Subject: [PATCH v2 08/10] powerpc/corenet: Enable CLK_QORIQ > > Change-Id: I1a80ad7b9f6854791bd270b746f93a91439155a6 > Signed-off-by: Emil Medve > --- > arch/powerpc/configs/corenet32_smp_defconfig | 1 + > arch/powerpc/configs/corenet64_smp_defconfig | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/arch/powerpc/configs/corenet32_smp_defconfig > b/arch/powerpc/configs/corenet32_smp_defconfig > index 611efe9..cce62e9 100644 > --- a/arch/powerpc/configs/corenet32_smp_defconfig > +++ b/arch/powerpc/configs/corenet32_smp_defconfig > @@ -147,6 +147,7 @@ CONFIG_STAGING=y > CONFIG_MEMORY=y > CONFIG_VIRT_DRIVERS=y > CONFIG_FSL_HV_MANAGER=y > +CONFIG_CLK_QORIQ=y > CONFIG_EXT2_FS=y > CONFIG_EXT3_FS=y > # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set diff --git > a/arch/powerpc/configs/corenet64_smp_defconfig > b/arch/powerpc/configs/corenet64_smp_defconfig > index be24a18..c9e8057 100644 > --- a/arch/powerpc/configs/corenet64_smp_defconfig > +++ b/arch/powerpc/configs/corenet64_smp_defconfig > @@ -117,6 +117,7 @@ CONFIG_DMADEVICES=y > CONFIG_FSL_DMA=y > CONFIG_VIRT_DRIVERS=y > CONFIG_FSL_HV_MANAGER=y > +CONFIG_CLK_QORIQ=y > CONFIG_FSL_CORENET_CF=y > CONFIG_MEMORY=y > CONFIG_EXT2_FS=y > -- > 2.2.1 Acked-by: Tang Yuantian Regards, Yuantian ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Pull request: scottwood/linux.git next
On Tue, 2015-02-03 at 19:05 -0600, Scott Wood wrote: > On Wed, 2015-02-04 at 11:58 +1100, Michael Ellerman wrote: > > On Tue, 2015-02-03 at 11:20 -0600, Scott Wood wrote: > > > Highlights include 8xx optimizations, some more work on datapath device > > > tree content, e300 machine check support, t1040 corenet error reporting, > > > and various cleanups and fixes. > > > > > > The following changes since commit > > > 31494cf3532cfee0bf5c913ac9962971aab7b1d4: > > > > > > powerpc/powernv: Don't alloc IRQ map if necessary (2015-01-28 15:28:10 > > > +1100) > > > > > > are available in the git repository at: > > > > > > git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git next > > > > > > for you to fetch changes up to 0dc294f717d41bfbafc746a7a96a7bc0f114c20c: > > > > > > powerpc/mm: bail out early when flushing TLB page (2015-01-30 18:39:00 > > > -0600) > > > > > > Hi Scott, > > > > It's not a big deal but a few of the subjects are a bit off in here. I'm not > > going to ask you to respin or anything, but in general if we can stick with > > "powerpc[/topic]: ..." that'd be good. > > Sorry... I normally fix those up but apparently I didn't notice this > time. Yeah no stress, it's easy enough to miss amongst all the other details you're checking before merging. It actually made me think I should write a commit hook that checks for it. > > > perf/powerpc: reset event hw state when adding it to the PMU > > > powerpc: defconfigs: add MTD_SPI_NOR (new dependency for M25P80) > > > powerpc32: adds handling of _PAGE_RO > > > powerpc32: Use kmem_cache memory for PGDIR > > > PowerPC-83xx: Deletion of an unnecessary check before the function > > > call "of_node_put" > > > > > > Also there was a v2 of this one: > > > > > powerpc/mm: bail out early when flushing TLB page > > > > Do you want to merge that as an incremental fix, or I can, let me know? > > I had pushed a day or so before v2 came along... Go ahead and merge the > incremental fix. Will do. 
cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Pull request: scottwood/linux.git next
On Wed, 2015-02-04 at 11:58 +1100, Michael Ellerman wrote: > On Tue, 2015-02-03 at 11:20 -0600, Scott Wood wrote: > > Highlights include 8xx optimizations, some more work on datapath device > > tree content, e300 machine check support, t1040 corenet error reporting, > > and various cleanups and fixes. > > > > The following changes since commit 31494cf3532cfee0bf5c913ac9962971aab7b1d4: > > > > powerpc/powernv: Don't alloc IRQ map if necessary (2015-01-28 15:28:10 > > +1100) > > > > are available in the git repository at: > > > > git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git next > > > > for you to fetch changes up to 0dc294f717d41bfbafc746a7a96a7bc0f114c20c: > > > > powerpc/mm: bail out early when flushing TLB page (2015-01-30 18:39:00 > > -0600) > > > Hi Scott, > > It's not a big deal but a few of the subjects are a bit off in here. I'm not > going to ask you to respin or anything, but in general if we can stick with > "powerpc[/topic]: ..." that'd be good. Sorry... I normally fix those up but apparently I didn't notice this time. > > perf/powerpc: reset event hw state when adding it to the PMU > > powerpc: defconfigs: add MTD_SPI_NOR (new dependency for M25P80) > > powerpc32: adds handling of _PAGE_RO > > powerpc32: Use kmem_cache memory for PGDIR > > PowerPC-83xx: Deletion of an unnecessary check before the function > > call "of_node_put" > > > Also there was a v2 of this one: > > > powerpc/mm: bail out early when flushing TLB page > > Do you want to merge that as an incremental fix, or I can, let me know? I had pushed a day or so before v2 came along... Go ahead and merge the incremental fix. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Pull request: scottwood/linux.git next
On Tue, 2015-02-03 at 11:20 -0600, Scott Wood wrote: > Highlights include 8xx optimizations, some more work on datapath device > tree content, e300 machine check support, t1040 corenet error reporting, > and various cleanups and fixes. > > The following changes since commit 31494cf3532cfee0bf5c913ac9962971aab7b1d4: > > powerpc/powernv: Don't alloc IRQ map if necessary (2015-01-28 15:28:10 > +1100) > > are available in the git repository at: > > git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git next > > for you to fetch changes up to 0dc294f717d41bfbafc746a7a96a7bc0f114c20c: > > powerpc/mm: bail out early when flushing TLB page (2015-01-30 18:39:00 > -0600) Hi Scott, It's not a big deal but a few of the subjects are a bit off in here. I'm not going to ask you to respin or anything, but in general if we can stick with "powerpc[/topic]: ..." that'd be good. > perf/powerpc: reset event hw state when adding it to the PMU > powerpc: defconfigs: add MTD_SPI_NOR (new dependency for M25P80) > powerpc32: adds handling of _PAGE_RO > powerpc32: Use kmem_cache memory for PGDIR > PowerPC-83xx: Deletion of an unnecessary check before the function call > "of_node_put" Also there was a v2 of this one: > powerpc/mm: bail out early when flushing TLB page Do you want to merge that as an incremental fix, or I can, let me know? cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 0/4] VPHN parsing fixes
On Tue, 2015-02-03 at 08:46 +0100, Greg Kurz wrote: > On Tue, 03 Feb 2015 13:47:35 +1100 > Michael Ellerman wrote: > > > On Thu, 2015-01-29 at 19:03 +0100, Greg Kurz wrote: > > > On Wed, 17 Dec 2014 10:40:46 +0100 > > > Greg Kurz wrote: > > > > Hi, > > > > > > > > This series addresses remarks from Ben and Michael (see individual > > > > patches). > > > > The most notable changes are: > > > > - the parsing code being pull out into a separate file in patch 3/4. > > > > This > > > > allows to write userland tests like the one below. > > > > - a full rewrite of the parsing logic in patch 4/4 > > > > > > > > > > Ping ? > > > > Sorry, lots of patches needing review. > > Heh, no problem. :) Since this isn't bugfix, I can even repost later, when > the review pressure is lower. Yeah if you can repost in ~2 weeks that would be perfect. > > This looks pretty good at a glance, but did you actually write a userspace > > test > > for it? If so please send it. I'm happy to rework it into something that > > can go > > in selftests. > > > > Yes, I wrote the premise of a test program. You can find it in the cover mail > of > this series: > > https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-December/123601.html > > I'll have a look at selftests. Thanks. That test looks like a good start, there might be some more cases to test though? If you just copy the tools/testing/selftests/powerpc/tm directory and rename it to vphn, you can probably work out the rest. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/powernv: make sure the IOV BAR will not exceed limit after shifting
On Tue, Feb 03, 2015 at 03:01:43PM +0800, Wei Yang wrote: > The actual IOV BAR range is determined by the start address and the actual > size for vf_num VFs BAR. After shifting the IOV BAR, there would be a > chance the actual end address exceed the limit and overlap with other > devices. > > This patch adds a check to make sure after shifting, the range will not > overlap with other devices. I folded this into the previous patch (the one that adds pnv_pci_vf_resource_shift()). And I think that needs to be folded together with the following one ("powerpc/powernv: Allocate VF PE") because this one references pdn->vf_pes, which is added by "Allocate VF PE". > Signed-off-by: Wei Yang > --- > arch/powerpc/platforms/powernv/pci-ioda.c | 53 > ++--- > 1 file changed, 48 insertions(+), 5 deletions(-) > > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c > b/arch/powerpc/platforms/powernv/pci-ioda.c > index 8456ae8..1a1e74b 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -854,16 +854,18 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev > *dev) > } > > #ifdef CONFIG_PCI_IOV > -static void pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) > +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) > { > struct pci_dn *pdn = pci_get_pdn(dev); > int i; > struct resource *res; > resource_size_t size; > + u16 vf_num; > > if (!dev->is_physfn) > - return; > + return -EINVAL; > > + vf_num = pdn->vf_pes; I can't actually build this, but I don't think pdn->vf_pes is defined yet. 
> for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) { > res = &dev->resource[i]; > if (!res->flags || !res->parent) > @@ -875,11 +877,49 @@ static void pnv_pci_vf_resource_shift(struct pci_dev > *dev, int offset) > dev_info(&dev->dev, " Shifting VF BAR %pR to\n", res); > size = pci_iov_resource_size(dev, i); > res->start += size*offset; > - > dev_info(&dev->dev, " %pR\n", res); > + > + /* > + * The actual IOV BAR range is determined by the start address > + * and the actual size for vf_num VFs BAR. The check here is > + * to make sure after shifting, the range will not overlap > + * with other device. > + */ > + if ((res->start + (size * vf_num)) > res->end) { > + dev_err(&dev->dev, "VF BAR%d: %pR will conflict with" > + " other device after shift\n"); sriov_init() sets up "res" with enough space to contain TotalVF copies of the VF BAR. By the time we get here, that "res" is in the resource tree, and you should be able to see it in /proc/iomem. For example, if TotalVFs is 128 and VF BAR0 is 1MB in size, the resource size would be 128 * 1MB = 0x800_. If the VF BAR0 in the SR-IOV Capability contains a base address of 0x8000_, the resource would be: [mem 0x8000_-0x87ff_] We have to assume there's another resource starting immediately after this one, i.e., at 0x8800_, and we have to make sure that when we change this resource and turn on SR-IOV, we don't overlap with it. The shifted resource will start at 0x8000_ + 1MB * "offset". The hardware will respond to a range whose size is 1MB * NumVFs (NumVFs may be smaller than TotalVFs). If we enable 16 VFs and shift by 23, we set VF BAR0 to 0x8000_ + 1MB * 23 = 0x8170_, and the size is 1MB * 16 = 0x100_, so the new resource will be: [mem 0x8170_-0x826f_] That's fine; it doesn't extend past the original end of 0x87ff_. 
But if we enable those same 16 VFs with a shift of 120, we set VF BAR0 to 0x8000_ + 1MB * 120 = 0x8780_, and the size stays the same, so the new resource will be: [mem 0x8780_-0x887f_] and that's a problem because we have two devices responding at 0x8800_. Your test of "res->start + (size * vf_num)) > res->end" is not strict enough to catch this problem. I think we need something like the patch below. I restructured it so we don't have to back out any resource changes if we fail. This shifting strategy seems to imply that the closer NumVFs is to TotalVFs, the less flexibility you have to assign PEs, e.g., if NumVFs == TotalVFs, you wouldn't be able to shift at all. In this example, you could shift by anything from 0 to 128 - 16 = 112, but if you wanted NumVFs = 64, you could only shift by 0 to 64. Is that true? I think your M64 BAR gets split into 256 segments, regardless of what TotalVFs is, so if you expanded the resource to 256 * 1MB for this example, you would be able to shift by up to 256 - NumVFs. Do you actually do this somewhere? I pushed an updated pci/virtualization branch with these updates. I think there's also a leak that need
Re: [PATCH v3] net/fsl_pq_mdio: Document supported compatibles
From: Emil Medve Date: Sun, 1 Feb 2015 15:58:31 -0600 > From: Shruti Kanetkar > > The device tree binding(s) document has fallen out of sync with the > driver code. Update the list of supported devices to reflect current > driver capabilities > > Change-Id: I440d8de2ee2d9c3b7b23e69b3da851cab18a4c9a > Signed-off-by: Shruti Kanetkar > Signed-off-by: Emil Medve Applied to net-next, thanks. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/dts: Remove B4860 emulator support
From: Emil Medve We probably should not have upstreamed this in the first place. Signed-off-by: Emil Medve --- arch/powerpc/boot/dts/b4860emu.dts | 223 - 1 file changed, 223 deletions(-) delete mode 100644 arch/powerpc/boot/dts/b4860emu.dts diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts deleted file mode 100644 index 2aa5cd3..000 --- a/arch/powerpc/boot/dts/b4860emu.dts +++ /dev/null @@ -1,223 +0,0 @@ -/* - * B4860 emulator Device Tree Source - * - * Copyright 2013 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * This software is provided by Freescale Semiconductor "as is" and any - * express or implied warranties, including, but not limited to, the implied - * warranties of merchantability and fitness for a particular purpose are - * disclaimed. 
In no event shall Freescale Semiconductor be liable for any - * direct, indirect, incidental, special, exemplary, or consequential damages - * (including, but not limited to, procurement of substitute goods or services; - * loss of use, data, or profits; or business interruption) however caused and - * on any theory of liability, whether in contract, strict liability, or tort - * (including negligence or otherwise) arising in any way out of the use of - * this software, even if advised of the possibility of such damage. - */ - -/dts-v1/; - -/include/ "fsl/e6500_power_isa.dtsi" - -/ { - compatible = "fsl,B4860"; - #address-cells = <2>; - #size-cells = <2>; - interrupt-parent = <&mpic>; - - aliases { - ccsr = &soc; - - serial0 = &serial0; - serial1 = &serial1; - serial2 = &serial2; - serial3 = &serial3; - dma0 = &dma0; - dma1 = &dma1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu0: PowerPC,e6500@0 { - device_type = "cpu"; - reg = <0 1>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x8000>; - }; - cpu1: PowerPC,e6500@2 { - device_type = "cpu"; - reg = <2 3>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x8000>; - }; - cpu2: PowerPC,e6500@4 { - device_type = "cpu"; - reg = <4 5>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x8000>; - }; - cpu3: PowerPC,e6500@6 { - device_type = "cpu"; - reg = <6 7>; - next-level-cache = <&L2>; - fsl,portid-mapping = <0x8000>; - }; - }; -}; - -/ { - model = "fsl,B4860QDS"; - compatible = "fsl,B4860EMU", "fsl,B4860QDS"; - #address-cells = <2>; - #size-cells = <2>; - interrupt-parent = <&mpic>; - - ifc: localbus@ffe124000 { - reg = <0xf 0xfe124000 0 0x2000>; - ranges = <0 0 0xf 0xe800 0x0800 - 2 0 0xf 0xff80 0x0001 - 3 0 0xf 0xffdf 0x8000>; - - nor@0,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "cfi-flash"; - reg = <0x0 0x0 0x800>; - bank-width = <2>; - device-width = <1>; - }; - }; - - memory { - device_type = "memory"; - }; - - soc: soc@ffe00 { - ranges = <0x 0xf 0xfe00 
0x100>; - reg = <0xf 0xfe00 0 0x1000>; - }; -}; - -&ifc { - #address-cells = <2>; - #size-cells = <1>; -
Pull request: scottwood/linux.git next
Highlights include 8xx optimizations, some more work on datapath device tree content, e300 machine check support, t1040 corenet error reporting, and various cleanups and fixes. The following changes since commit 31494cf3532cfee0bf5c913ac9962971aab7b1d4: powerpc/powernv: Don't alloc IRQ map if necessary (2015-01-28 15:28:10 +1100) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git next for you to fetch changes up to 0dc294f717d41bfbafc746a7a96a7bc0f114c20c: powerpc/mm: bail out early when flushing TLB page (2015-01-30 18:39:00 -0600) Alessio Igor Bogani (2): powerpc/85xx: Add support for Emerson/Artesyn MVME2500. powerpc: dts: pq3/85xx: Fix GPIO address Alexandru-Cezar Sardan (1): perf/powerpc: reset event hw state when adding it to the PMU Andy Fleming (1): powerpc/config: Enable MDIO support Arseny Solokha (1): powerpc/mm: bail out early when flushing TLB page Brian Norris (1): powerpc: defconfigs: add MTD_SPI_NOR (new dependency for M25P80) Emil Medve (4): powerpc/dts: Remove T4240 emulator support powerpc: Remove duplicate tlbcam_index declarations dt/bindings: b/qman: Fix the alloc-ranges in the example(s) dt/bindings: b/qman: Add phandle to the portals Esben Haabendal (1): powerpc: Add machine_check cpu function for e300c3 cpus Kim Phillips (1): powerpc/fsl_pci: Fix pci stack build bug with FRAME_WARN Kumar Gala (2): powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA BMan powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA QMan LEROY Christophe (9): powerpc32: adds handling of _PAGE_RO powerpc/8xx: use _PAGE_RO instead of _PAGE_RW powerpc/8xx: reduce pressure on TLB due to context switches powerpc/8xx: remove remaining unnecessary code in FixupDAR powerpc/8xx: remove tests on PGDIR entry validity powerpc32: Use kmem_cache memory for PGDIR powerpc/8xx: Take benefit of aligned PGDIR powerpc/8xx: Optimise access to swapper_pg_dir powerpc/8xx: Remove duplicated code in set_context() Markus 
Elfring (1): PowerPC-83xx: Deletion of an unnecessary check before the function call "of_node_put" Rickard Strandqvist (1): powerpc/qe: Use strlcpy() Scott Wood (2): powerpc/mpc85xx: Add ranges to etsec2 nodes memory/fsl-corenet-cf: Add t1040 support Shaohui Xie (1): power/fsl: add MDIO dt binding for FMan Tom Huynh (1): powerpc/perf: fix fsl_emb_pmu_start to write correct pmc value .../devicetree/bindings/powerpc/fsl/fman.txt | 70 + Documentation/devicetree/bindings/soc/fsl/bman.txt | 12 +- Documentation/devicetree/bindings/soc/fsl/qman.txt | 14 +- arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi| 1 + arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi| 1 + arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi| 1 + arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi | 6 +- arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi | 90 +++ arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi | 41 +++ arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi | 101 arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi | 41 +++ arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi | 41 +++ arch/powerpc/boot/dts/mvme2500.dts | 280 arch/powerpc/boot/dts/t4240emu.dts | 281 - arch/powerpc/configs/corenet32_smp_defconfig | 2 + arch/powerpc/configs/corenet64_smp_defconfig | 5 + arch/powerpc/configs/mpc85xx_defconfig | 16 +- arch/powerpc/configs/mpc85xx_smp_defconfig | 1 + arch/powerpc/include/asm/pci-bridge.h | 4 + arch/powerpc/include/asm/pgtable-ppc32.h | 23 +- arch/powerpc/include/asm/pgtable.h | 7 +- arch/powerpc/include/asm/pte-8xx.h | 9 +- arch/powerpc/include/asm/pte-common.h | 25 +- arch/powerpc/kernel/cputable.c | 1 + arch/powerpc/kernel/head_8xx.S | 123 - arch/powerpc/mm/fsl_booke_mmu.c| 2 - arch/powerpc/mm/mmu_context_nohash.c | 43 +++- arch/powerpc/mm/pgtable_32.c | 19 +- arch/powerpc/mm/tlb_nohash.c | 5 +- arch/powerpc/perf/core-fsl-emb.c | 10 +- arch/powerpc/platforms/83xx/usb.c | 3 +- arch/powerpc/platforms/85xx/Kconfig| 6 + arch/powerpc/platforms/85xx/Makefile | 1 + arch/powerpc/platforms/85xx/mvme2500.c | 74 ++ 
arch/powerpc/sysdev/fsl_pci.c | 11 +- arch/powerpc/sysdev/indirect_pci.c | 25 +- arch/powerpc/sysdev/qe_lib/qe.c| 6 +- drivers/memory/fsl-corenet-cf.c
Re: [PATCH v3 00/11] powerpc8xx: Further optimisation of TLB handling
On Tue, 2015-02-03 at 12:38 +0100, Christophe Leroy wrote: > This patchset provides a further optimisation of TLB handling in the 8xx. > Main changes are based on: > - Using processor handling of PGD/PTE Validity bits instead of testing > ourselves > the entries validity > - Aligning PGD address to allow direct bit manipulation > - Not saving registers like CR when not needed > > It also adds support to any TASK_SIZE Please respin with just the changes that haven't already been applied to my next branch. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
cacheable_memcpy() versus memcpy() ==> 8% improvement on FTP throughput
In the powerpc32 architecture we have a function called cacheable_memcpy() which does the same thing as memcpy() but uses dcbz/dcbt instructions for an optimised copy (just like __copy_tofrom_user()). What seems strange is that it is used almost nowhere (only in drivers/net/ethernet/ibm/emac/core.c). I replaced all memcpy() calls in include/linux/skbuff.h and net/core/skbuff.c with cacheable_memcpy() and I get around 8% improvement on FTP throughput on MPC885. What could be done to generalise the use of cacheable_memcpy() instead of memcpy() whenever possible? Indeed, in order to use cacheable_memcpy(), we need: * The destination to be cacheable * The source and destination to not overlap on the same cachelines. Could we check, when calling memcpy(), whether the destination is cacheable or not, and if yes redirect the call to cacheable_memcpy()? How can we check that? Christophe ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 01/15] PCI: Add pci_device_to_OF_node() stub for !CONFIG_OF
On Tue, Feb 03, 2015 at 09:37:24AM +0800, Kevin Hao wrote: > Add a stub for pci_device_to_OF_node() so drivers don't need to > use #ifdef CONFIG_OF around calls to it. > > Signed-off-by: Kevin Hao > Acked-by: Bjorn Helgaas Applied to pci/misc for v3.20, thanks! > --- > Hi Bjorn, > > Here is the revised version with the update of the subject and commit log > in case that you want to take it for 3.20. Otherwise we will merge this > version to powerpc tree during the 3.21 cycle. > > include/linux/pci.h | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/include/linux/pci.h b/include/linux/pci.h > index 8323cbf93913..421eb6a9e600 100644 > --- a/include/linux/pci.h > +++ b/include/linux/pci.h > @@ -1863,6 +1863,8 @@ static inline void pci_set_of_node(struct pci_dev *dev) > { } > static inline void pci_release_of_node(struct pci_dev *dev) { } > static inline void pci_set_bus_of_node(struct pci_bus *bus) { } > static inline void pci_release_bus_of_node(struct pci_bus *bus) { } > +static inline struct device_node * > +pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; } > #endif /* CONFIG_OF */ > > #ifdef CONFIG_EEH > -- > 1.9.3 > ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 01/15] PCI: introduce pci_device_to_OF_node() for !CONFIG_OF
On Mon, Feb 2, 2015 at 6:25 PM, Michael Ellerman wrote: > On Mon, 2015-02-02 at 09:47 -0600, Bjorn Helgaas wrote: >> On Sun, Feb 1, 2015 at 8:28 PM, Michael Ellerman wrote: >> > On Sat, 2015-01-31 at 21:47 +0800, Kevin Hao wrote: >> >> So we can avoid the ugly #ifdef in some drivers. >> >> >> >> Signed-off-by: Kevin Hao >> >> --- >> >> include/linux/pci.h | 2 ++ >> >> 1 file changed, 2 insertions(+) >> > >> > >> > Hi Bjorn, >> > >> > Do you mind putting this into your next for 3.20? Or giving us an ACK for >> > it if >> > you prefer. >> >> I think it makes more sense to merge this along with the other 14 >> patches that remove the #ifdefs (at least, I assume that's what they >> do; I haven't seen them). > > Yeah that is what they do. > > The problem with doing it that way is the rest of the patches are splattered > all across the tree, so getting them merged as a series will require a lot of > searching for acks. > > If you merged this one for 3.20 we could then merge the rest of the series one > at a time as the respective maintainers get around to it during the 3.21 > cycle. > > I probably should have said all that in my original mail :) > > Anyway it's not a big deal, but that's why it'd be nice if you could take it. OK, sure, I didn't get the hint that it would be preferable for me to take it. I'll merge it for v3.20. Bjorn ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] PowerPC-rheap: Delete an unnecessary check before the function call "kfree"
From: Markus Elfring Date: Tue, 3 Feb 2015 14:34:10 +0100 The kfree() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring --- arch/powerpc/lib/rheap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index a1060a8..69abf84 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rh_create); */ void rh_destroy(rh_info_t * info) { - if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + if ((info->flags & RHIF_STATIC_BLOCK) == 0) kfree(info->block); if ((info->flags & RHIF_STATIC_INFO) == 0) -- 2.2.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] PowerPC-PCI: Delete unnecessary checks before the function call "kfree"
From: Markus Elfring Date: Tue, 3 Feb 2015 13:55:53 +0100 The kfree() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring --- arch/powerpc/platforms/cell/celleb_pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 3ce70de..9b11b5d 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -393,11 +393,10 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node, error: if (mem_init_done) { - if (config && *config) + if (config) kfree(*config); - if (res && *res) + if (res) kfree(*res); - } else { if (config && *config) { size = 256; -- 2.2.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/2] powerpc32: put csum_tcpudp_magic inline
csum_tcpudp_magic() is only a few instructions, and does not modifies any other register than the returned result. So it is not worth having it as a separate function and suffer function branching and saving of volatile registers. This patch makes it inline by use of the already existing csum_tcpudp_nofold() function. Signed-off-by: Christophe Leroy --- v2: no change arch/powerpc/include/asm/checksum.h | 15 +++ arch/powerpc/lib/checksum_32.S | 16 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 8251a3b..cfe806a 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -19,6 +19,7 @@ #else extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); +#ifdef __powerpc64__ /* * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented @@ -27,6 +28,7 @@ extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, __wsum sum); +#endif /* * computes the checksum of a memory block at buff, length len, @@ -127,6 +129,19 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, #endif } +#ifndef __powerpc64__ +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} +#endif #endif #endif /* __KERNEL__ */ #endif diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 7874e8a..6d67e05 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -41,22 +41,6 @@ _GLOBAL(ip_fast_csum) blr /* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(saddr, daddr, len, proto, sum) - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi 
r5,r6,16,0,15 /* put proto in upper half of len */ - addcr0,r3,r4/* add 4 32-bit words together */ - adder0,r0,r5 - adder0,r0,r7 - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwir3,r3,16 - blr - -/* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) * -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 2/2] powerpc32: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives the following assembly: 0: 7c 04 1a 14 add r0,r4,r3 4: 7c 64 00 10 subfc r3,r4,r0 8: 7c 63 19 10 subfe r3,r3,r3 c: 7c 63 00 50 subfr3,r3,r0 include/net/checksum.h also offers the possibility to define an arch specific function. This patch provides a ppc32 specific csum_add() inline function. Signed-off-by: Christophe Leroy --- v2: changed constraints on the __asm__ arch/powerpc/include/asm/checksum.h | 12 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index cfe806a..1e48cc7 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -141,6 +141,18 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } + +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ +__asm__("\n\ + addc %0,%0,%1 \n\ + addze %0,%0 \n\ + " + : "+r" (csum) : "r" (addend)); + return csum; +} + #endif #endif #endif /* __KERNEL__ */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc32: rearrange instructions order in ip_fast_csum()
On PPC_8xx, lwz has a 2 cycles latency, and branching also takes 2 cycles. As the size of the header is minimum 5 words, we can unroll the loop for the first words to reduce number of branching, and we can re-order the instructions to limit loading latency. Signed-off-by: Christophe Leroy --- arch/powerpc/lib/checksum_32.S | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 6d67e05..5500704 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -26,13 +26,17 @@ _GLOBAL(ip_fast_csum) lwz r0,0(r3) lwzur5,4(r3) - addic. r4,r4,-2 + addic. r4,r4,-4 addcr0,r0,r5 mtctr r4 blelr- -1: lwzur4,4(r3) - adder0,r0,r4 + lwzur5,4(r3) + lwzur4,4(r3) + adder0,r0,r5 +1: adder0,r0,r4 + lwzur4,4(r3) bdnz1b + adder0,r0,r4 addze r0,r0 /* add in final carry */ rlwinm r3,r0,16,0,31 /* fold two halves together */ add r3,r0,r3 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 0/2] powerpc32: Optimise some IP checksum functions.
This patchset provides a few optimisations related to IP checksum functions. Signed-off-by: Christophe Leroy Tested-by: Christophe Leroy --- arch/powerpc/include/asm/checksum.h | 28 arch/powerpc/lib/checksum_32.S | 16 2 files changed, 28 insertions(+), 16 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 10/11] powerpc/8xx: Use SPRG2 instead of DAR for saving r3
We now have SPRG2 available, as it is not used anymore for saving CR, so we don't need to clobber DAR anymore for saving r3 for CPU6 ERRATA handling. Signed-off-by: Christophe Leroy --- v2: no change v3: no change arch/powerpc/kernel/head_8xx.S | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e803ad0..ead6448 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -309,7 +309,7 @@ SystemCall: InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -362,8 +362,7 @@ InstructionTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi @@ -371,7 +370,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 mfcrr10 @@ -441,7 +440,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR + mfspr r3, SPRN_SPRG_SCRATCH2 #endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 09/11] powerpc/8xx: dont save CR in SCRATCH registers
CR only needs to be preserved when checking if we are handling a kernel address. So we can preserve CR in a register: - In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we don't need to do anything at all with CR. - We use r10, then we reload SRR0/MD_EPN into r10 when CR is restored Signed-off-by: Christophe Leroy --- v2: removed the CPU6 specific handling of CR which was saving (only) 1 cycle but was making the code more difficult to maintain due to too many different cases v3: no change (but impacted by patch 07) arch/powerpc/kernel/head_8xx.S | 29 +++-- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 429428c..90c18ad 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -312,10 +312,6 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 - mfcrr10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -323,13 +319,20 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andis. r11, r10, 0x8000/* Address >= 0x8000 */ -#endif + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) + mfcrr10 + andis. 
r11, r11, 0x8000/* Address >= 0x8000 */ mfspr r11, SPRN_M_TW /* Get level 1 table */ -#ifdef CONFIG_MODULES beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcrr10 + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ +#else + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -362,8 +365,6 @@ InstructionTLBMiss: mfspr r3, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ #endif - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -374,17 +375,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mfcrr10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 + mfspr r11, SPRN_MD_EPN + andis. r11, r11, 0x8000 mfspr r11, SPRN_M_TW /* Get level 1 table */ beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcrr10 + mfspr r10, SPRN_MD_EPN + /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ @@ -441,8 +444,6 @@ DataStoreTLBMiss: mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcrr10 EXCEPTION_EPILOG_0 rfi -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 11/11] powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000
By default, TASK_SIZE is set to 0x80000000 for PPC_8xx, which is most likely sufficient for most cases. However, the kernel configuration allows TASK_SIZE to be set to another value, so the 8xx shall handle it. Signed-off-by: Christophe Leroy --- v2: no change v3: no change (but impacted by patch 07) arch/powerpc/kernel/head_8xx.S | 25 +++-- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 663315c..7388e20 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -48,6 +48,19 @@ mtspr spr, reg #endif +/* Macro to test if an address is a kernel address */ +#if CONFIG_TASK_SIZE <= 0x8000 +#define IS_KERNEL(tmp, addr) \ + andis. tmp, addr, 0x8000 /* Address >= 0x8000 */ +#define BRANCH_UNLESS_KERNEL(label)beq label +#else +#define IS_KERNEL(tmp, addr) \ + rlwinm tmp, addr, 16, 16, 31; \ + cmpli cr0, tmp, PAGE_OFFSET >> 16 +#define BRANCH_UNLESS_KERNEL(label)blt label +#endif + + /* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. @@ -322,9 +335,9 @@ InstructionTLBMiss: mfspr r11, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mfcrr10 - andis. r11, r11, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r11) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: mtcrr10 @@ -379,9 +392,9 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r11, SPRN_MD_EPN - andis. r11, r11, 0x8000 + IS_KERNEL(r11, r11) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: mtcrr10 @@ -516,9 +529,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 - andis. 
r11, r10, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 05/11] powerpc/8xx: Optimise access to swapper_pg_dir
All accessed to PGD entries are done via 0(r11). By using lower part of swapper_pg_dir as load index to r11, we can remove the ori instruction. Signed-off-by: Christophe Leroy --- v2: fixed/added comments to explain what is the real content of M_TW v3: no change arch/powerpc/kernel/head_8xx.S | 35 +++ 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ae05f28..a485ad7 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -319,16 +319,15 @@ InstructionTLBMiss: * pin the first 8MB of kernel memory */ andis. r11, r10, 0x8000/* Address >= 0x8000 */ #endif - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ + mfspr r11, SPRN_M_TW /* Get level 1 table */ #ifdef CONFIG_MODULES beq 3f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ @@ -374,14 +373,13 @@ DataStoreTLBMiss: * kernel page tables. */ andis. r11, r10, 0x8000 - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ + mfspr r11, SPRN_M_TW /* Get level 1 table */ beq 3f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ /* We have a pte table, so load fetch the pte from the table. */ @@ -509,13 +507,12 @@ FixupDAR:/* Entry point for dcbx workaround. 
*/ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000/* Address >= 0x8000 */ - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ - beq-3f /* Branch if user space */ - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + mfspr r11, SPRN_M_TW /* Get level 1 table */ + beq 3f + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -670,8 +667,7 @@ start_here: * init's THREAD like the context switch code does, but this is * easier..until someone changes init's static structures. */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l + lis r6, swapper_pg_dir@ha tophys(r6,r6) #ifdef CONFIG_8xx_CPU6 lis r4, cpu6_errata_word@h @@ -850,6 +846,13 @@ _GLOBAL(set_context) stw r4, 0x4(r5) #endif + /* Register M_TW will contain base address of level 1 table minus the +* lower part of the kernel PGDIR base address, so that all accesses to +* level 1 table are done relative to lower part of kernel PGDIR base +* address. +*/ + li r5, (swapper_pg_dir-PAGE_OFFSET)@l + sub r4, r4, r5 #ifdef CONFIG_8xx_CPU6 lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l @@ -857,7 +860,7 @@ _GLOBAL(set_context) li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) -mtspr SPRN_M_TW, r4 /* Update MMU base address */ + mtspr SPRN_M_TW, r4 /* Update pointeur to level 1 table */ li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 07/11] powerpc/8xx: macro for handling CPU15 errata
Having a macro will help keep the code clear. Signed-off-by: Christophe Leroy --- v2: no change v3: Fixed the macro (missing -) and changed macro name to be more explicit arch/powerpc/kernel/head_8xx.S | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 987a589..59039a6 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,6 +297,17 @@ SystemCall: * We have to use the MD_xxx registers for the tablewalk because the * equivalent MI_xxx registers only perform the attribute functions. */ + +#ifdef CONFIG_8xx_CPU15 +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \ + additmp, addr, PAGE_SIZE; \ + tlbie tmp;\ + additmp, addr, -PAGE_SIZE; \ + tlbie tmp +#else +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) +#endif + InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 mtspr SPRN_DAR, r3 @@ -304,12 +315,7 @@ InstructionTLBMiss: EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ -#ifdef CONFIG_8xx_CPU15 - addir11, r10, PAGE_SIZE - tlbie r11 - addir11, r10, -PAGE_SIZE - tlbie r11 -#endif + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 06/11] powerpc/8xx: Remove duplicated code in set_context()
Signed-off-by: Christophe Leroy --- v2: no change v3: no change arch/powerpc/kernel/head_8xx.S | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a485ad7..a1571b3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -853,23 +853,21 @@ _GLOBAL(set_context) */ li r5, (swapper_pg_dir-PAGE_OFFSET)@l sub r4, r4, r5 + tophys (r4, r4) #ifdef CONFIG_8xx_CPU6 lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l - tophys (r4, r4) li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) +#endif mtspr SPRN_M_TW, r4 /* Update pointeur to level 1 table */ +#ifdef CONFIG_8xx_CPU6 li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) -mtspr SPRN_M_CASID, r3 /* Update context */ -#else -mtspr SPRN_M_CASID,r3/* Update context */ - tophys (r4, r4) - mtspr SPRN_M_TW, r4 /* and pgd */ #endif + mtspr SPRN_M_CASID, r3/* Update context */ SYNC blr -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 08/11] powerpc/8xx: Handle CR out of exception PROLOG/EPILOG
In order to be able to reduce scope during which CR is saved, we take CR saving/restoring out of exception PROLOG and EPILOG Signed-off-by: Christophe Leroy --- v2: no change v3: no change (but impacted by patch 07) arch/powerpc/kernel/head_8xx.S | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 59039a6..429428c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -116,13 +116,13 @@ turn_on_mmu: */ #define EXCEPTION_PROLOG \ EXCEPTION_PROLOG_0; \ + mfcrr10;\ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 #define EXCEPTION_PROLOG_0 \ mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10 + mtspr SPRN_SPRG_SCRATCH1,r11 #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ @@ -162,7 +162,6 @@ turn_on_mmu: * Exception exit code. */ #define EXCEPTION_EPILOG_0 \ - mtcrr10;\ mfspr r10,SPRN_SPRG_SCRATCH0; \ mfspr r11,SPRN_SPRG_SCRATCH1 @@ -313,6 +312,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcrr10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) @@ -363,6 +363,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 + mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -372,6 +373,7 @@ DataStoreTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcrr10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_MD_EPN @@ -440,6 +442,7 @@ DataStoreTLBMiss: #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 + mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -465,6 +468,7 @@ InstructionTLBError: . = 0x1400 DataTLBError: EXCEPTION_PROLOG_0 + mfcrr10 mfspr r11, SPRN_DAR cmpwi cr0, r11, RPN_PATTERN -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 01/11] powerpc/8xx: remove remaining unnecessary code in FixupDAR
Since commit 33fb845a6f01 ("powerpc/8xx: Don't use MD_TWC for walk"), MD_EPN and MD_TWC are no longer written in FixupDAR, so saving r3 has become useless. Signed-off-by: Christophe Leroy --- v2: no change v3: no change arch/powerpc/kernel/head_8xx.S | 6 -- 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3d4b8ee..79b8a23 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -532,9 +532,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ -#ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 -#endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 @@ -551,9 +548,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* Extract level 2 index */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr11, r10, r11 /* Get the pte */ -#ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR -#endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 03/11] powerpc32: Use kmem_cache memory for PGDIR
When pages are not 4K, PGDIR table is allocated with kmalloc(). In order to optimise TLB handlers, aligned memory is needed. kmalloc() doesn't provide aligned memory blocks, so lets use a kmem_cache pool instead. Signed-off-by: Christophe Leroy --- v2: changed to apply cleanly to linux-next (due to patch ab090dc) v3: no change arch/powerpc/include/asm/pgtable-ppc32.h | 4 arch/powerpc/mm/pgtable_32.c | 16 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index c718bfd..affc703 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -341,10 +341,14 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define __pte_to_swp_entry(pte)((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) +#ifndef CONFIG_PPC_4K_PAGES +void pgtable_cache_init(void); +#else /* * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) +#endif extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 50fad38..c06599f 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -73,13 +73,25 @@ extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa); #define PGDIR_ORDER(32 + PGD_T_LOG2 - PGDIR_SHIFT) +#ifndef CONFIG_PPC_4K_PAGES +static struct kmem_cache *pgtable_cache; + +void pgtable_cache_init(void) +{ + pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER, + 1 << PGDIR_ORDER, 0, NULL); + if (pgtable_cache == NULL) + panic("Couldn't allocate pgtable caches"); +} +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; /* pgdir take page or two with 4K pages and a page fraction otherwise */ #ifndef CONFIG_PPC_4K_PAGES - ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); + ret = kmem_cache_alloc(pgtable_cache, 
GFP_KERNEL | __GFP_ZERO); #else ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER - PAGE_SHIFT); @@ -90,7 +102,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifndef CONFIG_PPC_4K_PAGES - kfree((void *)pgd); + kmem_cache_free(pgtable_cache, (void *)pgd); #else free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); #endif -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 02/11] powerpc/8xx: remove tests on PGDIR entry validity
The kernel MMU handling code handles the validity of entries via _PMD_PRESENT, which corresponds to the V bit in MD_TWC and MI_TWC. When the V bit is not set, the MPC8xx triggers a TLBError exception, so we don't have to check that and branch to TLBError ourselves. We can set TLB entries with non-present entries, remove all those tests and let the 8xx handle it. This reduces the number of cycles when the entries are valid, which is the case most of the time, and doesn't significantly increase the time for handling invalid entries. Signed-off-by: Christophe Leroy --- v2: no change v3: no change arch/powerpc/kernel/head_8xx.S | 41 - 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 79b8a23..2c329f1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -329,12 +329,9 @@ InstructionTLBMiss: /* Extract level 1 index */ rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, don't try to find a pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ - /* We have a pte table, so load the MI_TWC with the attributes -* for this "segment." -*/ + /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -342,13 +339,11 @@ InstructionTLBMiss: lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP - andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT - cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT - li r11, RPN_PATTERN - bne-cr0, 2f -#else - li r11, RPN_PATTERN + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT #endif + li r11, RPN_PATTERN /* The Linux PTE won't go exactly into the MMU TLB. 
* Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be @@ -366,21 +361,6 @@ InstructionTLBMiss: mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi -2: - mfspr r10, SPRN_SRR1 - /* clear all error bits as TLB Miss -* sets a few unconditionally - */ - rlwinm r10, r10, 0, 0x - mtspr SPRN_SRR1, r10 - - /* Restore registers */ -#ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH2 - b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -403,8 +383,6 @@ DataStoreTLBMiss: /* Extract level 1 index */ rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, don't try to find a pte */ /* We have a pte table, so load fetch the pte from the table. */ @@ -450,7 +428,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, RPN_PATTERN + li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3)/* Update TLB entry */ @@ -469,10 +447,7 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG_0 -InstructionTLBError1: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG mr r4,r12 mr r5,r9 andis. r10,r5,0x4000 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 00/11] powerpc8xx: Further optimisation of TLB handling
This patchset provides a further optimisation of TLB handling in the 8xx. Main changes are based on: - Using the processor's handling of PGD/PTE validity bits instead of testing the validity of the entries ourselves - Aligning PGD address to allow direct bit manipulation - Not saving registers like CR when not needed It also adds support for any TASK_SIZE Patchset: 01 - powerpc/8xx: remove remaining unnecessary code in FixupDAR 02 - powerpc/8xx: remove tests on PGDIR entry validity 03 - powerpc32: Use kmem_cache memory for PGDIR 04 - powerpc/8xx: Take benefit of aligned PGDIR 05 - powerpc/8xx: Optimise access to swapper_pg_dir 06 - powerpc/8xx: Remove duplicated code in set_context() 07 - powerpc/8xx: macro for handling CPU15 errata 08 - powerpc/8xx: Handle CR out of exception PROLOG/EPILOG 09 - powerpc/8xx: dont save CR in SCRATCH registers 10 - powerpc/8xx: Use SPRG2 instead of DAR for saving r3 11 - powerpc/8xx: Add support for TASK_SIZE greater than 0x8000 All changes have been successfully tested on MPC885 Signed-off-by: Christophe Leroy Tested-by: Christophe Leroy --- v3: 01-06 no change ; 07-11 changed arch/powerpc/include/asm/pgtable-ppc32.h | 4 + arch/powerpc/kernel/head_8xx.S | 197 +++ arch/powerpc/mm/pgtable_32.c | 16 ++- 3 files changed, 111 insertions(+), 106 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 04/11] powerpc/8xx: Take benefit of aligned PGDIR
L1 base address is now aligned so we can insert L1 index into r11 directly and then preserve r10 Signed-off-by: Christophe Leroy --- v2: no change v3: no change arch/powerpc/kernel/head_8xx.S | 34 +++--- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 2c329f1..ae05f28 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -326,16 +326,15 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - /* Extract level 1 index */ - rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + /* Insert level 1 index */ + rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Extract level 2 index */ - rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -380,13 +379,12 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - /* Extract level 1 index */ - rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzxr11, r10, r11 /* Get the level 1 entry */ + /* Insert level 1 index */ + rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ /* We have a pte table, so load fetch the pte from the table. 
*/ - mfspr r10, SPRN_MD_EPN/* Get address of fault */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ @@ -515,16 +513,14 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - /* Extract level 1 index */ -3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - /* Extract level 2 index */ - rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 - lwzxr11, r10, r11 /* Get the pte */ + /* Insert level 1 index */ +3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ + rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ + /* Insert level 2 index */ + rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + lwz r11, 0(r11) /* Get the pte */ /* concat physical page address(r11) and page offset(r10) */ - mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev