Re: [PATCH] mm/memtest: Add ARCH_USE_MEMTEST
Hi Anshuman, On 2/5/21 4:10 AM, Anshuman Khandual wrote: > early_memtest() does not get called from all architectures. Hence enabling > CONFIG_MEMTEST and providing a valid memtest=[1..N] kernel command line > option might not trigger the memory pattern tests as would be expected in > normal circumstances. This situation is misleading. The documentation already mentions which architectures support that: memtest=[KNL,X86,ARM,PPC] Enable memtest yet I admit that not all of them are reflected there. > > The change here prevents the above mentioned problem after introducing a > new config option ARCH_USE_MEMTEST that should be subscribed on platforms > that call early_memtest(), in order to enable the config CONFIG_MEMTEST. > Conversely CONFIG_MEMTEST cannot be enabled on platforms where it would > not be tested anyway. > Is that a generic pattern? What about other cross-arch parameters? Do they already use a similar subscription, or do they rely on documentation? I'm not against the patch, I just want to check if things are consistent... Cheers Vladimir
Re: [PATCH 29/33] dma-direct: retry allocations using GFP_DMA for small masks
On 10/01/18 08:00, Christoph Hellwig wrote: > If an attempt to allocate memory succeeded, but isn't inside the > supported DMA mask, retry the allocation with GFP_DMA set as a > last resort. > > Based on the x86 code, but an off by one error in what is now > dma_coherent_ok has been fixed vs the x86 code. > > Signed-off-by: Christoph Hellwig <h...@lst.de> > --- > lib/dma-direct.c | 25 - > 1 file changed, 24 insertions(+), 1 deletion(-) > > diff --git a/lib/dma-direct.c b/lib/dma-direct.c > index 8f76032ebc3c..4e43c2bb7f5f 100644 > --- a/lib/dma-direct.c > +++ b/lib/dma-direct.c > @@ -35,6 +35,11 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t > size, > return true; > } > > +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t > size) > +{ > + return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask; > +} > + > static void *dma_direct_alloc(struct device *dev, size_t size, > dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) > { > @@ -48,11 +53,29 @@ static void *dma_direct_alloc(struct device *dev, size_t > size, > if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) > gfp |= GFP_DMA32; > > +again: > /* CMA can be used only in the context which permits sleeping */ > - if (gfpflags_allow_blocking(gfp)) > + if (gfpflags_allow_blocking(gfp)) { > page = dma_alloc_from_contiguous(dev, count, page_order, gfp); > + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { > + dma_release_from_contiguous(dev, page, count); > + page = NULL; > + } > + } > if (!page) > page = alloc_pages_node(dev_to_node(dev), gfp, page_order); > + > + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { > + __free_pages(page, page_order); > + page = NULL; > + > + if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && > + !(gfp & GFP_DMA)) { > + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; > + goto again; > + } > + } > + > if (!page) > return NULL; > > Reviewed-by: Vladimir Murzin <vladimir.mur...@arm.com> Cheers Vladimir
Re: [PATCH 11/33] dma-mapping: move swiotlb arch helpers to a new header
On 10/01/18 08:00, Christoph Hellwig wrote: > index 9110988b92a1..f00833acb626 100644 > --- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h > +++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h > @@ -61,6 +61,14 @@ static inline void plat_post_dma_flush(struct device *dev) > { > } > > +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t > size) > +{ > + if (!dev->dma_mask) > + return false; > + > + return addr + size <= *dev->dma_mask; > +} > + I know it is a copy, but it seems to have an off-by-one error; it should be: return addr + size - 1 <= *dev->dma_mask; > dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); > phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); > snip... > diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h > b/arch/mips/include/asm/mach-loongson64/dma-coherence.h > index 1602a9e9e8c2..5cfda8f893e9 100644 > --- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h > +++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h > @@ -17,6 +17,14 @@ > > struct device; > > +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t > size) > +{ > + if (!dev->dma_mask) > + return false; > + > + return addr + size <= *dev->dma_mask; Ditto: the same off-by-one error applies here. Cheers Vladimir
Re: [PATCH 31/67] dma-direct: make dma_direct_{alloc, free} available to other implementations
On 29/12/17 08:18, Christoph Hellwig wrote: > So that they don't need to indirect through the operation vector. > > Signed-off-by: Christoph Hellwig <h...@lst.de> > --- > arch/arm/mm/dma-mapping-nommu.c | 9 +++-- > include/linux/dma-direct.h | 5 + > lib/dma-direct.c| 6 +++--- > 3 files changed, 11 insertions(+), 9 deletions(-) > > diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c > index 49e9831dc0f1..b4cf3e4e9d4a 100644 > --- a/arch/arm/mm/dma-mapping-nommu.c > +++ b/arch/arm/mm/dma-mapping-nommu.c > @@ -11,7 +11,7 @@ > > #include > #include > -#include > +#include > #include > > #include > @@ -39,7 +39,6 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t > size, >unsigned long attrs) > > { > - const struct dma_map_ops *ops = _direct_ops; > void *ret; > > /* > @@ -48,7 +47,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t > size, >*/ > > if (attrs & DMA_ATTR_NON_CONSISTENT) > - return ops->alloc(dev, size, dma_handle, gfp, attrs); > + return dma_direct_alloc(dev, size, dma_handle, gfp, attrs); > > ret = dma_alloc_from_global_coherent(size, dma_handle); > > @@ -70,10 +69,8 @@ static void arm_nommu_dma_free(struct device *dev, size_t > size, > void *cpu_addr, dma_addr_t dma_addr, > unsigned long attrs) > { > - const struct dma_map_ops *ops = _direct_ops; > - > if (attrs & DMA_ATTR_NON_CONSISTENT) { > - ops->free(dev, size, cpu_addr, dma_addr, attrs); > + dma_direct_free(dev, size, cpu_addr, dma_addr, attrs); > } else { > int ret = dma_release_from_global_coherent(get_order(size), > cpu_addr); > diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h > index 10e924b7cba7..4788bf0bf683 100644 > --- a/include/linux/dma-direct.h > +++ b/include/linux/dma-direct.h > @@ -38,4 +38,9 @@ static inline void dma_mark_clean(void *addr, size_t size) > } > #endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ > > +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t > *dma_handle, > + gfp_t gfp, unsigned 
long attrs); > +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, > + dma_addr_t dma_addr, unsigned long attrs); > + > #endif /* _LINUX_DMA_DIRECT_H */ > diff --git a/lib/dma-direct.c b/lib/dma-direct.c > index f8467cb3d89a..7e913728e099 100644 > --- a/lib/dma-direct.c > +++ b/lib/dma-direct.c > @@ -33,8 +33,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t > phys, size_t size) > return phys_to_dma(dev, phys) + size <= dev->coherent_dma_mask; > } > > -static void *dma_direct_alloc(struct device *dev, size_t size, > - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) > +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t > *dma_handle, > + gfp_t gfp, unsigned long attrs) > { > unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; > int page_order = get_order(size); > @@ -71,7 +71,7 @@ static void *dma_direct_alloc(struct device *dev, size_t > size, > return page_address(page); > } > > -static void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, > +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, > dma_addr_t dma_addr, unsigned long attrs) > { > unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; > Reviewed-by: Vladimir Murzin <vladimir.mur...@arm.com> Thanks Vladimir
Re: [PATCH 30/67] dma-direct: retry allocations using GFP_DMA for small masks
On 29/12/17 08:18, Christoph Hellwig wrote: > If we got back an allocation that wasn't inside the support coherent mask, > retry the allocation using GFP_DMA. > > Based on the x86 code. > > Signed-off-by: Christoph Hellwig> --- > lib/dma-direct.c | 25 - > 1 file changed, 24 insertions(+), 1 deletion(-) > > diff --git a/lib/dma-direct.c b/lib/dma-direct.c > index ab81de3ac1d3..f8467cb3d89a 100644 > --- a/lib/dma-direct.c > +++ b/lib/dma-direct.c > @@ -28,6 +28,11 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t > size, > return true; > } > > +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t > size) > +{ > + return phys_to_dma(dev, phys) + size <= dev->coherent_dma_mask; Shouldn't it be: phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask ? > +} > + > static void *dma_direct_alloc(struct device *dev, size_t size, > dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) > { > @@ -35,11 +40,29 @@ static void *dma_direct_alloc(struct device *dev, size_t > size, > int page_order = get_order(size); > struct page *page = NULL; > > +again: > /* CMA can be used only in the context which permits sleeping */ > - if (gfpflags_allow_blocking(gfp)) > + if (gfpflags_allow_blocking(gfp)) { > page = dma_alloc_from_contiguous(dev, count, page_order, gfp); > + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { > + dma_release_from_contiguous(dev, page, count); > + page = NULL; > + } > + } > if (!page) > page = alloc_pages_node(dev_to_node(dev), gfp, page_order); > + > + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { > + __free_pages(page, page_order); > + page = NULL; > + > + if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && > + !(gfp & GFP_DMA)) { > + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; > + goto again; Shouldn't we limit number of attempts? Thanks Vladimir
Re: [PATCH 26/67] dma-direct: use phys_to_dma
On 29/12/17 08:18, Christoph Hellwig wrote: > This means it uses whatever linear remapping scheme that the architecture > provides is used in the generic dma_direct ops. > > Signed-off-by: Christoph Hellwig <h...@lst.de> > --- > lib/dma-direct.c | 18 +++--- > 1 file changed, 7 insertions(+), 11 deletions(-) > > diff --git a/lib/dma-direct.c b/lib/dma-direct.c > index 439db40854b7..0e087650e86b 100644 > --- a/lib/dma-direct.c > +++ b/lib/dma-direct.c > @@ -1,12 +1,11 @@ > // SPDX-License-Identifier: GPL-2.0 > /* > - * lib/dma-noop.c > - * > - * DMA operations that map to physical addresses without flushing memory. > + * DMA operations that map physical memory directly without using an IOMMU or > + * flushing caches. > */ > #include > #include > -#include > +#include > #include > #include > > @@ -17,7 +16,7 @@ static void *dma_direct_alloc(struct device *dev, size_t > size, > > ret = (void *)__get_free_pages(gfp, get_order(size)); > if (ret) > - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); > + *dma_handle = phys_to_dma(dev, virt_to_phys(ret)); > > return ret; > } > @@ -32,7 +31,7 @@ static dma_addr_t dma_direct_map_page(struct device *dev, > struct page *page, > unsigned long offset, size_t size, enum dma_data_direction dir, > unsigned long attrs) > { > - return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); > + return phys_to_dma(dev, page_to_phys(page)) + offset; > } > > static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, > @@ -42,12 +41,9 @@ static int dma_direct_map_sg(struct device *dev, struct > scatterlist *sgl, > struct scatterlist *sg; > > for_each_sg(sgl, sg, nents, i) { > - dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); > - void *va; > - > BUG_ON(!sg_page(sg)); > - va = sg_virt(sg); > - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; > + > + sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); > sg_dma_len(sg) = sg->length; > } > > >From ARM NOMMU perspective Reviewed-by: Vladimir 
Murzin <vladimir.mur...@arm.com> Thanks Vladimir
Re: [PATCH 25/67] dma-direct: rename dma_noop to dma_direct
+ b/arch/riscv/Kconfig > @@ -83,7 +83,7 @@ config PGTABLE_LEVELS > config HAVE_KPROBES > def_bool n > > -config DMA_NOOP_OPS > +config DMA_DIRECT_OPS > def_bool y > > menu "Platform type" > diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig > index 829c67986db7..9376637229c9 100644 > --- a/arch/s390/Kconfig > +++ b/arch/s390/Kconfig > @@ -140,7 +140,7 @@ config S390 > select HAVE_DEBUG_KMEMLEAK > select HAVE_DMA_API_DEBUG > select HAVE_DMA_CONTIGUOUS > - select DMA_NOOP_OPS > + select DMA_DIRECT_OPS > select HAVE_DYNAMIC_FTRACE > select HAVE_DYNAMIC_FTRACE_WITH_REGS > select HAVE_EFFICIENT_UNALIGNED_ACCESS > diff --git a/include/asm-generic/dma-mapping.h > b/include/asm-generic/dma-mapping.h > index 164031531d85..880a292d792f 100644 > --- a/include/asm-generic/dma-mapping.h > +++ b/include/asm-generic/dma-mapping.h > @@ -4,7 +4,7 @@ > > static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type > *bus) > { > - return _noop_ops; > + return _direct_ops; > } > > #endif /* _ASM_GENERIC_DMA_MAPPING_H */ > diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h > index 72568bf4fc12..ff3528de5322 100644 > --- a/include/linux/dma-mapping.h > +++ b/include/linux/dma-mapping.h > @@ -143,7 +143,7 @@ struct dma_map_ops { > bool is_phys; > }; > > -extern const struct dma_map_ops dma_noop_ops; > +extern const struct dma_map_ops dma_direct_ops; > extern const struct dma_map_ops dma_virt_ops; > > #define DMA_BIT_MASK(n) (((n) == 64) ? 
~0ULL : ((1ULL<<(n))-1)) > diff --git a/lib/Kconfig b/lib/Kconfig > index c5e84fbcb30b..9d3d649c9dc9 100644 > --- a/lib/Kconfig > +++ b/lib/Kconfig > @@ -409,7 +409,7 @@ config HAS_DMA > depends on !NO_DMA > default y > > -config DMA_NOOP_OPS > +config DMA_DIRECT_OPS > bool > depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) > default n > diff --git a/lib/Makefile b/lib/Makefile > index d11c48ec8ffd..749851abe85a 100644 > --- a/lib/Makefile > +++ b/lib/Makefile > @@ -28,7 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ > > lib-$(CONFIG_MMU) += ioremap.o > lib-$(CONFIG_SMP) += cpumask.o > -lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o > +lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o > lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o > > lib-y+= kobject.o klist.o > diff --git a/lib/dma-noop.c b/lib/dma-direct.c > similarity index 53% > rename from lib/dma-noop.c > rename to lib/dma-direct.c > index c3728a0551f5..439db40854b7 100644 > --- a/lib/dma-noop.c > +++ b/lib/dma-direct.c > @@ -10,9 +10,8 @@ > #include > #include > > -static void *dma_noop_alloc(struct device *dev, size_t size, > - dma_addr_t *dma_handle, gfp_t gfp, > - unsigned long attrs) > +static void *dma_direct_alloc(struct device *dev, size_t size, > + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) > { > void *ret; > > @@ -23,24 +22,21 @@ static void *dma_noop_alloc(struct device *dev, size_t > size, > return ret; > } > > -static void dma_noop_free(struct device *dev, size_t size, > - void *cpu_addr, dma_addr_t dma_addr, > - unsigned long attrs) > +static void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, > + dma_addr_t dma_addr, unsigned long attrs) > { > free_pages((unsigned long)cpu_addr, get_order(size)); > } > > -static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, > - unsigned long offset, size_t size, > - enum dma_data_direction dir, > - unsigned long attrs) > +static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, > + 
unsigned long offset, size_t size, enum dma_data_direction dir, > + unsigned long attrs) > { > return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); > } > > -static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int > nents, > - enum dma_data_direction dir, > - unsigned long attrs) > +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, > + int nents, enum dma_data_direction dir, unsigned long attrs) > { > int i; > struct scatterlist *sg; > @@ -58,12 +54,11 @@ static int dma_noop_map_sg(struct device *dev, struct > scatterlist *sgl, int nent > return nents; > } > > -const struct dma_map_ops dma_noop_ops = { > - .alloc = dma_noop_alloc, > - .free = dma_noop_free, > - .map_page = dma_noop_map_page, > - .map_sg = dma_noop_map_sg, > +const struct dma_map_ops dma_direct_ops = { > + .alloc = dma_direct_alloc, > + .free = dma_direct_free, > + .map_page = dma_direct_map_page, > + .map_sg = dma_direct_map_sg, > .is_phys= true, > }; > - > -EXPORT_SYMBOL(dma_noop_ops); > +EXPORT_SYMBOL(dma_direct_ops); > >From ARM NOMMU perspective Reviewed-by: Vladimir Murzin <vladimir.mur...@arm.com> Thanks Vladimir
Re: consolidate direct dma mapping and swiotlb support
On 29/12/17 08:18, Christoph Hellwig wrote: > Almost every architecture supports a direct dma mapping implementation, > where no iommu is used and the device dma address is a 1:1 mapping to > the physical address or has a simple linear offset. Currently the > code for this implementation is most duplicated over the architectures, > and the duplicated again in the swiotlb code, and then duplicated again > for special cases like the x86 memory encryption DMA ops. > > This series takes the existing very simple dma-noop dma mapping > implementation, enhances it with all the x86 features and quirks, and > creates a common set of architecture hooks for it and the swiotlb code. > > It then switches a large number of architectures to this generic > direct map implement and the new generic swiotlb dma_map ops. > > Note that for now this only handles architectures that do cache coherent > DMA, but a similar consolidation for non-coherent architectures is in the > work for later merge windows. Is it available in your dma-mapping.git or somewhere else? Cheers Vladimir > > ___ > linux-arm-kernel mailing list > linux-arm-ker...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
[PATCH] powerpc: fix section mismatch warning in free_lppacas
While cross-building for PPC64 I've got a bunch of warnings like this: WARNING: arch/powerpc/kernel/built-in.o(.text.unlikely+0x2d2): Section mismatch in reference from the function .free_lppacas() to the variable .init.data:lppaca_size The function .free_lppacas() references the variable __initdata lppaca_size. This is often because .free_lppacas lacks a __initdata annotation or the annotation of lppaca_size is wrong. Fix it by using the proper annotation for free_lppacas(). Additionally, annotate allocate_lppacas() and new_lppaca() properly. Signed-off-by: Vladimir Murzin <murzi...@gmail.com> --- arch/powerpc/kernel/paca.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 3fc16e3..0620eaa 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -46,7 +46,7 @@ struct lppaca lppaca[] = { static struct lppaca *extra_lppacas; static long __initdata lppaca_size; -static void allocate_lppacas(int nr_cpus, unsigned long limit) +static void __init allocate_lppacas(int nr_cpus, unsigned long limit) { if (nr_cpus <= NR_LPPACAS) return; @@ -57,7 +57,7 @@ static void allocate_lppacas(int nr_cpus, unsigned long limit) PAGE_SIZE, limit)); } -static struct lppaca *new_lppaca(int cpu) +static struct lppaca * __init new_lppaca(int cpu) { struct lppaca *lp; @@ -70,7 +70,7 @@ static struct lppaca *new_lppaca(int cpu) return lp; } -static void free_lppacas(void) +static void __init free_lppacas(void) { long new_size = 0, nr; -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/2] powerpc: net: filter: fix DIVWU instruction opcode
Currently DIVWU stands for the *signed* divw opcode: 7d 2a 4b 96 divwu r9,r10,r9 7d 2a 4b d6 divw r9,r10,r9 Use the *unsigned* divw opcode for DIVWU. Suggested-by: Vassili Karpov <av1...@comtv.ru> Reviewed-by: Vassili Karpov <av1...@comtv.ru> Signed-off-by: Vladimir Murzin <murzi...@gmail.com> Acked-by: Matt Evans <m...@ozlabs.org> --- Changelog v1->v2 Added credit to Vassili Karpov (malc), who kindly reviewed the generated assembly [1] and highlighted the usage of signed division. Note: temporarily, for technical reasons, he's not able to receive email. [1] http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg71635.html arch/powerpc/include/asm/ppc-opcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index eccfc16..0a4a683 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -171,7 +171,7 @@ #define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULHWU 0x7c000016 #define PPC_INST_MULLI 0x1c000000 -#define PPC_INST_DIVWU 0x7c0003d6 +#define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_RLWINM 0x54000000 #define PPC_INST_RLDICR 0x78000004 #define PPC_INST_SLW 0x7c000030 -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 2/2] ppc: bpf_jit: support MOD operation
commit b6069a9570 (filter: add MOD operation) added generic support for the modulus operation in BPF. This patch brings JIT support for PPC64. Signed-off-by: Vladimir Murzin <murzi...@gmail.com> Acked-by: Matt Evans <m...@ozlabs.org> --- Changelog v1->v2 The definition of r_scratch2 was moved to the header file. arch/powerpc/net/bpf_jit.h | 1 + arch/powerpc/net/bpf_jit_comp.c | 20 2 files changed, 21 insertions(+) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 8a5dfaf..42a115a 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -39,6 +39,7 @@ #define r_X 5 #define r_addr 6 #define r_scratch1 7 +#define r_scratch2 8 #define r_D 14 #define r_HL 15 #define r_M 16 diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..cbb2702 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,26 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx->seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx->pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RESEND PATCH 2/2] ppc: bpf_jit: support MOD operation
On Mon, Sep 23, 2013 at 01:13:45AM +1000, Matt Evans wrote: Hi Vladimir, On 21 Sep 2013, at 17:25, Vladimir Murzin murzi...@gmail.com wrote: commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. This patch brings JIT support for PPC64 Signed-off-by: Vladimir Murzin murzi...@gmail.com Acked-by: Matt Evans m...@ozlabs.org Not this version, though; see below. --- arch/powerpc/net/bpf_jit_comp.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..96f24dc 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,28 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; +case BPF_S_ALU_MOD_X: /* A %= X; */ +ctx-seen |= SEEN_XREG; +PPC_CMPWI(r_X, 0); +if (ctx-pc_ret0 != -1) { +PPC_BCC(COND_EQ, addrs[ctx-pc_ret0]); +} else { +PPC_BCC_SHORT(COND_NE, (ctx-idx*4)+12); +PPC_LI(r_ret, 0); +PPC_JMP(exit_addr); +} +PPC_DIVWU(r_scratch1, r_A, r_X); +PPC_MUL(r_scratch1, r_X, r_scratch1); +PPC_SUB(r_A, r_A, r_scratch1); +break; +case BPF_S_ALU_MOD_K: /* A %= K; */ +#define r_scratch2 (r_scratch1 + 1) Old version of this patch, still? I had hoped that r_scratch2 would be defined in the header. Oops.. been keeping the old version.. sorry for that, Matt :( +PPC_LI32(r_scratch2, K); +PPC_DIVWU(r_scratch1, r_A, r_scratch2); +PPC_MUL(r_scratch1, r_scratch2, r_scratch1); +PPC_SUB(r_A, r_A, r_scratch1); +#undef r_scratch2 And remember this guy too.. :) I've included the patch below. Nothing is missed this time, I hope ;) Matt +break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx-seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 --- From: Vladimir Murzin murzi...@gmail.com Date: Wed, 28 Aug 2013 01:29:39 +0400 Subject: [PATCH 2/2] ppc: bpf_jit: support MOD operation commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. 
This patch brings JIT support for PPC64 Signed-off-by: Vladimir Murzin murzi...@gmail.com Acked-by: Matt Evans m...@ozlabs.org --- arch/powerpc/net/bpf_jit.h | 1 + arch/powerpc/net/bpf_jit_comp.c | 20 2 files changed, 21 insertions(+) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 8a5dfaf..42a115a 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -39,6 +39,7 @@ #define r_X5 #define r_addr 6 #define r_scratch1 7 +#define r_scratch2 8 #define r_D14 #define r_HL 15 #define r_M16 diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..cbb2702 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,26 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx-seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx-pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx-pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx-idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx-seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RESEND PATCH 1/2] powerpc: net: filter: fix DIVWU instruction opcode
Currently DIVWU stands for *signed* divw opcode: 7d 2a 4b 96 divwu r9,r10,r9 7d 2a 4b d6 divwr9,r10,r9 Use the *unsigned* divw opcode for DIVWU. Signed-off-by: Vladimir Murzin murzi...@gmail.com Acked-by: Matt Evans m...@ozlabs.org --- arch/powerpc/include/asm/ppc-opcode.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d7fe9f5..c91842c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -218,7 +218,7 @@ #define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULHWU0x7c16 #define PPC_INST_MULLI 0x1c00 -#define PPC_INST_DIVWU 0x7c0003d6 +#define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_RLWINM0x5400 #define PPC_INST_RLDICR0x7804 #define PPC_INST_SLW 0x7c30 -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RESEND PATCH 2/2] ppc: bpf_jit: support MOD operation
commit b6069a9570 (filter: add MOD operation) added generic support for the modulus operation in BPF. This patch brings JIT support for PPC64. Signed-off-by: Vladimir Murzin <murzi...@gmail.com> Acked-by: Matt Evans <m...@ozlabs.org> --- arch/powerpc/net/bpf_jit_comp.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..96f24dc 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,28 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx->seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx->pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ +#define r_scratch2 (r_scratch1 + 1) + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); +#undef r_scratch2 + break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: net: filter: fix DIVWU instruction opcode
Currently DIVWU stands for *signed* divw opcode: 7d 2a 4b 96 divwu r9,r10,r9 7d 2a 4b d6 divwr9,r10,r9 Use the *unsigned* divw opcode for DIVWU. Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/include/asm/ppc-opcode.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d7fe9f5..c91842c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -218,7 +218,7 @@ #define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULHWU0x7c16 #define PPC_INST_MULLI 0x1c00 -#define PPC_INST_DIVWU 0x7c0003d6 +#define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_RLWINM0x5400 #define PPC_INST_RLDICR0x7804 #define PPC_INST_SLW 0x7c30 -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/3] powerpc: export cpu_to_chip_id
On Wed, Sep 11, 2013 at 12:07:45PM +0530, Vasant Hegde wrote: On 09/10/2013 10:08 PM, Vladimir Murzin wrote: While cross-building for PPC64 I've got Vladimir, Below commit ID fixes this issue. Great! For whatever reason I thought _GPL is default policy for exported symbols... but, never mind ;) Sorry for the noise Vladimir commit 256588fda10f2a712631f8a4e72641a66adebdb8 Author: Guenter Roeck li...@roeck-us.net Date: Mon Sep 9 18:37:56 2013 -0700 powerpc: Export cpu_to_chip_id() to fix build error -Vasant ERROR: .cpu_to_chip_id [drivers/block/mtip32xx/mtip32xx.ko] undefined! mtip32xx refer to topology_physical_package_id, which stands for cpu_to_chip-id. Make cpu_to_chip-id (and topology_physical_package_id) available for external users. Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/kernel/smp.c |1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 442d8e2..1d9c1c2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -611,6 +611,7 @@ int cpu_to_chip_id(int cpu) of_node_put(np); return of_get_ibm_chip_id(np); } +EXPORT_SYMBOL_GPL(cpu_to_chip_id); /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] ppc: bpf_jit: support MOD operation
On Wed, Sep 04, 2013 at 09:04:04AM +0200, Daniel Borkmann wrote: On 09/03/2013 10:52 PM, Daniel Borkmann wrote: On 09/03/2013 09:58 PM, Vladimir Murzin wrote: [...] Do you have a test case/suite by any chance ? Ben. Hi Ben! Thanks for your feedback. This patch is only compile tested. I have no real hardware, but I'll probably bring up qemu ppc64 till end of the week... Meanwhile, I've made simple how-to for testing. You can use it if you wish. It is mainly based on the [1] and rechecked on x86-64. Finally I've managed to bring up qemu ppc64 and done simple testing. As a result I could see difference in opcodes for divide instruction - I've just sent the patch for that. WRT mod instruction result is: For BPF program (000) ldh [12] (001) jeq #0x800 jt 2jf 10 (002) ldh [16] (003) sub #20 (004) mod #5 (005) jeq #0x0 jt 10 jf 6 (006) ldb [20] (007) and #0x20 (008) jeq #0x20jt 9jf 10 (009) ret #65535 (010) ret #0 The following code is generated (with patch divw to divwu applied) 244 bytes emitted from JIT compiler (pass:3, flen:11) d15c0018 + x: 0: mflrr0 4: std r0,16(r1) 8: std r14,-144(r1) c: std r15,-136(r1) 10: stdur1,-288(r1) 14: lwz r7,108(r3) 18: lwz r15,104(r3) 1c: subfr15,r7,r15 20: ld r14,216(r3) 24: lis r7,-16384 28: rldicr r7,r7,32,31 2c: orisr7,r7,9 30: ori r7,r7,43428 34: mtlrr7 38: li r6,12 3c: blrl 40: blt-0x00dc 44: nop 48: cmplwi r4,2048 4c: bne-0x00d8 50: nop 54: lis r7,-16384 58: rldicr r7,r7,32,31 5c: orisr7,r7,9 60: ori r7,r7,43428 64: mtlrr7 68: li r6,16 6c: blrl 70: blt-0x00dc 74: nop 78: addir4,r4,-20 7c: li r8,5 80: divwu r7,r4,r8 84: mullw r7,r8,r7 88: subfr4,r7,r4 8c: cmplwi r4,0 90: beq-0x00d8 94: nop 98: lis r7,-16384 9c: rldicr r7,r7,32,31 a0: orisr7,r7,9 a4: ori r7,r7,43456 a8: mtlrr7 ac: li r6,20 b0: blrl b4: blt-0x00dc b8: nop bc: andi. 
r4,r4,32 c0: cmplwi r4,32 c4: bne-0x00d8 c8: nop cc: li r3,-1 d0: addis r3,r3,1 d4: b 0x00dc d8: li r3,0 dc: addir1,r1,288 e0: ld r0,16(r1) e4: mtlrr0 e8: ld r14,-144(r1) ec: ld r15,-136(r1) f0: blr Raw codes are flen=11 proglen=244 pass=3 image=d15c0018 JIT code: : 7c 08 02 a6 f8 01 00 10 f9 c1 ff 70 f9 e1 ff 78 JIT code: 0010: f8 21 fe e1 80 e3 00 6c 81 e3 00 68 7d e7 78 50 JIT code: 0020: e9 c3 00 d8 3c e0 c0 00 78 e7 07 c6 64 e7 00 09 JIT code: 0030: 60 e7 a9 a4 7c e8 03 a6 38 c0 00 0c 4e 80 00 21 JIT code: 0040: 41 80 00 9c 60 00 00 00 28 04 08 00 40 82 00 8c JIT code: 0050: 60 00 00 00 3c e0 c0 00 78 e7 07 c6 64 e7 00 09 JIT code: 0060: 60 e7 a9 a4 7c e8 03 a6 38 c0 00 10 4e 80 00 21 JIT code: 0070: 41 80 00 6c 60 00 00 00 38 84 ff ec 39 00 00 05 JIT code: 0080: 7c e4 43 96 7c e8 39 d6 7c 87 20 50 28 04 00 00 JIT code: 0090: 41 82 00 48 60 00 00 00 3c e0 c0 00 78 e7 07 c6 JIT code: 00a0: 64 e7 00 09 60 e7 a9 c0 7c e8 03 a6 38 c0 00 14 JIT code: 00b0: 4e 80 00 21 41 80 00 28 60 00 00 00 70 84 00 20 JIT code: 00c0: 28 04 00 20 40 82 00 14 60 00 00 00 38 60 ff ff JIT code: 00d0: 3c 63 00 01 48 00 00 08 38 60 00 00 38 21 01 20 JIT code: 00e0: e8 01 00 10 7c 08 03 a6 e9 c1 ff 70 e9 e1 ff 78 JIT code: 00f0: 4e 80 00 20 Ben, How do you feel about it? Please also cc netdev on BPF related changes. Actually, your test plan can be further simplified ... For retrieving and disassembling the JIT image, we have bpf_jit_disasm [1]. 1) echo 2 /proc/sys/net/core/bpf_jit_enable 2) ... attach filter ... 3) bpf_jit_disasm -o For generating a simple stupid test filter, you can use bpfc [2] (also see its man page). E.g. ... # cat blub ldi #10 mod #8 ret a # bpfc blub { 0x0, 0, 0, 0x000a }, { 0x94, 0, 0, 0x0008 }, { 0x16, 0, 0, 0x }, Plus something like ... ldxi #0 mod x ret a Thanks Daniel! Unfortunately, I couldn't trigger JIT compiler with the pair bpfc/netsniff-ng (even for x86-64). I guess I missed something. I'd be very grateful if you point at my mistakes. 
For longer-term testing, also trinity has BPF support. ;) Wow! Could you give some hint how to run this for BPF only? And load this array e.g. either into a small C program that attaches this as BPF filter, or simply do bpfc blub > blub2 and run netsniff-ng -f blub2\ -s -i eth0, that should also do it. Then, when attached, the kernel should truncate
Re: [PATCH] powerpc: net: filter: fix DIVWU instruction opcode
On Thu, Sep 12, 2013 at 10:28:03AM +0930, Matt Evans wrote: On 12 Sep 2013, at 10:02, Michael Neuling mi...@neuling.org wrote: Vladimir Murzin murzi...@gmail.com wrote: Currently DIVWU stands for *signed* divw opcode: 7d 2a 4b 96 divwu r9,r10,r9 7d 2a 4b d6 divw r9,r10,r9 Use the *unsigned* divw opcode for DIVWU. This looks like it's only used in the BPF JIT code. Matt, any chance you can ACK/NACK this? Sure, that looks sensible, thanks Vladimir. Acked-by: Matt Evans m...@ozlabs.org Thanks! Vladimir Mikey Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/include/asm/ppc-opcode.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d7fe9f5..c91842c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -218,7 +218,7 @@ #define PPC_INST_MULLW0x7c0001d6 #define PPC_INST_MULHWU0x7c16 #define PPC_INST_MULLI0x1c00 -#define PPC_INST_DIVWU0x7c0003d6 +#define PPC_INST_DIVWU0x7c000396 #define PPC_INST_RLWINM0x5400 #define PPC_INST_RLDICR0x7804 #define PPC_INST_SLW0x7c30 -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] ppc: bpf_jit: support MOD operation
On Thu, Sep 12, 2013 at 02:18:37AM +0100, Matt Evans wrote: Hi Ben, Vladimir, *dusts off very thick PPC cobwebs* Sorry for the delay as I'm travelling, didn't get to this until now. On 02/09/2013, at 9:45 PM, Benjamin Herrenschmidt wrote: On Mon, 2013-09-02 at 19:48 +0200, Vladimir Murzin wrote: Ping On Wed, Aug 28, 2013 at 02:49:52AM +0400, Vladimir Murzin wrote: commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. Sorry, nobody got a chance to review that yet. Unfortunately Matt doesn't work for us anymore and none of us has experience with the BPF code, so somebody (possibly me) will need to spend a bit of time figuring it out before verifying that is correct. Do you have a test case/suite by any chance ? Ben. This patch brings JIT support for PPC64 Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/net/bpf_jit_comp.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..96f24dc 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,28 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx-seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx-pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx-pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx-idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; Without having compiled tested this, it looks fine to me (especially with the corrected DIVWU opcode in the other patch, oops...). 
+ case BPF_S_ALU_MOD_K: /* A %= K; */ +#define r_scratch2 (r_scratch1 + 1) + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); +#undef r_scratch2 + break; If you need another scratch register, it should really be defined in bpf_jit.h instead. Once you define r_scratch2 in there, Acked-by: Matt Evans m...@ozlabs.org Thanks! Matt Thanks! Vladimir case BPF_S_ALU_DIV_X: /* A /= X; */ ctx-seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/3] powerpc: export cpu_to_chip_id
While cross-building for PPC64 I've got ERROR: .cpu_to_chip_id [drivers/block/mtip32xx/mtip32xx.ko] undefined! mtip32xx refer to topology_physical_package_id, which stands for cpu_to_chip-id. Make cpu_to_chip-id (and topology_physical_package_id) available for external users. Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/kernel/smp.c |1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 442d8e2..1d9c1c2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -611,6 +611,7 @@ int cpu_to_chip_id(int cpu) of_node_put(np); return of_get_ibm_chip_id(np); } +EXPORT_SYMBOL_GPL(cpu_to_chip_id); /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/3] powerpc: fix section mismatch warning for prom_rtas_call
While cross-building for PPC64 I've got WARNING: vmlinux.o(.text.unlikely+0x1ba): Section mismatch in reference from the function .prom_rtas_call() to the variable .init.data:dt_string_start The function .prom_rtas_call() references the variable __initdata dt_string_start. This is often because .prom_rtas_call lacks a __initdata annotation or the annotation of dt_string_start is wrong. WARNING: vmlinux.o(.meminit.text+0xeb0): Section mismatch in reference from the function .free_area_init_core.isra.47() to the function .init.text:.set_pageblock_order() The function __meminit .free_area_init_core.isra.47() references a function __init .set_pageblock_order(). If .set_pageblock_order is only used by .free_area_init_core.isra.47 then annotate .set_pageblock_order with a matching annotation. Fix it by proper annotation of prom_rtas_call. Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/kernel/prom_init.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 7b6391b..12e656f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1297,7 +1297,8 @@ static void __init prom_query_opal(void) prom_opal_align = 0x1; } -static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...) +static int __init prom_rtas_call(int token, int nargs, int nret, +int *outputs, ...) { struct rtas_args rtas_args; va_list list; -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/3] fbdev/ps3fb: fix section mismatch warning for ps3fb_probe
While cross-building for PPC64 I've got WARNING: drivers/video/built-in.o(.text+0x9f9ca): Section mismatch in reference from the function .ps3fb_probe() to the variable .init.data:ps3fb_fix The function .ps3fb_probe() references the variable __initdata ps3fb_fix. This is often because .ps3fb_probe lacks a __initdata annotation or the annotation of ps3fb_fix is wrong. WARNING: drivers/video/built-in.o(.text+0x9f9d2): Section mismatch in reference from the function .ps3fb_probe() to the variable .init.data:ps3fb_fix The function .ps3fb_probe() references the variable __initdata ps3fb_fix. This is often because .ps3fb_probe lacks a __initdata annotation or the annotation of ps3fb_fix is wrong. WARNING: drivers/built-in.o(.text+0xe222a): Section mismatch in reference from the function .ps3fb_probe() to the variable .init.data:ps3fb_fix The function .ps3fb_probe() references the variable __initdata ps3fb_fix. This is often because .ps3fb_probe lacks a __initdata annotation or the annotation of ps3fb_fix is wrong. WARNING: drivers/built-in.o(.text+0xe2232): Section mismatch in reference from the function .ps3fb_probe() to the variable .init.data:ps3fb_fix The function .ps3fb_probe() references the variable __initdata ps3fb_fix. This is often because .ps3fb_probe lacks a __initdata annotation or the annotation of ps3fb_fix is wrong. WARNING: vmlinux.o(.text+0x561d4a): Section mismatch in reference from the function .ps3fb_probe() to the variable .init.data:ps3fb_fix The function .ps3fb_probe() references the variable __initdata ps3fb_fix. This is often because .ps3fb_probe lacks a __initdata annotation or the annotation of ps3fb_fix is wrong. Mismatch was introduced with 48c68c4f Drivers: video: remove __dev* attributes. Remove __init data annotation from ps3fb_fix. 
Signed-off-by: Vladimir Murzin murzi...@gmail.com --- drivers/video/ps3fb.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/ps3fb.c b/drivers/video/ps3fb.c index dbfe2c1..b269abd 100644 --- a/drivers/video/ps3fb.c +++ b/drivers/video/ps3fb.c @@ -952,7 +952,7 @@ static struct fb_ops ps3fb_ops = { .fb_compat_ioctl = ps3fb_ioctl }; -static struct fb_fix_screeninfo ps3fb_fix __initdata = { +static struct fb_fix_screeninfo ps3fb_fix = { .id = DEVICE_NAME, .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_TRUECOLOR, -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] ppc: bpf_jit: support MOD operation
On Tue, Sep 03, 2013 at 06:45:50AM +1000, Benjamin Herrenschmidt wrote: On Mon, 2013-09-02 at 19:48 +0200, Vladimir Murzin wrote: Ping On Wed, Aug 28, 2013 at 02:49:52AM +0400, Vladimir Murzin wrote: commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. Sorry, nobody got a chance to review that yet. Unfortunately Matt doesn't work for us anymore and none of us has experience with the BPF code, so somebody (possibly me) will need to spend a bit of time figuring it out before verifying that is correct. Do you have a test case/suite by any chance ? Ben. Hi Ben! Thanks for your feedback. This patch is only compile tested. I have no real hardware, but I'll probably bring up qemu ppc64 till end of the week... Meanwhile, I've made simple how-to for testing. You can use it if you wish. It is mainly based on the [1] and rechecked on x86-64. 1. get the tcpdump utility (git clone git://bpf.tcpdump.org/tcpdump) 2. get the libpcap library (git clone git://bpf.tcpdump.org/libpcap) 2.1. apply patch for libpcap [2] (against libpcap-1.3 branch) 2.2. build libpcap (./configure && make && ln -s libpcap.so.1.3.0 libpcap.so) 3. build tcpdump (LDFLAGS=-L/path/to/libpcap ./configure && make) 4. run # ./tcpdump -d "(ip[2:2] - 20) % 5 != 0 && ip[6] & 0x20 = 0x20" (000) ldh [14] (001) jeq #0x800 jt 2 jf 10 (002) ldh [18] (003) sub #20 (004) mod #5 (005) jeq #0x0 jt 10 jf 6 (006) ldb [22] (007) and #0x20 (008) jeq #0x20 jt 9 jf 10 (009) ret #65535 (010) ret #0 to get pseudo code (we are most interested in line #4) 5. enable bpf jit compiler # echo 2 > /proc/sys/net/core/bpf_jit_enable 6. run ./tcpdump -nv "(ip[2:2] - 20) % 5 != 0 && ip[6] & 0x20 = 0x20" 7. 
check dmesg for lines starting with (output for x86-64 is provided as an example) [ 3768.329253] flen=11 proglen=99 pass=3 image=a003c000 [ 3768.329254] JIT code: a003c000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 [ 3768.329255] JIT code: a003c010: 44 2b 4f 64 4c 8b 87 c0 00 00 00 0f b7 47 76 86 [ 3768.329256] JIT code: a003c020: c4 3d 00 08 00 00 75 37 be 02 00 00 00 e8 9f 3e [ 3768.329257] JIT code: a003c030: 02 e1 83 e8 14 31 d2 b9 05 00 00 00 f7 f1 89 d0 [ 3768.329258] JIT code: a003c040: 85 c0 74 1b be 06 00 00 00 e8 9f 3e 02 e1 25 20 [ 3768.329259] JIT code: a003c050: 00 00 00 83 f8 20 75 07 b8 ff ff 00 00 eb 02 31 [ 3768.329259] JIT code: a003c060: c0 c9 c3 8. make sure generated opcodes (JIT code) implement pseudo code from step 4. Reference [1] http://comments.gmane.org/gmane.linux.network/242456 [2] http://permalink.gmane.org/gmane.network.tcpdump.devel/5973 P.S. I hope net people will correct me if I'm wrong there Cheers Vladimir Murzin This patch brings JIT support for PPC64 Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/net/bpf_jit_comp.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..96f24dc 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,28 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx-seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx-pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx-pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx-idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ +#define r_scratch2 (r_scratch1 + 1) + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, 
r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); +#undef r_scratch2 + break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx-seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] ppc: bpf_jit: support MOD operation
Ping On Wed, Aug 28, 2013 at 02:49:52AM +0400, Vladimir Murzin wrote: commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. This patch brings JIT support for PPC64 Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/net/bpf_jit_comp.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..96f24dc 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,28 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx-seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx-pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx-pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx-idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ +#define r_scratch2 (r_scratch1 + 1) + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); +#undef r_scratch2 + break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx-seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] ppc: bpf_jit: support MOD operation
commit b6069a9570 (filter: add MOD operation) added generic support for modulus operation in BPF. This patch brings JIT support for PPC64 Signed-off-by: Vladimir Murzin murzi...@gmail.com --- arch/powerpc/net/bpf_jit_comp.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index bf56e33..96f24dc 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -193,6 +193,28 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + ctx-seen |= SEEN_XREG; + PPC_CMPWI(r_X, 0); + if (ctx-pc_ret0 != -1) { + PPC_BCC(COND_EQ, addrs[ctx-pc_ret0]); + } else { + PPC_BCC_SHORT(COND_NE, (ctx-idx*4)+12); + PPC_LI(r_ret, 0); + PPC_JMP(exit_addr); + } + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ +#define r_scratch2 (r_scratch1 + 1) + PPC_LI32(r_scratch2, K); + PPC_DIVWU(r_scratch1, r_A, r_scratch2); + PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); +#undef r_scratch2 + break; case BPF_S_ALU_DIV_X: /* A /= X; */ ctx-seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); -- 1.8.1.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev