Re: [PATCH 1/3] powerpc: add driver for simple GPIO banks
On Fri, Oct 10, 2008 at 08:55:01PM +0400, Anton Vorontsov wrote: The driver supports very simple GPIO controllers, that is, when a controller provides just a 'data' register. Such controllers may be found in various BCSRs (Board's FPGAs used to control board's switches, LEDs, chip-selects, Ethernet/USB PHY power, etc). So far we support only 1-byte GPIO banks. Support for other widths may be implemented when/if needed. p.s. To avoid made up compatible entries (like compatible = simple-gpio), boards must call the simple_gpiochip_init() to pass the compatible string. Signed-off-by: Anton Vorontsov [EMAIL PROTECTED] --- Documentation/powerpc/dts-bindings/fsl/board.txt | 30 arch/powerpc/platforms/Kconfig | 11 ++ arch/powerpc/sysdev/Makefile |1 + arch/powerpc/sysdev/simple_gpio.c| 157 ++ arch/powerpc/sysdev/simple_gpio.h| 13 ++ 5 files changed, 212 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/sysdev/simple_gpio.c create mode 100644 arch/powerpc/sysdev/simple_gpio.h diff --git a/Documentation/powerpc/dts-bindings/fsl/board.txt b/Documentation/powerpc/dts-bindings/fsl/board.txt index 74ae6f1..e97877f 100644 --- a/Documentation/powerpc/dts-bindings/fsl/board.txt +++ b/Documentation/powerpc/dts-bindings/fsl/board.txt @@ -27,3 +27,33 @@ Example (MPC8610HPCD): compatible = fsl,fpga-pixis; reg = 0xe800 32; }; + +* Freescale BCSR GPIO banks + +Some BCSR registers act as simple GPIO controllers, each such +register can be represented by the gpio-controller node. + +Required properities: +- compatible : Should be fsl,board-bcsr-gpio; +- reg : Should contain the address and the lenght of the GPIO bank + register; +- #gpio-cells : Should be two. The first cell is the pin number and the + second cell is used to specify optional paramters (currently unused); +- gpio-controller : Marks the port as GPIO controller. + +Example: + + [EMAIL PROTECTED],0 { + #address-cells = 1; + #size-cells = 1; + device_type = board-control; This device_type field should not be used. 
I know it is in the existing samples, which is another bug. But please don't document it (and thereby further encourage it). -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH RFC] pata_platform: add 8 bit data io support
To avoid adding another rare used ata_port member, new bit is added to ata_port-flags. Originally, I hacked pata_platform to make it 8bit only to support 8bit data wired CF card. This patch is more generic. With this patch, __pata_platform_probe() interface is changed, and pata_of_platform is broken, so a small patch is needed. Signed-off-by: Wang Jian [EMAIL PROTECTED] --- drivers/ata/pata_platform.c | 63 -- include/linux/ata.h |8 + include/linux/ata_platform.h |4 ++ include/linux/libata.h |1 + 4 files changed, 73 insertions(+), 3 deletions(-) diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 8f65ad6..d2276ad 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -50,9 +50,62 @@ static struct scsi_host_template pata_platform_sht = { ATA_PIO_SHT(DRV_NAME), }; +static void pata_platform_postreset(struct ata_link *link, unsigned int *classes) +{ + struct ata_port *ap = link-ap; + struct ata_device *dev; + u8 select = ATA_DEVICE_OBS; + + /* Call default callback first */ + ata_sff_postreset(link, classes); + + if (!(ap-flags ATA_FLAG_8BIT_DATA)) + return; + + /* Set 8-bit mode. 
We know we can do that */ + ata_link_for_each_dev(dev, link) { + if (dev-devno) + select |= ATA_DEV1; + + iowrite8(SETFEATURES_8BIT_ON, ap-ioaddr.feature_addr); + iowrite8(select, ap-ioaddr.device_addr); + iowrite8(ATA_CMD_SET_FEATURES, ap-ioaddr.command_addr); + } +} + +static unsigned int pata_platform_data_xfer(struct ata_device *dev, + unsigned char *buf, unsigned int buflen, int rw) +{ + struct ata_port *ap = dev-link-ap; + + if (!(ap-flags ATA_FLAG_8BIT_DATA)) + return ata_sff_data_xfer(dev, buf, buflen, rw); + + if (rw == READ) + ioread8_rep(ap-ioaddr.data_addr, buf, buflen); + else + iowrite8_rep(ap-ioaddr.data_addr, buf, buflen); + + return buflen; +} + +static unsigned int pata_platform_data_xfer_noirq(struct ata_device *dev, + unsigned char *buf, unsigned int buflen, int rw) +{ + unsigned long flags; + unsigned int consumed; + + local_irq_save(flags); + consumed = pata_platform_data_xfer(dev, buf, buflen, rw); + local_irq_restore(flags); + + return consumed; +} + static struct ata_port_operations pata_platform_port_ops = { .inherits = ata_sff_port_ops, - .sff_data_xfer = ata_sff_data_xfer_noirq, + .postreset = pata_platform_postreset, + .sff_data_xfer = pata_platform_data_xfer_noirq, .cable_detect = ata_cable_unknown, .set_mode = pata_platform_set_mode, .port_start = ATA_OP_NULL, @@ -106,7 +159,8 @@ int __devinit __pata_platform_probe(struct device *dev, struct resource *ctl_res, struct resource *irq_res, unsigned int ioport_shift, - int __pio_mask) + int __pio_mask, + unsigned int data_width) { struct ata_host *host; struct ata_port *ap; @@ -140,6 +194,9 @@ int __devinit __pata_platform_probe(struct device *dev, ap-pio_mask = __pio_mask; ap-flags |= ATA_FLAG_SLAVE_POSS; + if (data_width == ATA_DATA_WIDTH_8BIT) + ap-flags |= ATA_FLAG_8BIT_DATA; + /* * Use polling mode if there's no IRQ */ @@ -242,7 +299,7 @@ static int __devinit pata_platform_probe(struct platform_device *pdev) return __pata_platform_probe(pdev-dev, io_res, ctl_res, irq_res, pp_info ? 
pp_info-ioport_shift : 0, -pio_mask); +pio_mask, pp_info-data_width); } static int __devexit pata_platform_remove(struct platform_device *pdev) diff --git a/include/linux/ata.h b/include/linux/ata.h index be00973..4ce26df 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -45,6 +45,11 @@ enum { ATA_MAX_SECTORS_LBA48 = 65535,/* TODO: 65536? */ ATA_MAX_SECTORS_TAPE= 65535, + ATA_DATA_WIDTH_8BIT = 1, + ATA_DATA_WIDTH_16BIT= 2, + ATA_DATA_WIDTH_DEFAULT = 2, + ATA_DATA_WIDTH_32BIT= 4, + ATA_ID_WORDS= 256, ATA_ID_CONFIG = 0, ATA_ID_CYLS = 1, @@ -280,6 +285,9 @@ enum { XFER_PIO_0 = 0x08, XFER_PIO_SLOW = 0x00, + SETFEATURES_8BIT_ON = 0x01, /* Enable 8 bit data transfers */ + SETFEATURES_8BIT_OFF= 0x81, /* Disable 8 bit data transfers */ +
Re: [PATCH RFC] pata_platform: add 8 bit data io support
On Sun, 2008-10-12 at 02:00 +0800, Wang Jian wrote: To avoid adding another rare used ata_port member, new bit is added to ata_port-flags. Originally, I hacked pata_platform to make it 8bit only to support 8bit data wired CF card. This patch is more generic. With this patch, __pata_platform_probe() interface is changed, and pata_of_platform is broken, so a small patch is needed. Signed-off-by: Wang Jian [EMAIL PROTECTED] --- A couple of things. First I would personally prefer (but I'm not the libata maintainer so it's up to Jeff ...) if you had a separate patch that adds the 8-bit support to libata core first, and then a patch that modifies pata_platform. Then, in order to avoid breaking bisection, I would like you to fixup pata_of_platform in the same patch that modifies __pata_platform_probe so there is no breakage in between patches. Now, regarding the patch itself, if the core grows an 8-bit flag, then I strongly suspect the core should also grow the 8-bit xfer function rather than having it hidden in pata_platform. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: performance: memcpy vs. __copy_tofrom_user
On Thu, 2008-10-09 at 10:37 -0500, Matt Sealey wrote: Ahem, but nobody here wants AltiVec in the kernel do they? It depends. We do use altivec in the kernel for example for RAID accelerations. The reason where we require a -real-good- reason to do it is simply because of the drawbacks. The cost of enabling altivec in the kernel can be high (especially if the user is using it) and it's not context switched for kernel code (just like the FPU) for obvious performance reasons. Thus any use of altivec in the kernel must be done within non-preemptible sections, which can cause higher latencies in preemptible kernels. Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: performance: memcpy vs. __copy_tofrom_user
On Wed, 2008-10-08 at 12:40 -0500, Scott Wood wrote: The performance difference most likely comes from the fact that copy to/from user can assume that the memory is cacheable, while memcpy is occasionally used on cache-inhibited memory -- so dcbz isn't used. We may be better off handling the alignment fault on those occasions, and we should use dcba on chips that support it. Note that the kernel memcpy isn't supposed to be used for non-cacheable memory. That's what memcpy_to/fromio are for. But Paul has a point that for small copies especially, the cost of the cache instructions outweighs their benefit. Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
2.6.27 g5 config?
Hi, I always have trouble getting an initial build to boot. Usually it is with keyboards and video console. Currently, I can't seem to find the root disk. With the attached config I get the early messages, a pair of penguin images. And then I think it says system restarting. Happens kinda fast. If anyone is willing to look at the attached config I would appreciate it. kevin # # Automatically generated make config: don't edit # Linux kernel version: 2.6.27 # Sat Oct 11 12:11:01 2008 # CONFIG_PPC64=y # # Processor support # CONFIG_POWER4_ONLY=y CONFIG_POWER4=y # CONFIG_TUNE_CELL is not set CONFIG_PPC_FPU=y CONFIG_ALTIVEC=y # CONFIG_VSX is not set CONFIG_PPC_STD_MMU=y CONFIG_PPC_MM_SLICES=y CONFIG_VIRT_CPU_ACCOUNTING=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_64BIT=y CONFIG_WORD_SIZE=64 CONFIG_PPC_MERGE=y CONFIG_MMU=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_GENERIC_TIME=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_HARDIRQS=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_IRQ_PER_CPU=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_HAVE_LATENCYTOP_SUPPORT=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_ARCH_HAS_ILOG2_U32=y CONFIG_ARCH_HAS_ILOG2_U64=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_ARCH_NO_VIRT_TO_BUS=y CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_PPC_OF=y CONFIG_OF=y # CONFIG_PPC_UDBG_16550 is not set CONFIG_GENERIC_TBSYNC=y CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y # CONFIG_DEFAULT_UIMAGE is not set CONFIG_HIBERNATE_64=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set # CONFIG_PPC_OF_PLATFORM_PCI is not set CONFIG_DEFCONFIG_LIST=/lib/modules/$UNAME_RELEASE/.config # # General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 
CONFIG_LOCALVERSION= CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y # CONFIG_TASKSTATS is not set CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_AUDIT_TREE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 # CONFIG_CGROUPS is not set # CONFIG_GROUP_SCHED is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_IPC_NS is not set # CONFIG_USER_NS is not set # CONFIG_PID_NS is not set # CONFIG_BLK_DEV_INITRD is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_COMPAT_BRK=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set # CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_ATTRS=y CONFIG_USE_GENERIC_SMP_HELPERS=y # CONFIG_HAVE_CLK is not set CONFIG_PROC_PAGE_MONITOR=y # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y # # IO Schedulers # 
CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_DEADLINE is not set # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED=anticipatory CONFIG_CLASSIC_RCU=y # # Platform support # CONFIG_PPC_MULTIPLATFORM=y # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_ISERIES is not set CONFIG_PPC_PMAC=y CONFIG_PPC_PMAC64=y # CONFIG_PPC_MAPLE is not set # CONFIG_PPC_PASEMI is not set # CONFIG_PPC_PS3 is not set # CONFIG_PPC_CELL is not set # CONFIG_PPC_CELL_NATIVE is not set # CONFIG_PPC_IBM_CELL_BLADE is not set # CONFIG_PPC_CELLEB is not set # CONFIG_PQ2ADS is not set CONFIG_PPC_NATIVE=y # CONFIG_IPIC is not set CONFIG_MPIC=y # CONFIG_MPIC_WEIRD is not set # CONFIG_PPC_I8259 is not set CONFIG_U3_DART=y # CONFIG_PPC_RTAS is not set # CONFIG_MMIO_NVRAM is not set CONFIG_MPIC_U3_HT_IRQS=y # CONFIG_PPC_MPC106 is not set CONFIG_PPC_970_NAP=y # CONFIG_PPC_INDIRECT_IO is not set #
Re: performance: memcpy vs. __copy_tofrom_user
Benjamin Herrenschmidt wrote: On Thu, 2008-10-09 at 10:37 -0500, Matt Sealey wrote: Ahem, but nobody here wants AltiVec in the kernel do they? It depends. We do use altivec in the kernel for example for RAID accelerations. The reason where we require a -real-good- reason to do it is simply because of the drawbacks. The cost of enabling altivec in the kernel can be high (especially if the user is using it) and it's not context switched for kernel code (just like the FPU) for obvious performance reasons. Thus any use of altivec in the kernel must be done within non-preemptible sections, which can cause higher latencies in preemptible kernels. Would the examples (page copy, page clear) be an okay place to do it? These sections can't be preempted anyway (right?), and it's noted that doing it with AltiVec is a tad faster than using MMU tricks or standard copies? In Scott's case, while optimizing memcpy for 48byte blocks was a joke, this is 3 load/stores in AltiVec, which as long as every SKB is 16 byte aligned (is there any reason why it would not be? :) skb_clone might not be something you want to dump AltiVec into and would make a mess if an skb got extended somehow, but the principle is outlined in a very good document from a very long time ago; http://www.motorola.com.cn/semiconductors/sndf/conference/PDF/AH1109.pdf I think a lot of it still holds true as long as you really don't care about preemption under these circumstances (where network throughput is more important, and where AltiVec actually *reduces* CPU time, the overhead of disabling preemption is lower anyway). You could say the same about the RAID functions - I bet LatencyTOP has a field day when you're using RAID5 AltiVec. But if you're more concerned about fast disk access, would you really care (especially since the algorithm is automatically selected on boot, you've not much chance of having any choice in the matter anyway)? Granted it also doesn't help Scott one bit. 
Sorry :D -- Matt Sealey [EMAIL PROTECTED] Genesi, Manager, Developer Relations ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH RFC] pata_platform: add 8 bit data io support
Benjamin Herrenschmidt 写道: On Sun, 2008-10-12 at 02:00 +0800, Wang Jian wrote: To avoid adding another rare used ata_port member, new bit is added to ata_port-flags. Originally, I hacked pata_platform to make it 8bit only to support 8bit data wired CF card. This patch is more generic. With this patch, __pata_platform_probe() interface is changed, and pata_of_platform is broken, so a small patch is needed. Signed-off-by: Wang Jian [EMAIL PROTECTED] --- A couple of things. First I would personally prefer (but I'm not the libata maintainer so it's up to Jeff ...) if you had a separate patch that adds the 8-bit support to libata core first, and then a patch that modifies pata_platform. I will do that if my 8-bit mode patch is done right technically. Then, in order to avoid breaking bisection, I would like you to fixup pata_of_platform in the same patch that modifies __pata_platform_probe so there is no breakage in between patches. Yes. Now, regarding the patch itself, if the core grows a 8-bit flag, then I strongly suspect the core should also grow the 8-bit xfer function rather than having it hidden in pata_platform. This is the main reason I send a single RFC patch. Where to add 8-bit mode should be decided first. Because 8-bit mode is mostly used for embedded devices, my opinion is 8-bit mode in pata_platform is enough. However, look at pata_platform_data_xfer() I added, the code can be merged into ata_sff_data_xfer() of libata-sff.c easily. Moving the code there is trivial if necessary. Another problem should be addressed: using flags vs. using a data_width member. I add a bit to indicate 8 bit mode, but this seems to be a problem for future 32 bit I/O support in libata. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [RFC 0/6] Proposal for a Generic PWM Device API
Jon Loeliger wrote: On Fri, 2008-10-10 at 09:04 -0500, Bill Gatliff wrote: Jon Smirl wrote: What do the device tree deities have to say about PWM support? Dunno. What lists are they on? :) Perhaps [EMAIL PROTECTED] too. I thought this was what ePAPR was for. Why would it need all that discussion if it's being codified into a proper standard? Someone should just submit a reasonable extension to a reasonable extension-managing body :) -- Matt Sealey [EMAIL PROTECTED] Genesi, Manager, Developer Relations ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PROBLEM] Soft lockup on Linux 2.6.27, 2 patches, Cell/PPC64
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 Hello, I recently built 2.6.27 with these patches on my PS3. http://www.kernel.org/pub/linux/kernel/people/geoff/cell/ps3-linux-patches/ps3-wip/ps3vram-driver.patch http://www.kernel.org/pub/linux/kernel/people/geoff/cell/ps3-linux-patches/ps3-wip/ps3vram-proc-fs.patch These patches enable the 'ps3vram' module, which creates a MTD node /dev/mtdblock0. In addition to the 256 MB of XDR ram used by the system, I can use 245 MB of the video ram as a fast swap (getting a somewhat valuable 60 MB/s read/write speed on a random access device). I was using the mtdblock0 as a swap space when the soft lockup occurred while leaving the `top` program open. Now I am not sure if the patch is the issue. None of the functions in that list are functions in the patch... but this is my first time at debugging a kernel bug, some of the functions have the word 'page' so it might be due to problems occurring while paging to that mtdblock0 device, but surely calls to the functions in that patch would appear. How would I start debugging this? The trace is also available in pastebin: http://pastebin.com/m2ea72e52 BUG: soft lockup - CPU#0 stuck for 61s! 
[top:22788] Modules linked in: evdev hci_usb usbhid bluetooth usb_storage snd_ps3 ehci_hcd snd_pcm ohci_hcd snd_page_alloc snd_timer usbcore snd sg ps3_lpm soundcore irq event stamp: 5018780 hardirqs last enabled at (5018779): [c0007c1c] restore+0x1c/0xe4 hardirqs last disabled at (5018780): [c0003600] decrementer_common+0x100/0x180 softirqs last enabled at (5018778): [c0020928] .call_do_softirq+0x14/0x24 softirqs last disabled at (5018773): [c0020928] .call_do_softirq+0x14/0x24 NIP: c0084110 LR: c0084468 CTR: c03181d0 REGS: c6f37280 TRAP: 0901 Not tainted (2.6.27) MSR: 80008032 EE,IR,DR CR: 42004424 XER: TASK = c798[22788] 'top' THREAD: c6f34000 CPU: 0 GPR00: 0001 c6f37500 c05543d0 c6f37570 GPR04: c008427c 0001 GPR08: 0830 0001 c0b96874 GPR12: 80008032 c0586300 NIP [c0084110] .csd_flag_wait+0x14/0x1c LR [c0084468] .smp_call_function_single+0x13c/0x164 Call Trace: [c6f37500] [c0084468] .smp_call_function_single+0x13c/0x164 (unreliable) [c6f375c0] [c0084578] .smp_call_function_mask+0xe8/0x244 [c6f37720] [c005809c] .on_each_cpu+0x24/0x9c [c6f377c0] [c009bde4] .drain_all_pages+0x24/0x3c [c6f37840] [c009c0c8] .__alloc_pages_internal+0x2cc/0x464 [c6f37950] [c00c3d54] .__slab_alloc+0x1f8/0x6cc [c6f37a10] [c00c466c] .kmem_cache_alloc+0x74/0x108 [c6f37ab0] [c00cd200] .get_empty_filp+0x98/0x1a0 [c6f37b40] [c00d9fa0] .__path_lookup_intent_open+0x40/0xd0 [c6f37bf0] [c00da294] .do_filp_open+0xc0/0x7f0 [c6f37d80] [c00c9818] .do_sys_open+0x88/0x154 [c6f37e30] [c00076dc] syscall_exit+0x0/0x40 Instruction dump: 2f88 3860fff0 409e000c f88b0008 3860 ebc1fff0 4e800020 7c0004ac 80030020 780907e1 4d820020 7c210b78 7c421378 4be8 4e800020 7c0802a6 - -- - -Thanks Aaron Tokhy -BEGIN PGP SIGNATURE- Version: GnuPG v2.0.9 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org iEYEARECAAYFAkjxfeYACgkQO3nEAs/Ru1mjtwCfW25E51GIAY5KOcpJOp2TeUrz hhQAni7m4UM7ojCPnjEsmiAEVxpLoljh =AVql -END PGP SIGNATURE- ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org 
https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: performance: memcpy vs. __copy_tofrom_user
Would the examples (page copy, page clear) be an okay place to do it? These sections can't be preempted anyway (right?), and it's noted that doing it with AltiVec is a tad faster than using MMU tricks or standard copies? I think typically page copying and clearing -are- preemptible. I'm not sure what you mean by MMU tricks, but it's not clear whether using altivec will result in any significant performance gain here, considering the cost of enabling/disabling altivec (added to handling the preemption issue). However, nothing prevents you from trying to do it and we'll see what the results are with hard numbers. In Scott's case, while optimizing memcpy for 48byte blocks was a joke, this is 3 load/stores in AltiVec, which as long as every SKB is 16 byte aligned (is there any reason why it would not be? :) In this case, the cost of enabling/saving/restoring altivec will far outweigh any benefit. In addition, skb's are often not well aligned due to the alignment tricks done with packet headers. skb_clone might not be something you want to dump AltiVec into and would make a mess if an skb got extended somehow, but the principle is outlined in a very good document from a very long time ago; http://www.motorola.com.cn/semiconductors/sndf/conference/PDF/AH1109.pdf I think a lot of it still holds true as long as you really don't care about preemption under these circumstances (where network throughput is more important, and where AltiVec actually *reduces* CPU time, the overhead of disabling preemption is lower anyway). You could say the same about the RAID functions - I bet LatencyTOP has a field day when you're using RAID5 AltiVec. RAID6 actually :-) In any case, as I said, people are welcome to implement something that can be put to the test and measured. If it proves beneficial enough, then I see no reason not to merge it. Basically, enough talk, just do something and we'll see whether it proves useful or not. Cheers, Ben. 
___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[patch] mutex: optimise generic mutex implementations
Speed up generic mutex implementations. - atomic operations which both modify the variable and return something imply full smp memory barriers before and after the memory operations involved (failing atomic_cmpxchg, atomic_add_unless, etc don't imply a barrier because they don't modify the target). See Documentation/atomic_ops.txt. So remove extra barriers and branches. - All architectures support atomic_cmpxchg. This has no relation to __HAVE_ARCH_CMPXCHG. We can just take the atomic_cmpxchg path unconditionally This reduces a simple single threaded fastpath lock+unlock test from 590 cycles to 203 cycles on a ppc970 system. Signed-off-by: Nick Piggin [EMAIL PROTECTED] --- Index: linux-2.6/include/asm-generic/mutex-dec.h === --- linux-2.6.orig/include/asm-generic/mutex-dec.h +++ linux-2.6/include/asm-generic/mutex-dec.h @@ -22,8 +22,6 @@ __mutex_fastpath_lock(atomic_t *count, v { if (unlikely(atomic_dec_return(count) 0)) fail_fn(count); - else - smp_mb(); } /** @@ -41,10 +39,7 @@ __mutex_fastpath_lock_retval(atomic_t *c { if (unlikely(atomic_dec_return(count) 0)) return fail_fn(count); - else { - smp_mb(); - return 0; - } + return 0; } /** @@ -63,7 +58,6 @@ __mutex_fastpath_lock_retval(atomic_t *c static inline void __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - smp_mb(); if (unlikely(atomic_inc_return(count) = 0)) fail_fn(count); } @@ -98,15 +92,9 @@ __mutex_fastpath_trylock(atomic_t *count * just as efficient (and simpler) as a 'destructive' probing of * the mutex state would be. 
*/ -#ifdef __HAVE_ARCH_CMPXCHG - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) { - smp_mb(); + if (likely(atomic_cmpxchg(count, 1, 0) == 1)) return 1; - } return 0; -#else - return fail_fn(count); -#endif } #endif Index: linux-2.6/include/asm-generic/mutex-xchg.h === --- linux-2.6.orig/include/asm-generic/mutex-xchg.h +++ linux-2.6/include/asm-generic/mutex-xchg.h @@ -27,8 +27,6 @@ __mutex_fastpath_lock(atomic_t *count, v { if (unlikely(atomic_xchg(count, 0) != 1)) fail_fn(count); - else - smp_mb(); } /** @@ -46,10 +44,7 @@ __mutex_fastpath_lock_retval(atomic_t *c { if (unlikely(atomic_xchg(count, 0) != 1)) return fail_fn(count); - else { - smp_mb(); - return 0; - } + return 0; } /** @@ -67,7 +62,6 @@ __mutex_fastpath_lock_retval(atomic_t *c static inline void __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - smp_mb(); if (unlikely(atomic_xchg(count, 1) != 0)) fail_fn(count); } @@ -110,7 +104,6 @@ __mutex_fastpath_trylock(atomic_t *count if (prev 0) prev = 0; } - smp_mb(); return prev; } ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev