Re: [PATCH 41/49] ext4: Add multi block allocator for ext4
On Wed, Jan 23, 2008 at 02:07:27PM -0800, Andrew Morton wrote: > > On Mon, 21 Jan 2008 22:02:20 -0500 "Theodore Ts'o" <[EMAIL PROTECTED]> > > wrote: > > From: Alex Tomas <[EMAIL PROTECTED]> > > > > Signed-off-by: Alex Tomas <[EMAIL PROTECTED]> > > Signed-off-by: Andreas Dilger <[EMAIL PROTECTED]> > > Signed-off-by: Aneesh Kumar K.V <[EMAIL PROTECTED]> > > Signed-off-by: Eric Sandeen <[EMAIL PROTECTED]> > > Signed-off-by: "Theodore Ts'o" <[EMAIL PROTECTED]> > > > > ... > > > > +#if BITS_PER_LONG == 64 > > +#define mb_correct_addr_and_bit(bit, addr) \ > > +{ \ > > + bit += ((unsigned long) addr & 7UL) << 3; \ > > + addr = (void *) ((unsigned long) addr & ~7UL); \ > > +} > > +#elif BITS_PER_LONG == 32 > > +#define mb_correct_addr_and_bit(bit, addr) \ > > +{ \ > > + bit += ((unsigned long) addr & 3UL) << 3; \ > > + addr = (void *) ((unsigned long) addr & ~3UL); \ > > +} > > +#else > > +#error "how many bits you are?!" > > +#endif > > Why do these exist? Initial version on mballoc supported on x86 32 this was there to give compile warning on 64 bit platform. I guess we can remove that now. Or may be we can keep it as such because it is harmless. > > > +static inline int mb_test_bit(int bit, void *addr) > > +{ > > + mb_correct_addr_and_bit(bit, addr); > > + return ext4_test_bit(bit, addr); > > +} > > ext2_test_bit() already handles bitnum > wordsize. > > If mb_correct_addr_and_bit() is actually needed then some suitable comment > would help. ext4_test_bit on powerpc needs the addr to be 8 byte aligned. 
Othewise it fails > > > +static inline void mb_set_bit(int bit, void *addr) > > +{ > > + mb_correct_addr_and_bit(bit, addr); > > + ext4_set_bit(bit, addr); > > +} > > + > > +static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) > > +{ > > + mb_correct_addr_and_bit(bit, addr); > > + ext4_set_bit_atomic(lock, bit, addr); > > +} > > + > > +static inline void mb_clear_bit(int bit, void *addr) > > +{ > > + mb_correct_addr_and_bit(bit, addr); > > + ext4_clear_bit(bit, addr); > > +} > > + > > +static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void > > *addr) > > +{ > > + mb_correct_addr_and_bit(bit, addr); > > + ext4_clear_bit_atomic(lock, bit, addr); > > +} > > + > > +static inline void *mb_find_buddy(struct ext4_buddy *e4b, int order, int > > *max) > > uninlining this will save about eighty squigabytes of text. Fixed > > Please review all of ext4/jbd2 with a view to removig unnecessary and wrong > inlings. > > > +{ > > + char *bb; > > + > > + /* FIXME!! is this needed */ > > + BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); > > + BUG_ON(max == NULL); > > + > > + if (order > e4b->bd_blkbits + 1) { > > + *max = 0; > > + return NULL; > > + } > > + > > + /* at order 0 we see each particular block */ > > + *max = 1 << (e4b->bd_blkbits + 3); > > + if (order == 0) > > + return EXT4_MB_BITMAP(e4b); > > + > > + bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; > > + *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; > > + > > + return bb; > > +} > > + > > > > ... > > > > +#else > > +#define mb_free_blocks_double(a, b, c, d) > > +#define mb_mark_used_double(a, b, c) > > +#define mb_cmp_bitmaps(a, b) > > +#endif > > Please use the do{}while(0) thing. Or, better, proper C functions which > have typechecking (unless this will cause undefined-var compile errors, > which happens sometimes) makde static inline void. 
> > > +/* find most significant bit */ > > +static int fmsb(unsigned short word) > > +{ > > + int order; > > + > > + if (word > 255) { > > + order = 7; > > + word >>= 8; > > + } else { > > + order = -1; > > + } > > + > > + do { > > + order++; > > + word >>= 1; > > + } while (word != 0); > > + > > + return order; > > +} > > Did we just reinvent fls()? replaced by fls. > > > +/* FIXME!! need more doc */ > > +static void ext4_mb_mark_free_simple(struct super_block *sb, > > + void *buddy, unsigned first, int len, > > + struct ext4_group_info *grp) > > +{ > > + struct ext4_sb_info *sbi = EXT4_SB(sb); > > + unsigned short min; > > + unsigned short max; > > + unsigned short chunk; > > + unsigned short border; > > + > > + BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb)); > > + > > + border = 2 << sb->s_blocksize_bits; > > Won't this explode with >= 32k blocksize? > > > + while (len > 0) { > > + /* find how many blocks can be covered since this position */ > > + max = ffs(first | border) - 1; > > + > > + /* find how many blocks of power 2 we need to mark */ > > + min = fmsb(len); > > + > > + if (max < min) > > + min = max; > > + chunk
Re: [PATCH -mm] fix variable use in AVR32 pte_alloc_one
> On Wed, 23 Jan 2008 13:20:10 +0100 Haavard Skinnemoen <[EMAIL PROTECTED]> > wrote: > On Wed, 23 Jan 2008 22:57:34 +1100 > Ben Nizette <[EMAIL PROTECTED]> wrote: > > > > > Some parts of this function use 'page', some 'pte'. As such, an AVR32 > > -mm build fails with an undefined reference to 'page'. > > > > Signed-Off-By: Ben Nizette <[EMAIL PROTECTED]> > > > > --- > > Index: linux-2.6.24-rc8-mm1/include/asm-avr32/pgalloc.h > > === > > --- linux-2.6.24-rc8-mm1.orig/include/asm-avr32/pgalloc.h > > +++ linux-2.6.24-rc8-mm1/include/asm-avr32/pgalloc.h > > @@ -52,9 +52,9 @@ static inline struct page *pte_alloc_one > > struct page *pte; > > > > pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); > > - if (!page) > > + if (!pte) > > return NULL; > > - pgtable_page_ctor(page); > > + pgtable_page_ctor(pte); > > return pte; > > } > > Hmm...I can't see anything like this on my current avr32-arch branch, > but I think I mistakenly pushed out some unfinished code about a week > ago and rewound it shortly afterwards. If Andrew pulled during that > window, I guess it must have made it into -mm :-( > > But thanks for testing and for providing a fix. I'll check the code > that I was working on and apply the patch if it's still broken. > This bustage was added by the -mm-only config_highpte-vs-sub-page-page-tables.patch Thanks for the fix. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [CALL FOR TESTING] Make Ext3 fsck way faster [2.6.24-rc6 -mm patch]
> On Wed, 23 Jan 2008 04:12:16 -0500 Abhishek Rai <[EMAIL PROTECTED]> wrote: > > I'm wondering about the interaction between this code and the > > buffer_boundary() logic. I guess we should disable the buffer_boundary() > > handling when this code is in effect. Have you reviewed and tested that > > aspect? > > Thanks for pointing this out, I had totally missed this issue in my change. > I've now made the call to set_buffer_boundary() in ext3_get_blocks_handle() > subject to metacluster option being set. > Did it make any performance difference? iirc the buffer_boundary stuff was worth around 10% on a single linear read of a large, well-laid-out file. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/6] io context sharing: preliminary support
On Wed, Jan 23 2008, Andrew Morton wrote: > > On Tue, 22 Jan 2008 10:49:17 +0100 Jens Axboe <[EMAIL PROTECTED]> wrote: > > -void put_io_context(struct io_context *ioc) > > +int put_io_context(struct io_context *ioc) > > { > > if (ioc == NULL) > > - return; > > + return 1; > > > > BUG_ON(atomic_read(&ioc->refcount) == 0); > > > > @@ -3856,7 +3856,9 @@ void put_io_context(struct io_context *ioc) > > rcu_read_unlock(); > > > > kmem_cache_free(iocontext_cachep, ioc); > > + return 1; > > } > > + return 0; > > } > > Document the return value? (and the function) Will do. > I assume this return value gets used in some other patch. Yeah, it is. -- Jens Axboe -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 4/6] block: cfq: make the io contect sharing lockless
On Wed, Jan 23 2008, Andrew Morton wrote: > > On Tue, 22 Jan 2008 10:49:19 +0100 Jens Axboe <[EMAIL PROTECTED]> wrote: > > The io context sharing introduced a per-ioc spinlock, that would protect > > the cfq io context lookup. That is a regression from the original, since > > we never needed any locking there because the ioc/cic were process private. > > > > The cic lookup is changed from an rbtree construct to a radix tree, which > > we can then use RCU to make the reader side lockless. That is the > > performance > > critical path, modifying the radix tree is only done on process creation > > (when that process first does IO, actually) and on process exit (if that > > process has done IO). > > Perhaps Paul would review the rcu usage here sometime? That would indeed be awesome :-) > > +/* > > + * Add cic into ioc, using cfqd as the search key. This enables us to > > lookup > > + * the process specific cfq io context when entered from the block layer. > > + * Also adds the cic to a per-cfqd list, used when this queue is removed. > > + */ > > +static inline int > > There's a lot of pointless inlining in there. Will kill. > > +++ b/block/ll_rw_blk.c > > @@ -3831,6 +3831,16 @@ int __init blk_dev_init(void) > > return 0; > > } > > > > +static void cfq_dtor(struct io_context *ioc) > > +{ > > + struct cfq_io_context *cic[1]; > > + int r; > > + > > + r = radix_tree_gang_lookup(>radix_root, (void **) cic, 0, 1); > > + if (r > 0) > > + cic[0]->dtor(ioc); > > +} > > Some comments here might help others who are wondering why we can't just > use radix_tree_lookup(). Sure, will add a comment. > > @@ -3900,7 +3911,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, > > int node) > > ret->last_waited = jiffies; /* doesn't matter... */ > > ret->nr_batch_requests = 0; /* because this is 0 */ > > ret->aic = NULL; > > - ret->cic_root.rb_node = NULL; > > + INIT_RADIX_TREE(>radix_root, GFP_ATOMIC | __GFP_HIGH); > > Did this need to be atomic? 
It's actually only ever used with a radix_tree_preload() where the proper gfp mask is passed, the actual radix_tree_insert() is done under lock protecting the tree. -- Jens Axboe -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/6] ioprio: move io priority from task_struct to io_context
On Wed, Jan 23 2008, Andrew Morton wrote: > > On Tue, 22 Jan 2008 10:49:16 +0100 Jens Axboe <[EMAIL PROTECTED]> wrote: > > This is where it belongs and then it doesn't take up space for a > > process that doesn't do IO. > > > > ... > > > > struct io_context *get_io_context(gfp_t gfp_flags, int node) > > { > > - struct io_context *ret; > > - ret = current_io_context(gfp_flags, node); > > - if (likely(ret)) > > - atomic_inc(>refcount); > > + struct io_context *ret = NULL; > > + > > + do { > > + ret = current_io_context(gfp_flags, node); > > + if (unlikely(!ret)) > > + break; > > + } while (!atomic_inc_not_zero(>refcount)); > > Looks weird. Could do with a comment. Or unweirding ;) > > What's going on here? In the unlikely event that we find a task that is on its way to exiting. This hunk should actually be a part of the cfq lockless stuff... > > return ret; > > } > > EXPORT_SYMBOL(get_io_context); > > diff --git a/fs/ioprio.c b/fs/ioprio.c > > index e4e01bc..a760040 100644 > > --- a/fs/ioprio.c > > +++ b/fs/ioprio.c > > @@ -41,18 +41,29 @@ static int set_task_ioprio(struct task_struct *task, > > int ioprio) > > return err; > > > > task_lock(task); > > + do { > > + ioc = task->io_context; > > + /* see wmb() in current_io_context() */ > > + smp_read_barrier_depends(); > > + if (ioc) > > + break; > > > > - task->ioprio = ioprio; > > - > > - ioc = task->io_context; > > - /* see wmb() in current_io_context() */ > > - smp_read_barrier_depends(); > > + ioc = alloc_io_context(GFP_ATOMIC, -1); > > + if (!ioc) { > > + err = -ENOMEM; > > + break; > > + } > > + task->io_context = ioc; > > + ioc->task = task; > > + } while (1); > > argh. Can't sit there in a loop retrying GFP_ATOMIC! It's not, read the loop again! > > - if (ioc) > > + if (!err) { > > + ioc->ioprio = ioprio; > > ioc->ioprio_changed = 1; > > + } > > > > task_unlock(task); > > - return 0; > > + return err; > > } > > > > asmlinkage long sys_ioprio_set(int which, int who, int ioprio) > > > > ... 
> > > > void put_io_context(struct io_context *ioc); > > void exit_io_context(void); > > struct io_context *get_io_context(gfp_t gfp_flags, int node); > > +struct io_context *alloc_io_context(gfp_t, int); > > void copy_io_context(struct io_context **pdst, struct io_context **psrc); > > void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); > > The rest of the declarations around here nicely name their args. A clear sign I didn't put those declarations there, but the inconsistent style is surely not a good thing. Will fix that up. > > +static int copy_io(struct task_struct *tsk) > > +{ > > + struct io_context *ioc = current->io_context; > > + > > + if (!ioc) > > + return 0; > > + > > + if (ioprio_valid(ioc->ioprio)) { > > + tsk->io_context = alloc_io_context(GFP_KERNEL, -1); > > + if (unlikely(!tsk->io_context)) > > + return -ENOMEM; > > + > > + tsk->io_context->task = tsk; > > + tsk->io_context->ioprio = ioc->ioprio; > > + } > > + > > + return 0; > > +} > > Should this depend on CONFIG_BLOCK? Good questions, checks... Looks like it would break, I'll do a !CONFIG_BLOCK fixup round. -- Jens Axboe -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Remove BKL from sysctl(2)
> Yes - that's why I am wondering if we want a general 'sysctl' mutex or > just to fix the specific case. The corename case is nasty as it is timing > dependant against user activity rather than the less interesting "root > can be silly" type of problem. I think for the strings it would be better to just do a kind of copy-on-write. As in don't use the array directly, but a pointer instead and then sysctl allocates a new string, switches the pointer around and then does an RCU delayed free on the old string if it wasn't in .data. This would also have the advantage that the small upper limits these strings currently have are gone and actually save a little memory in the common case of them not being changed. Only case that wouldn't fix would be someone keeping the string accessed over a sleep point. And for preemptible kernels it would need rcu_read_lock(). So no free lunch without code auditing. But it would be probably the best way to go forward longer term. For multi number arrays the code ideally needs to be robust against temporary inconsistencies. BTW, while interesting in theory, in practice I suspect the actual likelihood of a user actually hitting such a race is pretty small because sysctls tend to be set up at boot only, where not much is going on. -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: JMicron PATA 2.6.24-rc7 can't find drives
Alan Cox wrote: I have SATA drive connected to SATA0 and Maxtor IDE drive + DVD burner connected to single IDE slot. Booting this kernel off the SATA drive works perfectly, however the IDE drive is not found/put into /dev. However the DVD is properly recognized and created as sr0. Is this a known problem? I've looked all over google and found out that people have problems with CD/DVD drives with pata_jmicron but in my case DVD is working ok but HDD is not. Is there a solution for this or can I give you more information to help me solve this? The output of dmesg after the boot would be a good start Here it is: http://pastebin.com/md726431 Thank you very much! -Marcin Gil -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: LowFree/LowMem problem
Matthias Wolle <[EMAIL PROTECTED]> writes: > Jan 17 23:31:58 franklin72 kernel: sshd invoked oom-killer: gfp_mask=0xd0, > order=0, oomkilladj=0 > Jan 17 23:31:58 franklin72 kernel: cat invoked oom-killer: gfp_mask=0x84d0, > order=0, oomkilladj=0 > Jan 17 23:31:58 franklin72 kernel: [] out_of_memory+0x69/0x1a4 > Jan 17 23:31:58 franklin72 kernel: [] __alloc_pages+0x20a/0x291 > Jan 17 23:31:58 franklin72 kernel: [] __pte_alloc+0x11/0x94 Do you perhaps have a kernel compiled without CONFIG_HIGHPTE? Normally __pte_alloc should be able to allocate highmem unless that option is not set. Before HIGHPTE was implemented running out of low memory due to page tables was pretty common. BTW the ultimate fix for most lowmem problems is to go 64bit -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [ofa-general] InfiniBand/RDMA merge plans for 2.6.25
On Jan 18, 2008 1:11 AM, Roland Dreier <[EMAIL PROTECTED]> wrote: > Anyway, here are all the pending things that I'm aware of. As usual, > if something isn't already in my tree and isn't listed below, I > probably missed it or dropped it by mistake. Please remind me again > in that case. Are there any plans to merge the SDP (Sockets Direct Protocol) implementation ? Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] x86: ignore spurious faults
On Thursday 24 January 2008 08:02:11 Jeremy Fitzhardinge wrote: > Andi Kleen wrote: > > Jeremy Fitzhardinge <[EMAIL PROTECTED]> writes: > >> /* > >> + * Handle a spurious fault caused by a stale TLB entry. This allows > > > > vmalloc_fault already has nearly the same logic. You should look > > at sharing the code. > > Hm, I see what you mean, but it's hard to see how to share much code > there. It's a case of "two lines common, one line different" repeated 4 > times or so. The core common logic is checking if the fault agrees with the page tables. If not do different things. -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [rfc] lockless get_user_pages for dio (and more)
On Thursday 17 January 2008 06:58, Dave Kleikamp wrote: > We weren't able to get in any runs before the holidays, but we finally > have some good news from our performance team: > > "To test the effects of the patch, an OLTP workload was run on an IBM > x3850 M2 server with 2 processors (quad-core Intel Xeon processors at > 2.93 GHz) using IBM DB2 v9.5 running Linux 2.6.24rc7 kernel. Comparing > runs with and without the patch resulted in an overall performance > benefit of ~9.8%. Correspondingly, oprofiles showed that samples from > __up_read and __down_read routines that is seen during thread contention > for system resources was reduced from 2.8% down to .05%. Monitoring > the /proc/vmstat output from the patched run showed that the counter for > fast_gup contained a very high number while the fast_gup_slow value was > zero." Just for reference, I've attached a more complete patch for x86, which has to be applied on top of the pte_special patch posted in another thread. No need to test anything at this point... the generated code for this version is actually slightly better than the last one despite the extra condition being tested for. With a few tweak I was actually able to reduce the number of tests in the inner loop, and adding noinline to the leaf functions helps keep them in registers. I'm currently having a look at an initial powerpc 64 patch, hopefully we'll see similar improvements there. Will post that when I get further along with it. Thanks, Nick Introduce a new "fast_gup" (for want of a better name right now) which is basically a get_user_pages with a less general API that is more suited to the common case. - task and mm are always current and current->mm - force is always 0 - pages is always non-NULL - don't pass back vmas This allows (at least on x86), an optimistic lockless pagetable walk, without taking any page table locks or even mmap_sem. 
Page table existence is guaranteed by turning interrupts off (combined with the fact that we're always looking up the current mm, which would need an IPI before its pagetables could be shot down from another CPU). Many other architectures could do the same thing. Those that don't IPI could potentially RCU free the page tables and do speculative references on the pages (a la lockless pagecache) to achieve a lockless fast_gup. --- Index: linux-2.6/arch/x86/lib/Makefile_64 === --- linux-2.6.orig/arch/x86/lib/Makefile_64 +++ linux-2.6/arch/x86/lib/Makefile_64 @@ -10,4 +10,4 @@ obj-$(CONFIG_SMP) += msr-on-cpu.o lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \ usercopy_64.o getuser_64.o putuser_64.o \ thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o -lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o +lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o gup.o Index: linux-2.6/arch/x86/lib/gup.c === --- /dev/null +++ linux-2.6/arch/x86/lib/gup.c @@ -0,0 +1,189 @@ +/* + * Lockless fast_gup for x86 + * + * Copyright (C) 2007 Nick Piggin + * Copyright (C) 2007 Novell Inc. + */ +#include +#include +#include +#include + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep; + + result = _PAGE_PRESENT|_PAGE_USER; + if (write) + result |= _PAGE_RW; + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_map(, addr); + do { + /* + * XXX: careful. On 3-level 32-bit, the pte is 64 bits, and + * we need to make sure we load the low word first, then the + * high. This means _PAGE_PRESENT should be clear if the high + * word was not valid. 
Currently, the C compiler can issue + * the loads in any order, and I don't know of a wrapper + * function that will do this properly, so it is broken on + * 32-bit 3-level for the moment. + */ + pte_t pte = *ptep; + struct page *page; + + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + get_page(page); + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + pte_unmap(ptep - 1); + + return 1; +} + +static inline void get_head_page_multiple(struct page *page, int nr) +{ + VM_BUG_ON(page != compound_head(page)); + VM_BUG_ON(page_count(page) == 0); + atomic_add(nr, >_count); +} + +static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + pte_t pte = *(pte_t *) + struct page *head, *page; + int refs; + + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask
Re: [PATCH v2][POWERPC] Always build setup-bus.c on powerpc
On Thu, 2008-01-24 at 00:38 -0600, Kumar Gala wrote: > The common arch/powerpc code calls in to functions in setup-bus.c > so some builds of ppc32 would fail. > > Note, ppc32 usage of setup-irq.c is limited to arch/ppc and should be > removed when arch/ppc goes away. > > Signed-off-by: Kumar Gala <[EMAIL PROTECTED]> > Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]> Ack. Funny I didn't catch it on my 4xx testing, I must have always had HOTPLUG enabled. > --- > > Here's the proper diff, will send this via paulus. > > drivers/pci/Makefile |2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile > index 5550556..f697f3d 100644 > --- a/drivers/pci/Makefile > +++ b/drivers/pci/Makefile > @@ -32,7 +32,7 @@ obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o > obj-$(CONFIG_PARISC) += setup-bus.o > obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o > obj-$(CONFIG_PPC32) += setup-irq.o > -obj-$(CONFIG_PPC64) += setup-bus.o > +obj-$(CONFIG_PPC) += setup-bus.o > obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o > obj-$(CONFIG_X86_VISWS) += setup-irq.o > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] x86: ignore spurious faults
Andi Kleen wrote: Jeremy Fitzhardinge <[EMAIL PROTECTED]> writes: /* + * Handle a spurious fault caused by a stale TLB entry. This allows vmalloc_fault already has nearly the same logic. You should look at sharing the code. Hm, I see what you mean, but it's hard to see how to share much code there. It's a case of "two lines common, one line different" repeated 4 times or so. J -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 5/5 v3] Add the memory management driver to RapidIO.
On Jul 26, 2007, at 3:42 AM, Zhang Wei wrote: This patch adds the memory management driver to RapidIO. The RapidIO system size is changed to automatically detection. Add the memory mapping driver to RapidIO basic driver. Multi master ports are supported. Add a simple Bitmap RapidIO space allocator driver. Can we break up the functionality here into separate patches. I'm seeing 4 logical patches: * system size is changed to automatic detection * support for multi master ports * bitmap allocator * memory map driver Signed-off-by: Zhang Wei <[EMAIL PROTECTED]> --- drivers/net/rionet.c| 17 +- drivers/rapidio/Kconfig | 18 +- drivers/rapidio/Makefile|1 + drivers/rapidio/rio-access.c| 10 +- drivers/rapidio/rio-scan.c | 53 +++-- drivers/rapidio/rio-sysfs.c |3 +- drivers/rapidio/rio.c | 479 ++ - drivers/rapidio/rio.h |9 +- drivers/rapidio/sallocator/Kconfig |9 + drivers/rapidio/sallocator/Makefile | 12 + drivers/rapidio/sallocator/bitmap.c | 382 ++ ++ include/linux/rio.h | 71 +- include/linux/rio_drv.h | 41 +++- 13 files changed, 1046 insertions(+), 59 deletions(-) create mode 100644 drivers/rapidio/sallocator/Kconfig create mode 100644 drivers/rapidio/sallocator/Makefile create mode 100644 drivers/rapidio/sallocator/bitmap.c [snip] +config RAPIDIO_PROC_FS + bool "I/O and Memory resource debug" + depends on RAPIDIO && PROC_FS + default y + ---help--- + Enable this option, it will create a /proc/riores node for + monitoring the RapidIO I/O and Memory resource. + This isn't going to be acceptable. You should use sysfs or debugfs not proc. - k -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] [2/8] GBPAGES: Add feature macros for the gbpages cpuid bit
On Wednesday 23 January 2008 22:26:35 Jan Engelhardt wrote: > On Jan 19 2008 07:48, Andi Kleen wrote: > >Subject: [PATCH] [2/8] GBPAGES: Add feature macros for the gbpages cpuid > > bit > > Is there already a flag for /proc/cpuinfo or could you add one? There is already one called pdpe1gb. I don't think it's a very clear name, although AMD calls it the same. Calling it gbpages in /proc/cpuinfo would have been probably better (and my old original patch did that too), but I didn't catch the new name submitted by someone else in time. -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RESEND][PATCH] pci-skeleton: Misc fixes to build neatly
Hello Jeff, I'm sorry for the previous patch, which was mangled by gmail, with some broken lines. I'm trying to resend it by git-send-email. Please let me know in case of any problems. Thanks a lot. Best Regards, Jike -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] pci-skeleton: Misc fixes to build neatly
The pci-skeleton.c has several problems with compilation, such as missing args when calling synchronize_irq(). Fix it. Signed-off-by: Jike Song <[EMAIL PROTECTED]> --- drivers/net/pci-skeleton.c | 49 ++- 1 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c index ed402e0..fffc49b 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -541,7 +541,7 @@ static void netdrv_hw_start (struct net_device *dev); #define NETDRV_W32_F(reg, val32) do { writel ((val32), ioaddr + (reg)); readl (ioaddr + (reg)); } while (0) -#if MMIO_FLUSH_AUDIT_COMPLETE +#ifdef MMIO_FLUSH_AUDIT_COMPLETE /* write MMIO register */ #define NETDRV_W8(reg, val8) writeb ((val8), ioaddr + (reg)) @@ -603,7 +603,7 @@ static int __devinit netdrv_init_board (struct pci_dev *pdev, return -ENOMEM; } SET_NETDEV_DEV(dev, >dev); - tp = dev->priv; + tp = netdev_priv(dev); /* enable device (incl. PCI PM wakeup), and bus-mastering */ rc = pci_enable_device (pdev); @@ -759,7 +759,7 @@ static int __devinit netdrv_init_one (struct pci_dev *pdev, return i; } - tp = dev->priv; + tp = netdev_priv(dev); assert (ioaddr != NULL); assert (dev != NULL); @@ -783,7 +783,7 @@ static int __devinit netdrv_init_one (struct pci_dev *pdev, dev->base_addr = (unsigned long) ioaddr; /* dev->priv/tp zeroed and aligned in alloc_etherdev */ - tp = dev->priv; + tp = netdev_priv(dev); /* note: tp->chipset set in netdrv_init_board */ tp->drv_flags = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | @@ -841,7 +841,7 @@ static void __devexit netdrv_remove_one (struct pci_dev *pdev) assert (dev != NULL); - np = dev->priv; + np = netdev_priv(dev); assert (np != NULL); unregister_netdev (dev); @@ -974,7 +974,7 @@ static void mdio_sync (void *mdio_addr) static int mdio_read (struct net_device *dev, int phy_id, int location) { - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); void *mdio_addr = tp->mmio_addr + Config4; int mii_cmd = 
(0xf6 << 10) | (phy_id << 5) | location; int retval = 0; @@ -1017,7 +1017,7 @@ static int mdio_read (struct net_device *dev, int phy_id, int location) static void mdio_write (struct net_device *dev, int phy_id, int location, int value) { - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); void *mdio_addr = tp->mmio_addr + Config4; int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location << 18) | value; @@ -1060,7 +1060,7 @@ static void mdio_write (struct net_device *dev, int phy_id, int location, static int netdrv_open (struct net_device *dev) { - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); int retval; #ifdef NETDRV_DEBUG void *ioaddr = tp->mmio_addr; @@ -1121,7 +1121,7 @@ static int netdrv_open (struct net_device *dev) /* Start the hardware at open or resume. */ static void netdrv_hw_start (struct net_device *dev) { - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); void *ioaddr = tp->mmio_addr; u32 i; @@ -1191,7 +1191,7 @@ static void netdrv_hw_start (struct net_device *dev) /* Initialize the Rx and Tx rings, along with various 'dev' bits. 
*/ static void netdrv_init_ring (struct net_device *dev) { - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); int i; DPRINTK ("ENTER\n"); @@ -1213,7 +1213,7 @@ static void netdrv_init_ring (struct net_device *dev) static void netdrv_timer (unsigned long data) { struct net_device *dev = (struct net_device *) data; - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); void *ioaddr = tp->mmio_addr; int next_tick = 60 * HZ; int mii_lpa; @@ -1252,9 +1252,10 @@ static void netdrv_timer (unsigned long data) } -static void netdrv_tx_clear (struct netdrv_private *tp) +static void netdrv_tx_clear (struct net_device *dev) { int i; + struct netdrv_private *tp = netdev_priv(dev); atomic_set (>cur_tx, 0); atomic_set (>dirty_tx, 0); @@ -1278,7 +1279,7 @@ static void netdrv_tx_clear (struct netdrv_private *tp) static void netdrv_tx_timeout (struct net_device *dev) { - struct netdrv_private *tp = dev->priv; + struct netdrv_private *tp = netdev_priv(dev); void *ioaddr = tp->mmio_addr; int i; u8 tmp8; @@ -1311,7 +1312,7 @@ static void netdrv_tx_timeout (struct net_device *dev) /* Stop a shared interrupt from scavenging while we are. */
Re: [PATCH] x86: ignore spurious faults
Jeremy Fitzhardinge <[EMAIL PROTECTED]> writes: > > /* > + * Handle a spurious fault caused by a stale TLB entry. This allows vmalloc_fault already has nearly the same logic. You should look at sharing the code. -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -mm] fix variable use in AVR32 pte_alloc_one
Haavard Skinnemoen wrote: Hmm...I can't see anything like this on my current avr32-arch branch, but I think I mistakenly pushed out some unfinished code about a week ago and rewound it shortly afterwards. If Andrew pulled during that window, I guess it must have made it into -mm :-( But thanks for testing and for providing a fix. I'll check the code that I was working on and apply the patch if it's still broken. Cool, np. FWIW I'm trying to get -mm to fly on AVR32 so I have access to the latest gpiolib stuff. Despite this patch and "[PATCH -mm] define empty unxlate_dev_mem_ptr on AVR32" the build still fails due to some timerfd syscall horkenation. Shall look in to that this arvo too. Haavard -- --Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kvm-devel] [PATCH] export notifier #1
Gerd Hoffmann wrote: Another maybe workable approach for Xen is to go through pv_ops (although pte_clear doesn't go through pv_ops right now, so this would be an additional hook too ...). It does for 32-bit PAE. Making pte_clear uniform across all pagetable modes would be a nice cleanup. J -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc8-mm1 Badness at net/ipv4/tcp_input.c:2506
Hi, The following call trace is seen in the 2.6.24-rc8-mm1 kernel, which is same as one of the call trace you have given a debug patch at http://marc.info/?l=linux-netdev=120107165228368=2 i was not able to apply the debug patch, can you kindly rebase the patch for 2.6.24-rc8-mm1 or let me know, if i can help you in debugging this call trace. Jan 24 11:13:57 p55lp6 kernel: [60656.708573] Badness at net/ipv4/tcp_input.c:2506 Jan 24 11:13:57 p55lp6 kernel: [60656.708583] NIP: c03776e0 LR: c03776a8 CTR: c03aaf8c Jan 24 11:13:57 p55lp6 kernel: [60656.708597] REGS: cf6f34a0 TRAP: 0700 Not tainted (2.6.24-rc8-mm1) Jan 24 11:13:57 p55lp6 kernel: [60656.708608] MSR: 80029032 CR: 2488 XER: 0018 Jan 24 11:13:57 p55lp6 kernel: [60656.708636] TASK = c0571710[0] 'swapper' THREAD: c067 CPU: 0 Jan 24 11:13:57 p55lp6 kernel: [60656.708648] GPR00: fffc cf6f3720 c0664ab0 Jan 24 11:13:57 p55lp6 kernel: [60656.708663] GPR04: 0001 040e 00d0 Jan 24 11:13:57 p55lp6 kernel: [60656.708677] GPR08: 001bce5f 0001 fffc Jan 24 11:13:57 p55lp6 kernel: [60656.708690] GPR12: cf6f34f0 c0572180 0004 Jan 24 11:13:57 p55lp6 kernel: [60656.708704] GPR16: 0001 3e3133e9 0402 011f Jan 24 11:13:57 p55lp6 kernel: [60656.708718] GPR20: 011f 0004 0002 3e3133e9 Jan 24 11:13:57 p55lp6 kernel: [60656.708732] GPR24: c0004c09fe80 040e Jan 24 11:13:57 p55lp6 kernel: [60656.708745] GPR28: 0002 040e c0628570 c001791ff8d8 Jan 24 11:13:57 p55lp6 kernel: [60656.708760] NIP [c03776e0] .tcp_fastretrans_alert+0xfc/0xe20 Jan 24 11:13:58 p55lp6 kernel: [60656.708778] LR [c03776a8] .tcp_fastretrans_alert+0xc4/0xe20 Jan 24 11:13:58 p55lp6 kernel: [60656.708792] Call Trace: Jan 24 11:13:58 p55lp6 kernel: [60656.708799] [cf6f3720] [c0628570] 0xc0628570 (unreliable) Jan 24 11:13:58 p55lp6 kernel: [60656.708819] [cf6f37d0] [c037a3b0] .tcp_ack+0xf34/0x10e4 Jan 24 11:13:58 p55lp6 kernel: [60656.708836] [cf6f3920] [c037dce8] .tcp_rcv_established+0x114/0x8a8 Jan 24 11:13:58 p55lp6 kernel: [60656.708854] [cf6f39d0] [c038599c] 
.tcp_v4_do_rcv+0x5c/0x260 Jan 24 11:13:58 p55lp6 kernel: [60656.708871] [cf6f3a90] [c0387c24] .tcp_v4_rcv+0x8a0/0x93c Jan 24 11:13:58 p55lp6 kernel: [60656.70] [cf6f3b50] [c0363fec] .ip_local_deliver_finish+0x164/0x284 Jan 24 11:13:58 p55lp6 kernel: [60656.709148] [cf6f3be0] [c0363df8] .ip_rcv_finish+0x480/0x510 Jan 24 11:13:58 p55lp6 kernel: [60656.709165] [cf6f3ca0] [c0332438] .netif_receive_skb+0x564/0x630 Jan 24 11:13:58 p55lp6 kernel: [60656.709184] [cf6f3d70] [d01e2630] .ibmveth_poll+0x238/0x3b4 [ibmveth] Jan 24 11:13:58 p55lp6 kernel: [60656.709208] [cf6f3e30] [c03354c4] .net_rx_action+0x118/0x2e4 Jan 24 11:13:58 p55lp6 kernel: [60656.709226] [cf6f3ef0] [c007ca30] .__do_softirq+0xa8/0x164 Jan 24 11:13:58 p55lp6 kernel: [60656.709244] [cf6f3f90] [c002b7f8] .call_do_softirq+0x14/0x24 Jan 24 11:13:58 p55lp6 kernel: [60656.709262] [c0673920] [c000bf74] .do_softirq+0x74/0xc0 Jan 24 11:13:58 p55lp6 kernel: [60656.709280] [c06739b0] [c007cb84] .irq_exit+0x54/0x6c Jan 24 11:13:58 p55lp6 kernel: [60656.709297] [c0673a30] [c000c954] .do_IRQ+0x1b8/0x200 Jan 24 11:13:58 p55lp6 kernel: [60656.709314] [c0673ae0] [c0004c18] hardware_interrupt_entry+0x18/0x1c Jan 24 11:13:58 p55lp6 kernel: [60656.709332] --- Exception: 501 at .local_irq_restore+0x3c/0x40 Jan 24 11:13:58 p55lp6 kernel: [60656.709349] LR = .cpu_idle+0x13c/0x240 Jan 24 11:13:58 p55lp6 kernel: [60656.709357] [c0673dd0] [c00123c8] .cpu_idle+0x130/0x240 (unreliable) Jan 24 11:13:58 p55lp6 kernel: [60656.709377] [c0673e60] [c0009718] .rest_init+0x78/0x90 Jan 24 11:13:58 p55lp6 kernel: [60656.709394] [c0673ee0] [c04d09dc] .start_kernel+0x3e8/0x40c Jan 24 11:13:58 p55lp6 kernel: [60656.709412] [c0673f90] [c0008580] .start_here_common+0x54/0xd4 Jan 24 11:13:58 p55lp6 kernel: [60656.709429] Instruction dump: Jan 24 11:13:58 p55lp6 kernel: [60656.709437] 419e0010 3800 901f06ac 4810 801f06ac 2f80 409e0014 801f06b0 Jan 24 11:13:58 p55lp6 kernel: [60656.709464] 3881 2f80 409e0008 3880 <0b04> e87e80b0 4be81355 6000 (gdb) p 
tcp_fastretrans_alert $1 = {void (struct sock *, int, int)} 0x1ec0 (gdb) p/x 0x1ec0+0xc4 $2 = 0x1f84 (gdb) l *0x1f84 0x1f84 is in
[PATCH v2][POWERPC] Always build setup-bus.c on powerpc
The common arch/powerpc code calls in to functions in setup-bus.c so some builds of ppc32 would fail. Note, ppc32 usage of setup-irq.c is limited to arch/ppc and should be removed when arch/ppc goes away. Signed-off-by: Kumar Gala <[EMAIL PROTECTED]> Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]> --- Here's the proper diff, will send this via paulus. drivers/pci/Makefile |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 5550556..f697f3d 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o obj-$(CONFIG_PARISC) += setup-bus.o obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o obj-$(CONFIG_PPC32) += setup-irq.o -obj-$(CONFIG_PPC64) += setup-bus.o +obj-$(CONFIG_PPC) += setup-bus.o obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o obj-$(CONFIG_X86_VISWS) += setup-irq.o -- 1.5.3.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/3] Platform changes for UCC TDM driver for MPC8323ERDB.Also includes related QE changes and dts entries.
This patch needs to come before the previous one ("UCC TDM driver for QE based MPC83xx platforms") as that uses some of the fields defined here. On Thu, 24 Jan 2008 10:19:44 +0530 (IST) Poonam_Aggrwal-b10812 <[EMAIL PROTECTED]> wrote: > > +u32 get_brg_clk(enum qe_clock brgclk, enum qe_clock *brg_source) > { > - struct device_node *qe; > - if (brg_clk) > - return brg_clk; > + struct device_node *qe, *brg, *clocks; > + enum qe_clock brg_src; > + u32 brg_input_freq = 0; > + u32 brg_num; > + int ret; > + const unsigned int *prop; > > - qe = of_find_node_by_type(NULL, "qe"); > - if (qe) { > + *brg_source = 0; > + > + brg_num = brgclk - QE_BRG1; > + brg = of_find_compatible_node(NULL, NULL, "fsl,cpm-brg"); > + if (brg) { If you did if (!brg) { . . goto err; } Then you would save indenting all the rest of this function. > + prop = of_get_property(brg, > + "fsl,brg-sources", ); Join these lines. > + of_node_put(brg); > + > + if (prop) > + brg_src = *(prop + brg_num); > + else { > + printk(KERN_ERR "%s: invalid fsl,brg-sources in device " > + "tree\n", __FUNCTION__); > + ret = -EINVAL; > + goto err; > + } > + if (brg_src == 0) { > + *brg_source = 0; > + if (brg_clk > 0) > + return brg_clk; > + qe = of_find_node_by_type(NULL, "qe"); > + if (qe) { Again testing (!qe) and jumping to err would save another level if indentation. > + unsigned int size; > + prop = of_get_property > + (qe, "brg-frequency", ); And you wouldn't have to split things like this. > + if (!prop) { > + printk(KERN_ERR "%s: QE brg-frequency" > + "not present in device tree\n", > + __FUNCTION__); > + ret = -EINVAL; > + of_node_put(qe); > + goto err; > + } > + if (*prop) { > + of_node_put(qe); > + brg_clk = *prop; > + return *prop; > + } else { This else (and indentation) is unnecessary as you just returned above. 
> + } else { > + *brg_source = brg_src + QE_CLK1 - 1; > + clocks = of_find_compatible_node(NULL, NULL, > + "fsl,cpm-clocks"); > + if (!clocks) { > + printk(KERN_ERR "%s: no clocks node in device" > + " tree \n", __FUNCTION__); > + ret = -EINVAL; > + goto err; > + } else { Same here. > + } else { > + printk(KERN_ERR "%s: no brg node in device tree\n", > + __FUNCTION__); > + ret = -EINVAL; > + goto err; This goto is redundant. > + } > +err: return ret; Put the label on a line by itself and indent it one space (that means that "diff -p" will reference the function name instead of the label). > @@ -152,6 +152,10 @@ struct ucc_fast_info { > enum ucc_fast_rx_decoding_method renc; > enum ucc_fast_transparent_tcrc tcrc; > enum ucc_fast_sync_len synl; > + char *tdm_rx_clk; > + char *tdm_tx_clk; > + char *tdm_rx_sync; > + char *tdm_tx_sync; If you make these "const char *" you won't have to cast the results of of_get_property() that you assign to them. -- Cheers, Stephen Rothwell[EMAIL PROTECTED] http://www.canb.auug.org.au/~sfr/ pgp78Z3GKVXwn.pgp Description: PGP signature
Re: [PATCH] [POWERPC] Always build setup-bus.c on powerpc
On Jan 24, 2008, at 12:28 AM, Benjamin Herrenschmidt wrote: On Thu, 2008-01-24 at 00:07 -0600, Kumar Gala wrote: The common arch/powerpc code calls in to functions in setup-bus.c so some builds of ppc32 would fail. Note, ppc32 usage of setup-irq.c is limited to arch/ppc and should be removed when arch/ppc goes away. I don't understand... the old code would build setup-bus.o for both PPC32 and PPC64 cases, how did you make it fail ? The patch is bogus. The old makefile rules looked like: obj-$(CONFIG_PPC64) += setup-bus.o obj-$(CONFIG_PPC32) += setup-irq.o pmac most likely builds because CONFIG_HOTPLUG pulls in setup-bus.o (I'll fix my foobar'd patch and send this via Paul). - k -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] [POWERPC] Always build setup-bus.c on powerpc
On Thu, 2008-01-24 at 00:07 -0600, Kumar Gala wrote: > The common arch/powerpc code calls in to functions in setup-bus.c > so some builds of ppc32 would fail. > > Note, ppc32 usage of setup-irq.c is limited to arch/ppc and should be > removed when arch/ppc goes away. I don't understand... the old code would build setup-bus.o for both PPC32 and PPC64 cases, how did you make it fail ? Ben. > Signed-off-by: Kumar Gala <[EMAIL PROTECTED]> > --- > > Greg, recent changes that BenH has made to the arch/powerpc pci code make > this necessary. If you don't have an issue I'd like this patch to go via > the paulus's powerpc.git tree. > > - k > > drivers/pci/Makefile |4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile > index 945bf02..869d689 100644 > --- a/drivers/pci/Makefile > +++ b/drivers/pci/Makefile > @@ -31,8 +31,8 @@ obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o > obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o > obj-$(CONFIG_PARISC) += setup-bus.o > obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o > -obj-$(CONFIG_PPC32) += setup-irq.o setup-bus.o > -obj-$(CONFIG_PPC64) += setup-bus.o > +obj-$(CONFIG_PPC) += setup-bus.o > +obj-$(CONFIG_PPC32) += setup-irq.o > obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o > obj-$(CONFIG_X86_VISWS) += setup-irq.o > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] [POWERPC] Always build setup-bus.c on powerpc
On Thu, Jan 24, 2008 at 12:07:32AM -0600, Kumar Gala wrote: > The common arch/powerpc code calls in to functions in setup-bus.c > so some builds of ppc32 would fail. > > Note, ppc32 usage of setup-irq.c is limited to arch/ppc and should be > removed when arch/ppc goes away. > > Signed-off-by: Kumar Gala <[EMAIL PROTECTED]> > --- > > Greg, recent changes that BenH has made to the arch/powerpc pci code make > this necessary. If you don't have an issue I'd like this patch to go via > the paulus's powerpc.git tree. No objection at all. Feel free to add my: Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]> to this patch and send it through Paul. thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: CONFIG_MARKERS
On Thu, 2008-01-24 at 00:25 -0500, [EMAIL PROTECTED] wrote: > Remember - when a user tries a Linux box with a proprietary module, and the > experience sucks because the module sucks, they will walk away thinking > "Linux sucks", not "That module sucks". Worse, if they're technically inclined, they'll think Linux sucks for encoding philosophical policy into the kernel. Remember, a proprietary driver is only "illegal" to distribute, it's not an infringement for someone to write a non-GPL driver, or to have one on their computer. Jon. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] UCC TDM driver for QE based MPC83xx platforms.
On Thu, 24 Jan 2008 10:16:42 +0530 (IST) Poonam_Aggrwal-b10812 <[EMAIL PROTECTED]> wrote: > > +static int ucc_tdm_probe(struct of_device *ofdev, > + const struct of_device_id *match) > +{ > + struct device_node *np = ofdev->node; > + struct resource res; > + const unsigned int *prop; > + u32 ucc_num, device_num, err, ret = 0; > + struct device_node *np_tmp = NULL; You don't need to initialise this. > + dma_addr_t physaddr; > + void *tdm_buff; > + struct ucc_tdm_info *ut_info; > + > + prop = of_get_property(np, "device-id", NULL); You should check for (prop == NULL). > + ucc_num = *prop - 1; > + if ((ucc_num < 0) || (ucc_num > 7)) > + return -ENODEV; > + > + ut_info = _info[ucc_num]; > + if (ut_info == NULL) { This cannot be NULL as you have just taken the address of an array element. > + tdm_ctrl[device_num]->ut_info = ut_info; > + > + tdm_ctrl[device_num]->ut_info->uf_info.ucc_num = ucc_num; ^ This is the same as "ut_info". > + tdm_ctrl[device_num]->ut_info->uf_info.tdm_tx_clk = > + (char *) of_get_property(np, "fsl,tdm-tx-clk", NULL); ^ We don't normall put spaces here. > + tdm_ctrl[device_num]->ut_info->uf_info.tdm_rx_clk = > + (char *) of_get_property(np, "fsl,tdm-rx-clk", NULL); ^ Ditto. And later as well. > + tdm_ctrl[device_num]->ut_info->uf_info.irq = > + irq_of_parse_and_map(np, 0); > + err = of_address_to_resource(np, 0, ); > + if (err) { > + ret = EINVAL; This should be -EINVAL or err. > + goto get_property_error; You need to do something about unmapping the irq in the error path. 
> + tdm_ctrl[device_num]->uf_regs = of_iomap(np, 0); > + > + np_tmp = of_find_compatible_node(np_tmp, "slic", "legerity-slic"); > + if (np_tmp != NULL) > + tdm_ctrl[device_num]->leg_slic = 1; > + else > + tdm_ctrl[device_num]->leg_slic = 0; of_node_put(np_tmp); > + tdm_buff = dma_alloc_coherent(NULL, 2 * NR_BUFS * SAMPLE_DEPTH * > + tdm_ctrl[device_num]->cfg_ctrl.active_num_ts, > + , GFP_KERNEL); > + if (!tdm_buff) { > + printk(KERN_ERR "ucc-tdm: could not allocate buffer" > + "descriptors\n"); > + ret = -ENOMEM; > + goto get_property_error; You need to unmap the uf_regs in the error path. > +get_property_error: > + kfree(tdm_ctrl[device_num]); Do you need to set "tdm_ctrl[device_num]" to NULL and decrement num_tdm_devices? > + return ret; > +} > + > +static int ucc_tdm_remove(struct of_device *ofdev) > +{ > + struct tdm_ctrl *tdm_c; > + struct ucc_tdm_info *ut_info; > + u32 ucc_num; > + > + tdm_c = dev_get_drvdata(&(ofdev->dev)); dev_set_drvdata(_dev->dev, NULL); > + ucc_num = tdm_c->ut_info->uf_info.ucc_num; > + ut_info = _info[ucc_num]; > + tdm_stop(tdm_c); > + tdm_deinit(tdm_c); > + > + ucc_fast_free(tdm_c->uf_private); > + > + dma_free_coherent(NULL, 2 * NR_BUFS * SAMPLE_DEPTH * > + tdm_c->cfg_ctrl.active_num_ts, > + tdm_c->tdm_input_data, > + tdm_c->dma_input_addr); > + You need to unmap the uf_regs and the irq. > +static struct of_device_id ucc_tdm_match[] = { const, please. > + { > + .type = "tdm", > + .compatible = "fsl,ucc-tdm", > + }, {}, We would normally format this like: { .type = "tdm", .compatible = "fsl,ucc-tdm", }, {}, > +static struct of_platform_driver ucc_tdm_driver = { .driver = { > + .name = DRV_NAME, }, -- Cheers, Stephen Rothwell[EMAIL PROTECTED] http://www.canb.auug.org.au/~sfr/ pgpNvReFakjIs.pgp Description: PGP signature
[PATCH] 2.4: fix memory corruption from misinterpreted bad_inode_ops return values
This is a 2.4 backport of a linux-2.6 change by Eric Sandeen (commit be6aab0e9fa6d3c6d75aa1e38ac972d8b4ee82b8). CVE-2006-5753 was assigned for this issue. I've built and boot-tested this, but I'm not sure how to exercise these codepaths. Commit log from 2.6 follows. CVE-2006-5753 is for a case where an inode can be marked bad, switching the ops to bad_inode_ops, which are all connected as: static int return_EIO(void) { return -EIO; } #define EIO_ERROR ((void *) (return_EIO)) static struct inode_operations bad_inode_ops = { .create = bad_inode_create ...etc... The problem here is that the void cast causes return types to not be promoted, and for ops such as listxattr which expect more than 32 bits of return value, the 32-bit -EIO is interpreted as a large positive 64-bit number, i.e. 0x00000000fffffffa instead of 0xfffffffffffffffa. This goes particularly badly when the return value is taken as a number of bytes to copy into, say, a user's buffer for example... I originally had coded up the fix by creating a return_EIO_<TYPE> macro for each return type, like this: static int return_EIO_int(void) { return -EIO; } #define EIO_ERROR_INT ((void *) (return_EIO_int)) static struct inode_operations bad_inode_ops = { .create = EIO_ERROR_INT, ...etc... but Al felt that it was probably better to create an EIO-returner for each actual op signature. Since so few ops share a signature, I just went ahead & created an EIO function for each individual file & inode op that returns a value. 
Signed-off-by: dann frazier <[EMAIL PROTECTED]> --- fs/bad_inode.c | 191 +++- 1 files changed, 161 insertions(+), 30 deletions(-) diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 850ba5e..b6b1d7d 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -9,6 +9,76 @@ #include #include #include +#include + +static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin) +{ + return -EIO; +} + +static ssize_t bad_file_read(struct file *filp, char __user *buf, + size_t size, loff_t *ppos) +{ +return -EIO; +} + +static ssize_t bad_file_write(struct file *filp, const char __user *buf, + size_t siz, loff_t *ppos) +{ +return -EIO; +} + +static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + return -EIO; +} + +static unsigned int bad_file_poll(struct file *filp, poll_table *wait) +{ + return POLLERR; +} + +static int bad_file_ioctl (struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return -EIO; +} + +static int bad_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + return -EIO; +} + +static int bad_file_open(struct inode *inode, struct file *filp) +{ + return -EIO; +} + +static int bad_file_flush(struct file *file) +{ + return -EIO; +} + +static int bad_file_release(struct inode *inode, struct file *filp) +{ + return -EIO; +} + +static int bad_file_fsync(struct file *file, struct dentry *dentry, + int datasync) +{ + return -EIO; +} + +static int bad_file_fasync(int fd, struct file *filp, int on) +{ + return -EIO; +} + +static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl) +{ + return -EIO; +} /* * The follow_link operation is special: it must behave as a no-op @@ -20,46 +90,107 @@ static int bad_follow_link(struct dentry *dent, struct nameidata *nd) return vfs_follow_link(nd, ERR_PTR(-EIO)); } -static int return_EIO(void) +static struct file_operations bad_file_ops = +{ + llseek: bad_file_llseek, + read: bad_file_read, + write: bad_file_write, + readdir:bad_file_readdir, 
+ poll: bad_file_poll, + ioctl: bad_file_ioctl, + mmap: bad_file_mmap, + open: bad_file_open, + flush: bad_file_flush, + release:bad_file_release, + fsync: bad_file_fsync, + fasync: bad_file_fasync, + lock: bad_file_lock, +}; + +static int bad_inode_create (struct inode *dir, struct dentry *dentry, + int mode) { return -EIO; } + +static struct dentry *bad_inode_lookup(struct inode *dir, + struct dentry *dentry) +{ + return ERR_PTR(-EIO); +} -#define EIO_ERROR ((void *) (return_EIO)) +static int bad_inode_link (struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + return -EIO; +} -static struct file_operations bad_file_ops = +static int bad_inode_unlink(struct inode *dir, struct dentry *dentry) { - llseek: EIO_ERROR, - read: EIO_ERROR, - write: EIO_ERROR, - readdir:EIO_ERROR, -
[PATCH] [POWERPC] Always build setup-bus.c on powerpc
The common arch/powerpc code calls in to functions in setup-bus.c so some builds of ppc32 would fail. Note, ppc32 usage of setup-irq.c is limited to arch/ppc and should be removed when arch/ppc goes away. Signed-off-by: Kumar Gala <[EMAIL PROTECTED]> --- Greg, recent changes that BenH has made to the arch/powerpc pci code make this necessary. If you don't have an issue I'd like this patch to go via the paulus's powerpc.git tree. - k drivers/pci/Makefile |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 945bf02..869d689 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -31,8 +31,8 @@ obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o obj-$(CONFIG_PARISC) += setup-bus.o obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o -obj-$(CONFIG_PPC32) += setup-irq.o setup-bus.o -obj-$(CONFIG_PPC64) += setup-bus.o +obj-$(CONFIG_PPC) += setup-bus.o +obj-$(CONFIG_PPC32) += setup-irq.o obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o obj-$(CONFIG_X86_VISWS) += setup-irq.o -- 1.5.3.7 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 2.6.24-rc8-mm1 Kernel oops on CIFS while running fsstress
Hi Andrew, The machine drops into xmon, while running the fsstress on the cifs mounted partition. 1:mon> r R00 = R16 = R01 = c0017527f910 R17 = R02 = d0862258 R18 = R03 = 0001 R19 = R04 = 0001 R20 = c0013999fcb0 R05 = R21 = c0013c589978 R06 = R22 = c0015a9e6e00 R07 = 0001 R23 = c001786dfdf0 R08 = R24 = c0018b5a7f50 R09 = c0019012ff08 R25 = c0017527fc10 R10 = 0001 R26 = 477d R11 = c03d4f90 R27 = c0013999fcb0 R12 = d08323e8 R28 = c0017527fc10 R13 = c0572380 R29 = c001504afdf0 R14 = R30 = d0860168 R15 = R31 = d085a338 pc = d0819e04 .find_writable_file+0x8c/0x1c0 [cifs] lr = d0819de0 .find_writable_file+0x68/0x1c0 [cifs] msr = 80009032 cr = 24000888 ctr = c03d4f90 xer = trap = 300 dar = c0019012ff30 dsisr = 4001 1:mon> t [c0017527f9b0] d081f808 .cifs_setattr+0x178/0xb04 [cifs] [c0017527fae0] c01202e4 .notify_change+0x1e0/0x414 [c0017527fba0] c0101e2c .do_truncate+0x74/0xa8 [c0017527fc80] c0102170 .sys_truncate+0x1c8/0x21c [c0017527fdb0] c0015f2c .compat_sys_truncate64+0x18/0x30 [c0017527fe30] c0008734 syscall_exit+0x0/0x40 --- Exception: c00 (System Call) at 0ff2620c SP (ff87f070) is in userspace 1:mon> e cpu 0x1: Vector: 300 (Data Access) at [c0017527f690] pc: d0819e04: .find_writable_file+0x8c/0x1c0 [cifs] lr: d0819de0: .find_writable_file+0x68/0x1c0 [cifs] sp: c0017527f910 msr: 80009032 dar: c0019012ff30 dsisr: 4001 current = 0xc0018761f640 paca= 0xc0572380 pid = 12920, comm = fsstress The gdb output (gdb) p find_writable_file $1 = {struct cifsFileInfo *(struct cifsInodeInfo *)} 0x19d78 (gdb) p/x 0x19d78+0x68 $3 = 0x19de0 (gdb) l *0x19de0 0x19de0 is in find_writable_file (fs/cifs/file.c:1079). 1074cERROR(1, ("Null inode passed to cifs_writeable_file")); 1075dump_stack(); 1076return NULL; 1077} 1078 1079read_lock(); 1080refind_writable: 1081list_for_each_entry(open_file, _inode->openFileList, flist) { 1082if (open_file->closePend) 1083continue; (gdb) -- Thanks & Regards, Kamalesh Babulal, Linux Technology Center, IBM, ISTL. 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kvm-devel] [PATCH] export notifier #1
Gerd Hoffmann wrote: Another maybe workable approach for Xen is to go through pv_ops (although pte_clear doesn't go through pv_ops right now, so this would be an additional hook too ...). I think that's the way. Xen is not a secondary mmu but rather a primary mmu with some magic characteristics. -- Any sufficiently difficult bug is indistinguishable from a feature. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [kvm-devel] [PATCH] export notifier #1
Christoph Lameter wrote: On Wed, 23 Jan 2008, Robin Holt wrote: That won't work for kvm. If we have a hundred virtual machines, that means 99 no-op notifications. But 100 callouts holding spinlocks will not work for our implementation and even if the callouts are made with spinlocks released, we would very strongly prefer a single callout which messages the range to the other side. Andrea wont have 99 no op notifications. You will have one notification to the kvm subsystem (since there needs to be only one register operation for a subsystem that wants to get notifications). What do you do there is up to kvm. If you want to call some function 99 times then you are free to do that. What I need is a list of (mm, va) that map the page. kvm doesn't have access to that, export notifiers do. It seems reasonable that export notifier do that rmap walk since they are part of core mm, not kvm. Alternatively, kvm can change its internal rmap structure to be page based instead of (mm, hva) based. The problem here is to size this thing, as we don't know in advance (when the kvm module is loaded) whether 0% or 100% (or some value in between) of system memory will be used for kvm. -- Any sufficiently difficult bug is indistinguishable from a feature. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 36/49] ext4: Add EXT4_IOC_MIGRATE ioctl
On Wed, Jan 23, 2008 at 02:07:16PM -0800, Andrew Morton wrote: > > On Mon, 21 Jan 2008 22:02:15 -0500 "Theodore Ts'o" <[EMAIL PROTECTED]> > > wrote: > > The below patch add ioctl for migrating ext3 indirect block mapped inode > > to ext4 extent mapped inode. > > This patch adds lots of weird and inexplicable single- and double-newlines > in inappropriate places. However it frequently forgets to add newlines > between end-of-locals and start-of-code, which is usual practice. > > > +struct list_blocks_struct { > + ext4_lblk_t first_block, last_block; > + ext4_fsblk_t first_pblock, last_pblock; > +}; > Updated patch commit c4786b67cdc5b24d2548a69b62774fb54f8f1575 Author: Aneesh Kumar K.V <[EMAIL PROTECTED]> Date: Tue Jan 22 09:28:55 2008 +0530 ext4: Add EXT4_IOC_MIGRATE ioctl The below patch add ioctl for migrating ext3 indirect block mapped inode to ext4 extent mapped inode. Signed-off-by: Aneesh Kumar K.V <[EMAIL PROTECTED]> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ae6e7e5..d5fd80b 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o + ext4_jbd2.o migrate.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 03d1bbb..323cd76 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -75,7 +75,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) * stores a large physical block number into an extent struct, * breaking it into parts */ -static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) +void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0x)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0x); diff --git 
a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c0e5b8c..2ed7c37 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -254,6 +254,9 @@ flags_err: return err; } + case EXT4_IOC_MIGRATE: + return ext4_ext_migrate(inode, filp, cmd, arg); + default: return -ENOTTY; } diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c new file mode 100644 index 000..deb2327 --- /dev/null +++ b/fs/ext4/migrate.c @@ -0,0 +1,588 @@ +/* + * Copyright IBM Corporation, 2007 + * Author Aneesh Kumar K.V <[EMAIL PROTECTED]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include +#include +#include + +/* + * The contiguous blocks details which can be + * represented by a single extent + */ +struct list_blocks_struct { + ext4_lblk_t first_block, last_block; + ext4_fsblk_t first_pblock, last_pblock; +}; + +static int finish_range(handle_t *handle, struct inode *inode, + struct list_blocks_struct *lb) + +{ + int retval = 0, needed; + struct ext4_extent newext; + struct ext4_ext_path *path; + if (lb->first_pblock == 0) + return 0; + + /* Add the extent to temp inode*/ + newext.ee_block = cpu_to_le32(lb->first_block); + newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); + ext4_ext_store_pblock(, lb->first_pblock); + path = ext4_ext_find_extent(inode, lb->first_block, NULL); + + if (IS_ERR(path)) { + retval = PTR_ERR(path); + goto err_out; + } + + /* +* Calculate the credit needed to inserting this extent +* Since we are doing this in loop we may accumalate extra +* credit. But below we try to not accumalate too much +* of them by restarting the journal. 
+*/ + needed = ext4_ext_calc_credits_for_insert(inode, path); + + /* +* Make sure the credit we accumalated is not really high +*/ + if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { + + retval = ext4_journal_restart(handle, needed); + if (retval) + goto err_out; + } + if (needed) { + retval = ext4_journal_extend(handle, needed); + if (retval != 0) { + /* +* IF not able to extend the journal restart the journal +*/ +
Re: 2.6.24 regression: pan hanging unkilleable and un-straceable
On Tue, 22 Jan 2008 16:25:58 +1100, Nick Piggin said: > > Index: linux-2.6/kernel/sched.c > === > --- linux-2.6.orig/kernel/sched.c > +++ linux-2.6/kernel/sched.c > @@ -4920,8 +4920,7 @@ static void show_task(struct task_struct > printk(KERN_CONT "%5lu %5d %6d\n", free, > task_pid_nr(p), task_pid_nr(p->real_parent)); > > - if (state != TASK_RUNNING) > - show_stack(p, NULL); > + show_stack(p, NULL); > } Maybe something like this would be better? if (state == TASK_RUNNING) printk("running task, stack trace may be inaccurate\n"); show_stack(p, NULL); Just a thought pgpob2DMsMsGv.pgp Description: PGP signature
Re: [PATCH 30/49] ext4: Convert truncate_mutex to read write semaphore.
On Wed, Jan 23, 2008 at 02:06:59PM -0800, Andrew Morton wrote: > > On Mon, 21 Jan 2008 22:02:09 -0500 "Theodore Ts'o" <[EMAIL PROTECTED]> > > wrote: > > +int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t > > block, > > + unsigned long max_blocks, struct buffer_head *bh, > > + int create, int extend_disksize) > > +{ > > + int retval; > > + if (create) { > > + down_write((&EXT4_I(inode)->i_data_sem)); > > + } else { > > + down_read((&EXT4_I(inode)->i_data_sem)); > > + } > > + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { > > + retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, > > + bh, create, extend_disksize); > > + } else { > > + retval = ext4_get_blocks_handle(handle, inode, block, > > + max_blocks, bh, create, extend_disksize); > > + } > > + if (create) { > > + up_write((&EXT4_I(inode)->i_data_sem)); > > + } else { > > + up_read((&EXT4_I(inode)->i_data_sem)); > > + } > > This function has many unneeded braces. checkpatch used to detect this > but it seems to have broken. The follow up patch "ext4: Take read lock during overwrite case" removes those single-line if statements. > > > + return retval; > > +} > > static int ext4_get_block(struct inode *inode, sector_t iblock, > > struct buffer_head *bh_result, int create) > > Missing newline. Fixed. -aneesh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: CONFIG_MARKERS
On Wed, 23 Jan 2008 09:48:12 EST, Mathieu Desnoyers said: > This specific one is a kernel policy matter, and I personally don't > have a strong opinion about it. I agree that you raise a good counter > argument : it can be useful to proprietary modules users to be able to > extract tracing information from those modules to argue with their > vendors that their driver/hardware is broken (a tracer is _very_ useful > in that kind of situation). Amen, brother. Been there, done that, got the tshirt (not on Linux, but other operating systems). > However, it is also useful to proprietary > module writers who can benefit from the merged kernel/modules traces. > Do we want to give them this ability ? The proprietary module writer has the *source* for the kernel and their module. There's no way you can prevent the proprietary module writers from using this feature as long as you allow other module writers to use it. > It would surely help writing > better proprietary kernel modules. The biggest complaint against proprietary modules is that they make it impossible for *us* to debug. And you want to argue *against* a feature that would allow them to develop better code that causes fewer crashes, and therefore fewer people *asking* for us to debug it? Remember - when a user tries a Linux box with a proprietary module, and the experience sucks because the module sucks, they will walk away thinking "Linux sucks", not "That module sucks". pgpUv0OWFr24L.pgp Description: PGP signature
Re: [PATCH 23/49] Add buffer head related helper functions
On Wed, Jan 23, 2008 at 02:06:48PM -0800, Andrew Morton wrote: > > On Mon, 21 Jan 2008 22:02:02 -0500 "Theodore Ts'o" <[EMAIL PROTECTED]> > > wrote: > > +} > > +EXPORT_SYMBOL(bh_uptodate_or_lock); > > +/** > > Missing newline. > > > + * bh_submit_read: Submit a locked buffer for reading > > + * @bh: struct buffer_head > > + * > > + * Returns a negative error > > + */ > > +int bh_submit_read(struct buffer_head *bh) > > +{ > > + if (!buffer_locked(bh)) > > + lock_buffer(bh); > > + > > + if (buffer_uptodate(bh)) > > + return 0; > > Here it can lock the buffer then return zero > > > + get_bh(bh); > > + bh->b_end_io = end_buffer_read_sync; > > + submit_bh(READ, bh); > > + wait_on_buffer(bh); > > + if (buffer_uptodate(bh)) > > + return 0; > > Here it will unlock the buffer and return zero. > > This function is unusable when passed an unlocked buffer. > Updated patch below. commit 70d4ca32604e0935a8b9a49c5ac8b9c64c810693 Author: Aneesh Kumar K.V <[EMAIL PROTECTED]> Date: Thu Jan 24 10:50:24 2008 +0530 Add buffer head related helper functions Add buffer head related helper function bh_uptodate_or_lock and bh_submit_read which can be used by file system Signed-off-by: Aneesh Kumar K.V <[EMAIL PROTECTED]> diff --git a/fs/buffer.c b/fs/buffer.c index 7249e01..82aa2db 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3213,6 +3213,53 @@ static int buffer_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } +/** + * bh_uptodate_or_lock: Test whether the buffer is uptodate + * @bh: struct buffer_head + * + * Return true if the buffer is up-to-date and false, + * with the buffer locked, if not. 
+ */ +int bh_uptodate_or_lock(struct buffer_head *bh) +{ + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) + return 0; + unlock_buffer(bh); + } + return 1; +} +EXPORT_SYMBOL(bh_uptodate_or_lock); + +/** + * bh_submit_read: Submit a locked buffer for reading + * @bh: struct buffer_head + * + * Returns zero on success and -EIO on error.If the input + * buffer is not locked returns -EINVAL + * + */ +int bh_submit_read(struct buffer_head *bh) +{ + if (!buffer_locked(bh)) + return -EINVAL; + + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + return 0; + } + + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + return 0; + return -EIO; +} +EXPORT_SYMBOL(bh_submit_read); + void __init buffer_init(void) { int nrpages; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index da0d83f..e98801f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -192,6 +192,8 @@ int sync_dirty_buffer(struct buffer_head *bh); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); +int bh_uptodate_or_lock(struct buffer_head *bh); +int bh_submit_read(struct buffer_head *bh); extern int buffer_heads_over_limit; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC][PATCH 1/2]: MM: Make Page Tables Relocatable--Conditional TLB Flush
Hello. This is a nitpick, but all of the architecture code except generic uses MMF_NNED_FLUSH at clear_bit()... ^ Please fix the misspelling. Bye. > > diff -uprwNbB -X 2.6.23/Documentation/dontdiff 2.6.23/arch/alpha/kernel/smp.c > 2.6.23a/arch/alpha/kernel/smp.c > --- 2.6.23/arch/alpha/kernel/smp.c2007-10-09 13:31:38.0 -0700 > +++ 2.6.23a/arch/alpha/kernel/smp.c 2007-10-29 13:50:06.0 -0700 > @@ -850,6 +850,8 @@ flush_tlb_mm(struct mm_struct *mm) > { > preempt_disable(); > > + clear_bit(MMF_NNED_FLUSH, mm->flags); > + > if (mm == current->active_mm) { > flush_tlb_current(mm); > if (atomic_read(&mm->mm_users) <= 1) { -- Yasunori Goto -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PATCH] ACPI patches for 2.6.24-rc8
Hi Linus, please pull from: git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git release This fixes a couple of 2.6.24 regressions, and also installs the latest DMI blacklist. While the DMI blacklist is a lot of text, it is almost entirely about avoiding dmesg spam to make the release more supportable. Indeed, I plan to submit the DMI update to 2.6.23.stable. This will update the files shown below. thanks! -Len ps. individual patches are available on [EMAIL PROTECTED] and a consolidated plain patch is available here: ftp://ftp.kernel.org/pub/linux/kernel/people/lenb/acpi/patches/release/2.6.24/acpi-release-20070126-2.6.24-rc8.diff.gz drivers/acpi/blacklist.c| 381 drivers/acpi/bus.c |2 drivers/acpi/ec.c | 26 + drivers/acpi/fan.c | 40 ++ drivers/acpi/osl.c | 173 -- drivers/acpi/processor_throttling.c |4 drivers/firmware/dmi-id.c |2 drivers/firmware/dmi_scan.c |8 include/acpi/acpi_bus.h |3 include/linux/acpi.h|7 include/linux/dmi.h |4 11 files changed, 591 insertions(+), 59 deletions(-) through these commits: Alexey Starikovskiy (2): ACPI: processor: Fix null pointer dereference in throttling ACPI: EC: fix dmesg spam regression Len Brown (9): DMI: move dmi_available declaration to linux/dmi.h DMI: create dmi_get_slot() ACPI: create acpi_dmi_dump() ACPI: on OSI(Linux), print needed DMI rather than requesting dmidecode output ACPI: Delete Intel Customer Reference Board (CRB) from OSI(Linux) DMI list ACPI: make _OSI(Linux) console messages smarter ACPI: Add ThinkPad R61, ThinkPad T61 to OSI(Linux) white-list ACPI: DMI blacklist to reduce console warnings on OSI(Linux) systems. Revert "ACPI: Fan: Drop force_power_state acpi_device option" Márton Németh (2): ACPI: EC: add leading zeros to debug messages ACPI: EC: "DEBUG" needs to be defined earlier with this log: commit 63eac9badbe35054c0ae61a9dbcf4830c7429040 Merge: 3645ca8... a1bd4e3... 
Author: Len Brown <[EMAIL PROTECTED]> Date: Wed Jan 23 23:50:01 2008 -0500 Pull dmi-2.6.24 into release branch commit 3645ca8359328ea4c75bce4af54ad24028381f30 Merge: 0f23a6b... ec68373... Author: Len Brown <[EMAIL PROTECTED]> Date: Wed Jan 23 23:48:46 2008 -0500 Pull bugzilla-9798 into release branch commit 0f23a6b0c182b1040cb0b89e20527cd07d85aebf Merge: 7456337... d772b3b... Author: Len Brown <[EMAIL PROTECTED]> Date: Wed Jan 23 23:48:33 2008 -0500 Pull bugzilla-8459 into release branch commit 7456337d1fd38e463674dcb5a3df21ca332cb8b0 Merge: 667984d... d1154be... Author: Len Brown <[EMAIL PROTECTED]> Date: Wed Jan 23 23:48:19 2008 -0500 Pull bugzilla-9747 into release branch commit ec68373c04495edbe39fb94fad963fb781e062e5 Author: Len Brown <[EMAIL PROTECTED]> Date: Wed Jan 23 22:41:20 2008 -0500 Revert "ACPI: Fan: Drop force_power_state acpi_device option" This reverts commit 93ad7c07ad487b036add8760dabcc35666a550ef. http://bugzilla.kernel.org/show_bug.cgi?id=9798 Signed-off-by: Len Brown <[EMAIL PROTECTED]> commit d772b3b323a15588a757f5af28e51a57d0d2f622 Author: Márton Németh <[EMAIL PROTECTED]> Date: Wed Jan 23 22:34:09 2008 -0500 ACPI: EC: "DEBUG" needs to be defined earlier The "DEBUG" symbol needs to be defined before #including to get the pr_debug() working. Signed-off-by: Márton Németh <[EMAIL PROTECTED]> Signed-off-by: Len Brown <[EMAIL PROTECTED]> commit 86dae0154a49b67c908faffeb33ba37eddceba74 Author: Márton Németh <[EMAIL PROTECTED]> Date: Wed Jan 23 22:33:06 2008 -0500 ACPI: EC: add leading zeros to debug messages Add leading zeros to pr_debug() calls. For example if x=0x0a, the format "0x%2x" will result the string "0x a", the format "0x%2.2x" will result "0x0a". 
Signed-off-by: Márton Németh <[EMAIL PROTECTED]> Signed-off-by: Len Brown <[EMAIL PROTECTED]> commit 03d1d99c55649ca641b86d2e3489b167ede1671a Author: Alexey Starikovskiy <[EMAIL PROTECTED]> Date: Wed Jan 23 22:28:34 2008 -0500 ACPI: EC: fix dmesg spam regression Return OBF_1 optimization workaround http://bugzilla.kernel.org/show_bug.cgi?id=8459 Signed-off-by: Alexey Starikovskiy <[EMAIL PROTECTED]> Signed-off-by: Len Brown <[EMAIL PROTECTED]> commit a1bd4e35e8d9df24db7d7addd74cbfcc87ec9fb3 Author: Len Brown <[EMAIL PROTECTED]> Date: Wed Jan 23 21:19:27 2008 -0500 ACPI: DMI blacklist to reduce console warnings on OSI(Linux) systems. This DMI blacklist reduces the console messages on systems which have a BIOS that invokes OSI(Linux). As the DMI blacklist already knows about these systems, the request for DMI info itself is disabled. Further, if OSI(Linux) has
Re: [PATCH 3/4] firewire: enforce access order between generation and node ID
On Thursday 24 January 2008 11:54, Stefan Richter wrote: > fw_device.node_id and fw_device.generation are accessed without mutexes. > We have to ensure that all readers will get to see node_id updates > before generation updates. > > An earlier incarnation of this patch fixes an inability to recognize > devices after "giving up on config rom", > https://bugzilla.redhat.com/show_bug.cgi?id=429950 > > Signed-off-by: Stefan Richter <[EMAIL PROTECTED]> > --- > > Rework of patches > firewire: fw-core: enforce write order when updating > fw_device.generation and parts of > firewire: fw-core: react on bus resets while the config ROM is being > fetched firewire: fw-sbp2: enforce read order of device generation and node > ID from November 1 2007. > > Update: > - write site and read sites folded into one patch > - added fix to fw_device_enable_phys_dma() and fill_bus_reset_event() > - smp_ barriers are sufficient > - comments, changelog I don't know the firewire subsystem at all, but the barriers seem right (in that they match your description of the problem), and comments for them are really good. Thanks, Nick -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH UCC TDM 3/3 ] Modified Documentation to explain dts entries for TDM driver
From: Poonam Aggrwal <[EMAIL PROTECTED]> Modified Documentation to explain new properties introduced for UCC TDM driver. Also two new nodes have been added "brg" and "clocks" to configure a BRG from device tree. Signed-off-by: Poonam Aggrwal <[EMAIL PROTECTED]> Signed-off-by: Ashish Kalra <[EMAIL PROTECTED]> Signed-off-by: Kim Phillips <[EMAIL PROTECTED]> Signed-off-by: Michael Barkowski <[EMAIL PROTECTED]> --- Documentation/powerpc/booting-without-of.txt | 96 +- 1 files changed, 94 insertions(+), 2 deletions(-) diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index e9a3cb1..94a6b4b 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -1613,8 +1613,8 @@ platforms are moved over to use the flattened-device-tree model. Required properties: - device_type : should be "network", "hldc", "uart", "transparent" -"bisync" or "atm". - - compatible : could be "ucc_geth" or "fsl_atm" and so on. +"bisync", "atm" or "tdm". + - compatible : could be "ucc_geth", "fsl_atm" or "fsl,ucc_tdm" and so on. - model : should be "UCC". - device-id : the ucc number(1-8), corresponding to UCCx in UM. - reg : Offset and length of the register set for the device @@ -1666,7 +1666,44 @@ platforms are moved over to use the flattened-device-tree model. pio-handle = <140001>; }; + Required properties for tdm device_type: + - instead of tx-clock and rx-clock following clock properties are + required: + - fsl,tdm-tx-clk : This property selects the TX clock source for TDM + from a bank of clocks. + - fsl,tdm-rx-clk : This property selects the RX clock source for TDM + from a bank of clocks. + - fsl,tdm-tx-sync : This property selects the TX Frame sync source + for TDM from a bank of clocks. + - fsl,tdm-rx-sync : This property selects the TX Frame sync source + for TDM from a bank of clocks. 
+ + All the above mentioned properties are string type with possible + values + "CLK1", "CLK2", "CLK3"..."CLK24" and so on + "BRG1", "BRG2", "BRG3"..."BRG16" and so on + + - fsl,tdm-num : TDM to be used (1,2,3 or 4 for TDMA TDMB TDMC TDMD) + - fsl,si-num : Serial Interface to be used. + Example: + [EMAIL PROTECTED] { + device_type = "tdm"; + compatible = "fsl,ucc-tdm"; + model = "UCC"; + device-id = <1>; + fsl,tdm-num = <1>; + fsl,si-num = <1>; + fsl,tdm-tx-clk = "CLK1"; + fsl,tdm-rx-clk = "CLK1"; + fsl,tdm-tx-sync = "BRG9"; + fsl,tdm-rx-sync = "BRG9"; + reg = <2000 200>; + interrupts = <20>; + interrupt-parent = <>; + pio-handle = <>; + }; + v) Parallel I/O Ports This node configures Parallel I/O ports for CPUs with QE support. @@ -1772,6 +1809,61 @@ platforms are moved over to use the flattened-device-tree model. }; }; + viii) Clocks (clocks) + This node specifies the frequency values for all the external clocks + viz CLK1 to CLK24 in Hz. + + Required Properties: + - compatible : should be "fsl,cpm-clocks". + - #clock-cells : It specifies the number of cells occupied by clock-frequency +property. Currently #clock-cells = 1 is only supported and implemented. +This property is kept for future in case we need frequencies higher than +4 GHz. + - clock-frequency : It is a list of u32 values to represent the frequency +of each external clock(CLK1 to CLK24) in Hz. Each entry occupies +number of cells specified by #clock-cells property(1 for now). + + Example: + + clocks { + compatible = "fsl,cpm-clocks"; + #clock-cells = <1>; + /* clock freqs in Hz(for CLK1~CLK24). +* CLK11 is 1024KHz, +* all other clocks unused +*/ + clock-frequency = <0 0 0 0 0 0 + 0 0 0 0 0 d#1024000 0 + 0 0 0 0 0 0 + 0 0 0 0 0 0>; + }; + + ix) Baud Rate Generator (BRG) + + Required properties: + - compatible : should be "fsl,cpm-brg" + - fsl,brg-sources : define the input clock for all 16 BRGs. The input +clock source could be 1 to 24 for CLK1 to CLK24. 
Zero means that the +particular BRG will be driven by QE clock(BRGCLK). + - reg : This property defines the address and size of the memory-mapped +registers of the BRG. + + Example: + + [EMAIL PROTECTED] { + compatible = "fsl,qe-brg"; + /* input clock sources for all the 16 BRGs. +
[PATCH 2/3] Platform changes for UCC TDM driver for MPC8323ERDB. Also includes related QE changes and dts entries.
From: Poonam Aggrwal <[EMAIL PROTECTED]> This patch makes necessary changes in the QE and UCC framework to support TDM. It also adds support to configure the BRG properly through device tree entries. Includes the device tree changes for UCC TDM driver as well. It also includes device tree entries for UCC TDM driver. Tested on MPC8323ERDB platform. Signed-off-by: Poonam Aggrwal <[EMAIL PROTECTED]> Signed-off-by: Ashish Kalra <[EMAIL PROTECTED]> Signed-off-by: Kim Phillips <[EMAIL PROTECTED]> Signed-off-by: Michael Barkowski <[EMAIL PROTECTED]> --- arch/powerpc/boot/dts/mpc832x_rdb.dts | 58 +++ arch/powerpc/sysdev/qe_lib/qe.c | 205 -- arch/powerpc/sysdev/qe_lib/ucc.c | 265 + arch/powerpc/sysdev/qe_lib/ucc_fast.c | 37 + include/asm-powerpc/qe.h |8 + include/asm-powerpc/ucc.h |4 + include/asm-powerpc/ucc_fast.h|4 + 7 files changed, 568 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts index 388c8a7..c0e6283 100644 --- a/arch/powerpc/boot/dts/mpc832x_rdb.dts +++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts @@ -105,6 +105,17 @@ device_type = "par_io"; num-ports = <7>; + ucc1pio:[EMAIL PROTECTED] { + pio-map = < + /* port pin dir open_drain assignment has_irq */ + 0 e 2 0 1 0/* CLK11 */ + 3 16 1 0 2 0/* BRG9 */ + 3 1b 1 0 2 0/* BRG3 */ + 0 0 3 0 2 0/* TDMATxD0 */ + 0 4 3 0 2 0/* TDMARxD0 */ + 3 1b 2 0 1 0>; /* CLK1 */ + }; + ucc2pio:[EMAIL PROTECTED] { pio-map = < /* port pin dir open_drain assignment has_irq */ @@ -169,6 +180,36 @@ }; }; + clocks { + compatible = "fsl,cpm-clocks"; + /* clock freqs in Hz(for CLK1~CLK24). +* CLK11 is 1024KHz, +* all other clocks unused +* #clock-cells define number of cells +* used by the clock-frequency. +* right now only #clock cells=1 is +* implemented. 
Provision is there to +* handle frequencies >4Gig +*/ + #clock-cells = <1>; + clock-frequency = <0 0 0 0 0 0 + 0 0 0 0 d#1024000 0 + 0 0 0 0 0 0 + 0 0 0 0 0 0>; + }; + + [EMAIL PROTECTED] { + compatible = "fsl,cpm-brg"; + /* input clock sources for all the 16 BRGs. +* 1-24 for CLK1 to CLK24. +* BRG9 uses CLK11,BRG1 and BRG2-8 use +* the QE clock. +*/ + fsl,brg-sources = <0 0 0 0 0 0 0 0 + b 0 0 0 0 0 0 0>; + reg = <640 7f>; + }; + [EMAIL PROTECTED] { device_type = "spi"; compatible = "fsl_spi"; @@ -187,6 +228,23 @@ mode = "cpu"; }; + [EMAIL PROTECTED] { + device_type = "tdm"; + compatible = "fsl,ucc-tdm"; + model = "UCC"; + device-id = <1>; + fsl,tdm-num = <1>; + fsl,si-num = <1>; + fsl,tdm-tx-clk = "CLK1"; + fsl,tdm-rx-clk = "CLK1"; + fsl,tdm-tx-sync = "BRG9"; + fsl,tdm-rx-sync = "BRG9"; + reg = <2000 200>; + interrupts = <20>; + interrupt-parent = <>; + pio-handle = <>; + }; + [EMAIL PROTECTED] { device_type = "network"; compatible = "ucc_geth"; diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 1df3b4a..fddc3d8 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -149,20 +149,189 @@ EXPORT_SYMBOL(qe_issue_cmd); */ static unsigned int brg_clk = 0; -unsigned int get_brg_clk(void) +u32 get_brg_clk(enum qe_clock brgclk,
[PATCH] UCC TDM driver for QE based MPC83xx platforms.
From: Poonam Agarwal-b10812 <[EMAIL PROTECTED]> The UCC TDM driver basically multiplexes and demultiplexes data from different channels. It can interface with for example SLIC kind of devices to receive TDM data demultiplex it and send to upper modules. At the transmit end it receives data for different channels multiplexes it and sends them on the TDM channel. It internally uses TSA( Time Slot Assigner) which does multiplexing and demultiplexing, UCC to perform SDMA between host buffers and the TSA, CMX to connect TSA to UCC. It can be used by a kernel module which can call tdm_register_client to get access to a TDM device. The driver is right now a misc driver with no subsystem as such. There can be a platform independent TDM layer which is planned to be done after this. TDM bus sort of thing. The dts file keeps a track of the TDM devices present on the board. Depending on them the TDM driver initializes those many driver instances while coming up. The driver on the upper level can plug to more than one tdm clients depending on the availablity of TDM devices. At every new request of a TDM client to bind with a TDM device, a free driver instance is allocated to the client. The interface can be described as follows. tdm_register_client(struct tdm_client *) This API returns a pointer to the structure tdm_client which is of type struct tdm_client { u32 client_id; u32 (*tdm_read)(u32 client_id, short chn_id, short *pcm_buffer, short len); u32 (*tdm_write)(u32 client_id, short chn_id, short *pcm_buffer, short len); wait_queue_head_t *wakeup_event; } It consists of: - client_id: It is basically to identify the particular TDM device/driver instance. - tdm_read: It is a function pointer returned by the TDM driver to be used to read TDM data from a particular TDM channel. - tdm_write: It is a function pointer returned by the TDM driver to be used to write TDM data to a particular TDM channel. 
- wakeup_event: It is address of a wait_queue event on which the client keeps on sleeping, and the TDM driver wakes it up periodically. The driver is configured to wake up the client after every 10ms. Once the TDM client gets registered to a TDM driver instance and a TDM device, it interfaces with the driver using tdm_read, tdm_write and wakeup_event. This driver will run on MPC8323E-RDB platforms. Signed-off-by: Poonam Aggrwal <[EMAIL PROTECTED]> Signed-off-by: Ashish Kalra <[EMAIL PROTECTED]> Signed-off-by: Kim Phillips <[EMAIL PROTECTED]> Signed-off-by: Michael Barkowski <[EMAIL PROTECTED]> --- drivers/misc/Kconfig | 14 + drivers/misc/Makefile |1 + drivers/misc/ucc_tdm.c | 1000 drivers/misc/ucc_tdm.h | 221 +++ 4 files changed, 1236 insertions(+), 0 deletions(-) create mode 100644 drivers/misc/ucc_tdm.c create mode 100644 drivers/misc/ucc_tdm.h diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index b5e67c0..628b14b 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -232,4 +232,18 @@ config ATMEL_SSC If unsure, say N. +config UCC_TDM + bool "Freescale UCC TDM Driver" + depends on QUICC_ENGINE && UCC_FAST + default n + ---help--- + The TDM driver is for UCC based TDM devices for example, TDM device on + MPC832x RDB. Select it to run PowerVoIP on MPC832x RDB board. + The TDM driver can interface with SLIC kind of devices to transmit + and receive TDM samples. The TDM driver receives Time Division + multiplexed samples(for different channels) from the SLIC device, + demutiplexes them and sends them to the upper layers. At the transmit + end the TDM drivers receives samples for different channels, it + multiplexes them and sends them to the SLIC device. 
+ endif # MISC_DEVICES diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 87f2685..6f0c49d 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_SONY_LAPTOP) += sony-laptop.o obj-$(CONFIG_THINKPAD_ACPI)+= thinkpad_acpi.o obj-$(CONFIG_FUJITSU_LAPTOP) += fujitsu-laptop.o obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o +obj-$(CONFIG_UCC_TDM) += ucc_tdm.o diff --git a/drivers/misc/ucc_tdm.c b/drivers/misc/ucc_tdm.c new file mode 100644 index 000..98e7c72 --- /dev/null +++ b/drivers/misc/ucc_tdm.c @@ -0,0 +1,1000 @@ +/* + * drivers/misc/ucc_tdm.c + * + * UCC Based Linux TDM Driver + * This driver is designed to support UCC based TDM for PowerPC processors. + * This driver can interface with SLIC device to run VOIP kind of + * applications. + * + * Author: Ashish Kalra & Poonam Aggrwal + * + * Copyright (c) 2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by
[patch] lockdep: fix kernel crash on module unload
Hi Linus, Peter, Ingo, we're seeing a VERY repeatable oops on module unload on the Intel wireless drivers with lockdep enabled; turns out Michael Wu had already root-caused this type of crash (see URL below), however the fix he has would increase the structure size of the lockdep metadata a lot; a much simpler fix for the oops is in the patch below; the lockdep module unload code just was not aggressive enough in detecting which metadata structures to nuke. With this patch a very repeatable oops on unload went away (Reinette tested it and it survived > 100 unload cycles now). The oops happens even in 2.6.24-rc8 and the fix is rather obvious, so it might even be 2.6.24 material (or for 24-stable just after that) Subject: lockdep: fix kernel crash on module unload From: Arjan van de Ven <[EMAIL PROTECTED]> Michael Wu noticed in his lkml post at http://marc.info/?l=linux-kernel&m=119396182726091&w=2 that certain wireless drivers ended up having their name in module memory, which would then crash the kernel on module unload. The patch he proposed was a bit clumsy in that it increased the size of a lockdep entry significantly; the patch below tries another approach, it checks, on module teardown, if the name of a class is in module space and then zaps the class. This is very similar to what we already do with keys that are in module space. 
Signed-off-by: Arjan van de Ven <[EMAIL PROTECTED]> --- kernel/lockdep.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) Index: linux-2.6.24-rc8/kernel/lockdep.c === --- linux-2.6.24-rc8.orig/kernel/lockdep.c +++ linux-2.6.24-rc8/kernel/lockdep.c @@ -2932,7 +2932,7 @@ static void zap_class(struct lock_class } -static inline int within(void *addr, void *start, unsigned long size) +static inline int within(const void *addr, void *start, unsigned long size) { return addr >= start && addr < start + size; } @@ -2954,9 +2954,12 @@ void lockdep_free_key_range(void *start, head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) + list_for_each_entry_safe(class, next, head, hash_entry) { if (within(class->key, start, size)) zap_class(class); + else if (within(class->name, start, size)) + zap_class(class); + } } graph_unlock(); -- If you want to reach me at my work email, use [EMAIL PROTECTED] For development, discussion and tips for power savings, visit http://www.lesswatts.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH 8/8] mem_notify v5: support fasync feature
implement FASYNC capability to /dev/mem_notify. fd = open("/dev/mem_notify", O_RDONLY); fcntl(fd, F_SETOWN, getpid()); flags = fcntl(fd, F_GETFL); fcntl(fd, F_SETFL, flags|FASYNC); /* when low memory, receive SIGIO */ ChangeLog v5: new Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- mm/mem_notify.c | 95 +--- 1 file changed, 90 insertions(+), 5 deletions(-) Index: b/mm/mem_notify.c === --- a/mm/mem_notify.c 2008-01-23 23:09:08.0 +0900 +++ b/mm/mem_notify.c 2008-01-23 23:09:27.0 +0900 @@ -23,18 +23,58 @@ #define PROC_WAKEUP_GUARD (10*HZ) struct mem_notify_file_info { - unsigned long last_proc_notify; + unsigned long last_proc_notify; + struct file *file; + + struct list_head fa_list; + int fa_fd; }; static DECLARE_WAIT_QUEUE_HEAD(mem_wait); static atomic_long_t nr_under_memory_pressure_zones = ATOMIC_LONG_INIT(0); static atomic_t nr_watcher_task = ATOMIC_INIT(0); +static LIST_HEAD(mem_notify_fasync_list); +static DEFINE_SPINLOCK(mem_notify_fasync_lock); +static atomic_t nr_fasync_task = ATOMIC_INIT(0); atomic_long_t last_mem_notify = ATOMIC_LONG_INIT(INITIAL_JIFFIES); + +static void mem_notify_kill_fasync_nr(int sig, int band, int nr) +{ + struct mem_notify_file_info *iter, *saved_iter; + LIST_HEAD(l_fired); + + if (!nr) + return; + + spin_lock(_notify_fasync_lock); + + list_for_each_entry_safe_reverse(iter, saved_iter, _notify_fasync_list, fa_list) { + struct fown_struct * fown; + + fown = >file->f_owner; + if (!(sig == SIGURG && fown->signum == 0)) + send_sigio(fown, iter->fa_fd, band); + + list_del(>fa_list); + list_add(>fa_list, _fired); + if(!--nr) + break; + } + + /* rotate moving for FIFO wakeup */ + list_splice(_fired, _notify_fasync_list); + + spin_unlock(_notify_fasync_lock); +} + + void __memory_pressure_notify(struct zone* zone, int pressure) { int nr_wakeup; + int nr_poll_wakeup = 0; + int nr_fasync_wakeup = 0; int flags; spin_lock_irqsave(_wait.lock, flags); @@ -47,13 +87,18 @@ void __memory_pressure_notify(struct zon if (pressure) { int 
nr_watcher = atomic_read(_watcher_task); + int nr_fasync = atomic_read(_fasync_task); nr_wakeup = (nr_watcher >> 4) + 1; if (unlikely(nr_wakeup > 100)) nr_wakeup = 100; + nr_fasync_wakeup = nr_wakeup * nr_fasync/nr_watcher; + nr_poll_wakeup = nr_wakeup - nr_fasync_wakeup; + atomic_long_set(_mem_notify, jiffies); - wake_up_locked_nr(_wait, nr_wakeup); + wake_up_locked_nr(_wait, nr_poll_wakeup); + mem_notify_kill_fasync_nr(SIGIO, POLL_IN, nr_fasync_wakeup); } spin_unlock_irqrestore(_wait.lock, flags); @@ -71,6 +116,9 @@ static int mem_notify_open(struct inode } info->last_proc_notify = INITIAL_JIFFIES; + INIT_LIST_HEAD(>fa_list); + info->file = file; + info->fa_fd = -1; file->private_data = info; atomic_inc(_watcher_task); out: @@ -79,7 +127,16 @@ out: static int mem_notify_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct mem_notify_file_info *info = file->private_data; + + spin_lock(_notify_fasync_lock); + if (!list_empty(>fa_list)) { + list_del(>fa_list); + atomic_dec(_fasync_task); + } + spin_unlock(_notify_fasync_lock); + + kfree(info); atomic_dec(_watcher_task); return 0; } @@ -106,9 +163,37 @@ out: return retval; } +static int mem_notify_fasync(int fd, struct file *filp, int on) +{ + struct mem_notify_file_info *info = filp->private_data; + int result = 0; + + spin_lock(_notify_fasync_lock); + if (on) { + if (list_empty(>fa_list)) { + info->fa_fd = fd; + list_add(>fa_list, _notify_fasync_list); + result = 1; + } else { + info->fa_fd = fd; + } + } else { + if (!list_empty(>fa_list)) { + list_del_init(>fa_list); + info->fa_fd = -1; + result = -1; + } + } + if (result != 0) + atomic_add(result, _fasync_task); + spin_unlock(_notify_fasync_lock); + return abs(result); +} + struct file_operations mem_notify_fops = { - .open = mem_notify_open, + .open= mem_notify_open,
[RFC][PATCH 6/8] mem_notify v5: (optional) fixed incorrect shrink_zone
On x86, ZONE_DMA is very small and is often not used at all. Unfortunately, even when NR_ACTIVE==0 and NR_INACTIVE==0, shrink_zone() still tries to reclaim 1 page, because of the trailing "+ 1" in zone->nr_scan_active += (zone_page_state(zone, NR_ACTIVE) >> priority) + 1; This causes an unnecessary low memory notification ;-) ChangeLog v5: new --- mm/vmscan.c | 21 - 1 file changed, 16 insertions(+), 5 deletions(-) Index: b/mm/vmscan.c === --- a/mm/vmscan.c 2008-01-18 14:18:27.0 +0900 +++ b/mm/vmscan.c 2008-01-18 14:49:06.0 +0900 @@ -948,7 +948,7 @@ static inline void note_zone_scanning_pr static inline int zone_is_near_oom(struct zone *zone) { - return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) + return zone->pages_scanned > (zone_page_state(zone, NR_ACTIVE) + zone_page_state(zone, NR_INACTIVE))*3; } @@ -1214,18 +1214,29 @@ static unsigned long shrink_zone(int pri unsigned long nr_inactive; unsigned long nr_to_scan; unsigned long nr_reclaimed = 0; + unsigned long tmp; + unsigned long zone_active; + unsigned long zone_inactive; if (scan_global_lru(sc)) { /* * Add one to nr_to_scan just to make sure that the kernel * will slowly sift through the active list.
*/ - zone->nr_scan_active += - (zone_page_state(zone, NR_ACTIVE) >> priority) + 1; + zone_active = zone_page_state(zone, NR_ACTIVE); + tmp = (zone_active >> priority) + 1; + if (unlikely(tmp > zone_active)) + tmp = zone_active; + zone->nr_scan_active += tmp; nr_active = zone->nr_scan_active; - zone->nr_scan_inactive += - (zone_page_state(zone, NR_INACTIVE) >> priority) + 1; + + zone_inactive = zone_page_state(zone, NR_INACTIVE); + tmp = (zone_inactive >> priority) + 1; + if (unlikely(tmp > zone_inactive)) + tmp = zone_inactive; + zone->nr_scan_inactive += tmp; nr_inactive = zone->nr_scan_inactive; + if (nr_inactive >= sc->swap_cluster_max) zone->nr_scan_inactive = 0; else -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH 7/8] mem_notify v5: ignore very small zone for prevent incorrect low mem notify.
On x86, ZONE_DMA is very small, so it causes undesirable low memory notifications and should be ignored. On some other architectures, however, ZONE_DMA can be 4GB, which is too large to ignore. Therefore, whether a zone is ignored is decided by its size: a zone holding less than 1/10 of its node's present pages is ignored. ChangeLog: v5: new Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- include/linux/mem_notify.h |3 +++ mm/page_alloc.c|6 +- 2 files changed, 8 insertions(+), 1 deletion(-) Index: b/include/linux/mem_notify.h === --- a/include/linux/mem_notify.h2008-01-23 22:06:04.0 +0900 +++ b/include/linux/mem_notify.h2008-01-23 22:08:02.0 +0900 @@ -22,6 +22,9 @@ static inline void memory_pressure_notif unsigned long target; unsigned long pages_high, pages_free, pages_reserve; + if (unlikely(zone->mem_notify_status == -1)) + return; + if (pressure) { target = atomic_long_read(_mem_notify) + MEM_NOTIFY_FREQ; if (likely(time_before(jiffies, target))) Index: b/mm/page_alloc.c === --- a/mm/page_alloc.c 2008-01-23 22:07:57.0 +0900 +++ b/mm/page_alloc.c 2008-01-23 22:08:02.0 +0900 @@ -3470,7 +3470,11 @@ static void __meminit free_area_init_cor zone->zone_pgdat = pgdat; zone->prev_priority = DEF_PRIORITY; - zone->mem_notify_status = 0; + + if (zone->present_pages < (pgdat->node_present_pages / 10)) + zone->mem_notify_status = -1; + else + zone->mem_notify_status = 0; zone_pcp_init(zone); INIT_LIST_HEAD(>active_list); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH 5/8] mem_notify v5: add new mem_notify field to /proc/zoneinfo
show new member of zone struct by /proc/zoneinfo. ChangeLog: v5: change display order to at last. Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- mm/vmstat.c |8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) Index: b/mm/vmstat.c === --- a/mm/vmstat.c 2008-01-23 22:06:05.0 +0900 +++ b/mm/vmstat.c 2008-01-23 22:08:00.0 +0900 @@ -795,10 +795,12 @@ static void zoneinfo_show_print(struct s seq_printf(m, "\n all_unreclaimable: %u" "\n prev_priority: %i" - "\n start_pfn: %lu", - zone_is_all_unreclaimable(zone), + "\n start_pfn: %lu" + "\n mem_notify_status: %i", + zone_is_all_unreclaimable(zone), zone->prev_priority, - zone->zone_start_pfn); + zone->zone_start_pfn, + zone->mem_notify_status); seq_putc(m, '\n'); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH 4/8] mem_notify v5: memory_pressure_notify() caller
the notification point to happen whenever the VM moves an anonymous page to the inactive list - this is a pretty good indication that there are unused anonymous pages present which will be very likely swapped out soon. and, It is judged out of trouble at the fllowing situations. o memory pressure decrease and stop moves an anonymous page to the inactive list. o free pages increase than (pages_high+lowmem_reserve)*2. ChangeLog: v5: add out of trouble notify to exit of balance_pgdat(). Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- mm/page_alloc.c | 12 mm/vmscan.c | 26 ++ 2 files changed, 38 insertions(+) Index: b/mm/vmscan.c === --- a/mm/vmscan.c 2008-01-23 22:06:08.0 +0900 +++ b/mm/vmscan.c 2008-01-23 22:07:57.0 +0900 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1089,10 +1090,14 @@ static void shrink_active_list(unsigned struct page *page; struct pagevec pvec; int reclaim_mapped = 0; + bool inactivated_anon = 0; if (sc->may_swap) reclaim_mapped = calc_reclaim_mapped(sc, zone, priority); + if (!reclaim_mapped) + memory_pressure_notify(zone, 0); + lru_add_drain(); spin_lock_irq(>lru_lock); pgmoved = sc->isolate_pages(nr_pages, _hold, , sc->order, @@ -1116,6 +1121,13 @@ static void shrink_active_list(unsigned if (!reclaim_mapped || (total_swap_pages == 0 && PageAnon(page)) || page_referenced(page, 0, sc->mem_cgroup)) { + /* deal with the case where there is no +* swap but an anonymous page would be +* moved to the inactive list. 
+*/ + if (!total_swap_pages && reclaim_mapped && + PageAnon(page)) + inactivated_anon = 1; list_add(>lru, _active); continue; } @@ -1123,8 +1135,12 @@ static void shrink_active_list(unsigned list_add(>lru, _active); continue; } + if (PageAnon(page)) + inactivated_anon = 1; list_add(>lru, _inactive); } + if (inactivated_anon) + memory_pressure_notify(zone, 1); pagevec_init(, 1); pgmoved = 0; @@ -1158,6 +1174,8 @@ static void shrink_active_list(unsigned pagevec_strip(); spin_lock_irq(>lru_lock); } + if (!reclaim_mapped) + memory_pressure_notify(zone, 0); pgmoved = 0; while (!list_empty(_active)) { @@ -1659,6 +1677,14 @@ out: goto loop_again; } + for (i = pgdat->nr_zones - 1; i >= 0; i--) { + struct zone *zone = pgdat->node_zones + i; + + if (!populated_zone(zone)) + continue; + memory_pressure_notify(zone, 0); + } + return nr_reclaimed; } Index: b/mm/page_alloc.c === --- a/mm/page_alloc.c 2008-01-23 22:06:08.0 +0900 +++ b/mm/page_alloc.c 2008-01-23 23:09:32.0 +0900 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -435,6 +436,8 @@ static inline void __free_one_page(struc unsigned long page_idx; int order_size = 1 << order; int migratetype = get_pageblock_migratetype(page); + unsigned long prev_free; + unsigned long notify_threshold; if (unlikely(PageCompound(page))) destroy_compound_page(page, order); @@ -444,6 +447,7 @@ static inline void __free_one_page(struc VM_BUG_ON(page_idx & (order_size - 1)); VM_BUG_ON(bad_range(zone, page)); + prev_free = zone_page_state(zone, NR_FREE_PAGES); __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); while (order < MAX_ORDER-1) { unsigned long combined_idx; @@ -465,6 +469,14 @@ static inline void __free_one_page(struc list_add(>lru, >free_area[order].free_list[migratetype]); zone->free_area[order].nr_free++; + + notify_threshold = (zone->pages_high + + zone->lowmem_reserve[MAX_NR_ZONES-1]) * 2; + + if (unlikely((zone->mem_notify_status == 1) && +(prev_free <= notify_threshold) && 
+(zone_page_state(zone, NR_FREE_PAGES) > notify_threshold))) + memory_pressure_notify(zone, 0); } static
[RFC][PATCH 3/8] mem_notify v5: introduce /dev/mem_notify new device (the core of this patch series)
the core of this patch series. add /dev/mem_notify device for notification low memory to user process. fd = open("/dev/mem_notify", O_RDONLY); if (fd < 0) { exit(1); } pollfds.fd = fd; pollfds.events = POLLIN; pollfds.revents = 0; err = poll(, 1, -1); // wake up at low memory ... Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- Documentation/devices.txt |1 drivers/char/mem.c |6 ++ include/linux/mem_notify.h | 42 include/linux/mmzone.h |1 mm/Makefile|2 mm/mem_notify.c| 114 + mm/page_alloc.c|1 7 files changed, 166 insertions(+), 1 deletion(-) Index: b/drivers/char/mem.c === --- a/drivers/char/mem.c2008-01-23 19:21:34.0 +0900 +++ b/drivers/char/mem.c2008-01-23 21:12:44.0 +0900 @@ -34,6 +34,8 @@ # include #endif +extern struct file_operations mem_notify_fops; + /* * Architectures vary in how they handle caching for addresses * outside of main memory. @@ -869,6 +871,9 @@ static int memory_open(struct inode * in filp->f_op = _fops; break; #endif + case 13: + filp->f_op = _notify_fops; + break; default: return -ENXIO; } @@ -901,6 +906,7 @@ static const struct { #ifdef CONFIG_CRASH_DUMP {12,"oldmem",S_IRUSR | S_IWUSR | S_IRGRP, _fops}, #endif + {13,"mem_notify", S_IRUGO, _notify_fops}, }; static struct class *mem_class; Index: b/include/linux/mem_notify.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ b/include/linux/mem_notify.h2008-01-23 23:09:32.0 +0900 @@ -0,0 +1,42 @@ +/* + * Notify applications of memory pressure via /dev/mem_notify + * + * Copyright (C) 2008 Marcelo Tosatti <[EMAIL PROTECTED]>, + *KOSAKI Motohiro <[EMAIL PROTECTED]> + * + * Released under the GPL, see the file COPYING for details. 
+ */ + +#ifndef _LINUX_MEM_NOTIFY_H +#define _LINUX_MEM_NOTIFY_H + +#define MEM_NOTIFY_FREQ (HZ/5) + +extern atomic_long_t last_mem_notify; + +extern void __memory_pressure_notify(struct zone *zone, int pressure); + + +static inline void memory_pressure_notify(struct zone *zone, int pressure) +{ + unsigned long target; + unsigned long pages_high, pages_free, pages_reserve; + + if (pressure) { + target = atomic_long_read(_mem_notify) + MEM_NOTIFY_FREQ; + if (likely(time_before(jiffies, target))) + return; + + pages_high = zone->pages_high; + pages_free = zone_page_state(zone, NR_FREE_PAGES); + pages_reserve = zone->lowmem_reserve[MAX_NR_ZONES-1]; + if (unlikely(pages_free > (pages_high+pages_reserve)*2)) + return; + + } else if (likely(!zone->mem_notify_status)) + return; + + __memory_pressure_notify(zone, pressure); +} + +#endif /* _LINUX_MEM_NOTIFY_H */ Index: b/include/linux/mmzone.h === --- a/include/linux/mmzone.h2008-01-23 19:22:56.0 +0900 +++ b/include/linux/mmzone.h2008-01-23 21:12:44.0 +0900 @@ -283,6 +283,7 @@ struct zone { */ int prev_priority; + int mem_notify_status; ZONE_PADDING(_pad2_) /* Rarely used or read-mostly fields */ Index: b/mm/Makefile === --- a/mm/Makefile 2008-01-23 19:22:28.0 +0900 +++ b/mm/Makefile 2008-01-23 21:12:44.0 +0900 @@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o page_alloc.o page-writeback.o pdflush.o \ readahead.o swap.o truncate.o vmscan.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o $(mmu-y) + page_isolation.o mem_notify.o $(mmu-y) obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o obj-$(CONFIG_BOUNCE) += bounce.o Index: b/mm/mem_notify.c === --- /dev/null 1970-01-01 00:00:00.0 + +++ b/mm/mem_notify.c 2008-01-23 23:09:31.0 +0900 @@ -0,0 +1,114 @@ +/* + * Notify applications of memory pressure via /dev/mem_notify + * + * Copyright (C) 2008 Marcelo Tosatti <[EMAIL PROTECTED]>, + *KOSAKI Motohiro <[EMAIL PROTECTED]> + * + * Released under the GPL, see the file COPYING for details. 
+ */ + +#include +#include +#include
[RFC][PATCH 2/8] mem_notify v5: introduce wake_up_locked_nr() new API
introduce new API wake_up_locked_nr() and wake_up_locked_all(). it it similar as wake_up_nr() and wake_up_all(), but it doesn't lock. Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- include/linux/wait.h |7 +-- kernel/sched.c |5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/wait.h === --- linux-2.6.24-rc6-mm1-memnotify.orig/include/linux/wait.h2008-01-17 18:28:33.0 +0900 +++ linux-2.6.24-rc6-mm1-memnotify/include/linux/wait.h 2008-01-17 18:56:16.0 +0900 @@ -142,7 +142,7 @@ static inline void __remove_wait_queue(w } void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key)); -extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode)); +void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr, void *key)); extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int)); int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned)); @@ -155,7 +155,10 @@ wait_queue_head_t *FASTCALL(bit_waitqueu #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) #define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) -#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL) + +#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1, NULL) +#define wake_up_locked_nr(x, nr) __wake_up_locked((x), TASK_NORMAL, nr, NULL) +#define wake_up_locked_all(x) __wake_up_locked((x), TASK_NORMAL, 0, NULL) #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible_nr(x, nr)__wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) Index: linux-2.6.24-rc6-mm1-memnotify/kernel/sched.c === --- linux-2.6.24-rc6-mm1-memnotify.orig/kernel/sched.c 2008-01-17 18:31:12.0 +0900 +++ 
linux-2.6.24-rc6-mm1-memnotify/kernel/sched.c 2008-01-17 18:56:16.0 +0900 @@ -3837,9 +3837,10 @@ EXPORT_SYMBOL(__wake_up); /* * Same as __wake_up but called with the spinlock in wait_queue_head_t held. */ -void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, void *key) { - __wake_up_common(q, mode, 1, 0, NULL); + __wake_up_common(q, mode, nr_exclusive, 0, key); } /** -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC][PATCH 1/8] mem_notify v5: introduce poll_wait_exclusive() new API
There are 2 way of adding item to wait_queue, 1. add_wait_queue() 2. add_wait_queue_exclusive() and add_wait_queue_exclusive() is very useful API. unforunately, poll_wait_exclusive() against poll_wait() doesn't exist. it means there is no way that wake up only 1 process where polled. wake_up() is wake up all sleeping process by poll_wait(), not 1 process. this patch introduce poll_wait_exclusive() new API for allow wake up only 1 process. unsigned int kosaki_poll(struct file *file, struct poll_table_struct *wait) { poll_wait_exclusive(file, _wait_queue, wait); if (data_exist) return POLLIN | POLLRDNORM; return 0; } Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]> --- fs/eventpoll.c |7 +-- fs/select.c |9 ++--- include/linux/poll.h | 11 +-- 3 files changed, 20 insertions(+), 7 deletions(-) Index: linux-2.6.24-rc6-mm1-memnotify/fs/eventpoll.c === --- linux-2.6.24-rc6-mm1-memnotify.orig/fs/eventpoll.c 2008-01-17 18:28:15.0 +0900 +++ linux-2.6.24-rc6-mm1-memnotify/fs/eventpoll.c 2008-01-17 18:55:47.0 +0900 @@ -675,7 +675,7 @@ out_unlock: * target file wakeup lists. */ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, -poll_table *pt) +poll_table *pt, int exclusive) { struct epitem *epi = ep_item_from_epqueue(pt); struct eppoll_entry *pwq; @@ -684,7 +684,10 @@ static void ep_ptable_queue_proc(struct init_waitqueue_func_entry(>wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; - add_wait_queue(whead, >wait); + if (exclusive) + add_wait_queue_exclusive(whead, >wait); + else + add_wait_queue(whead, >wait); list_add_tail(>llink, >pwqlist); epi->nwait++; } else { Index: linux-2.6.24-rc6-mm1-memnotify/fs/select.c === --- linux-2.6.24-rc6-mm1-memnotify.orig/fs/select.c 2008-01-17 18:28:23.0 +0900 +++ linux-2.6.24-rc6-mm1-memnotify/fs/select.c 2008-01-17 18:55:47.0 +0900 @@ -48,7 +48,7 @@ struct poll_table_page { * poll table. 
*/ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p); + poll_table *p, int exclusive); void poll_initwait(struct poll_wqueues *pwq) { @@ -117,7 +117,7 @@ static struct poll_table_entry *poll_get /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p) + poll_table *p, int exclusive) { struct poll_table_entry *entry = poll_get_entry(p); if (!entry) @@ -126,7 +126,10 @@ static void __pollwait(struct file *filp entry->filp = filp; entry->wait_address = wait_address; init_waitqueue_entry(>wait, current); - add_wait_queue(wait_address, >wait); + if (exclusive) + add_wait_queue_exclusive(wait_address, >wait); + else + add_wait_queue(wait_address, >wait); } #define FDS_IN(fds, n) (fds->in + n) Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/poll.h === --- linux-2.6.24-rc6-mm1-memnotify.orig/include/linux/poll.h2008-01-17 18:28:32.0 +0900 +++ linux-2.6.24-rc6-mm1-memnotify/include/linux/poll.h 2008-01-17 18:55:47.0 +0900 @@ -28,7 +28,8 @@ struct poll_table_struct; /* * structures and helpers for f_op->poll implementations */ -typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); +typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, + struct poll_table_struct *, int); typedef struct poll_table_struct { poll_queue_proc qproc; @@ -37,7 +38,13 @@ typedef struct poll_table_struct { static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { if (p && wait_address) - p->qproc(filp, wait_address, p); + p->qproc(filp, wait_address, p, 0); +} + +static inline void poll_wait_exclusive(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) +{ + if (p && wait_address) + p->qproc(filp, wait_address, p, 1); } static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a 
message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC][PATCH 0/8] mem_notify v5
Hi, /dev/mem_notify is a low memory notification device. It can avoid swapping and OOM by cooperating with user processes. You need not be annoyed by OOM any longer :) Any comments are welcome! patch list [1/8] introduce poll_wait_exclusive() new API [2/8] introduce wake_up_locked_nr() new API [3/8] introduce /dev/mem_notify new device (the core of this patch series) [4/8] memory_pressure_notify() caller [5/8] add new mem_notify field to /proc/zoneinfo [6/8] (optional) fixed incorrect shrink_zone [7/8] ignore very small zone for prevent incorrect low mem notify. [8/8] support fasync feature related discussion: -- LKML OOM notifications requirement discussion http://www.gossamer-threads.com/lists/linux/kernel/832802?nohighlight=1#832802 OOM notifications patch [Marcelo Tosatti] http://marc.info/?l=linux-kernel&m=119273914027743&w=2 mem notifications v3 [Marcelo Tosatti] http://marc.info/?l=linux-mm&m=119852828327044&w=2 Thrashing notification patch [Daniel Spang] http://marc.info/?l=linux-mm&m=119427416315676&w=2 mem notification v4 [kosaki] http://marc.info/?l=linux-mm&m=120035840523718&w=2 Changelog - v4 -> v5 (by KOSAKI Motohiro) o rebase to 2.6.24-rc8-mm1 o change display order of /proc/zoneinfo o ignore very small zone o support fcntl(F_SETFL, FASYNC) o fix some trivial bugs. v3 -> v4 (by KOSAKI Motohiro) o rebase to 2.6.24-rc6-mm1 o avoid wake up all. o add judgement point to __free_one_page(). o add zone awareness. v2 -> v3 (by Marcelo Tosatti) o changes the notification point to happen whenever the VM moves an anonymous page to the inactive list. o implement notification rate limit. v1(oom notify) -> v2 (by Marcelo Tosatti) o name change o notify timing change from just swap thrashing to just before thrashing. o also works with swapless device. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH UCC TDM 0/3] UCC based TDM driver for QE based MPC83xx platforms
Reworked patches after incorporating comments from Andrew, Stephen, Tabi and Kumar. Kumar, could you please consider them for linux-2.6.25? There are three patches. [PATCH 1/3] drivers/misc : UCC TDM driver for mpc83xx platforms. This driver is usable in VoIP kind of applications to interface with SLIC kind of devices to exchange TDM voice samples. The driver is right now in the misc category and exposes a kind of non-standard interface to the clients. TDM Driver Interface Details The TDM driver right now is a misc driver with no subsystem as such. The dts file keeps track of the TDM devices present on the board. Depending on them, the TDM driver initializes that many driver instances while coming up. The driver on the upper level can serve more than one TDM client, depending on the availability of TDM devices. At every new request of a TDM client to bind with a TDM device, a free driver instance is allocated to the client. The interface can be described as follows. tdm_register_client(struct tdm_client *) This API returns a pointer to the structure tdm_client which is of type struct tdm_client { u32 client_id; u32 (*tdm_read)(u32 client_id, short chn_id, short *pcm_buffer, short len); u32 (*tdm_write)(u32 client_id, short chn_id, short *pcm_buffer, short len); wait_queue_head_t *wakeup_event; } It consists of: - client_id: It is the driver handle, used to identify the particular TDM device/driver instance. - tdm_read: It is a function pointer returned by the TDM driver to be used to read TDM data from a particular TDM channel. - tdm_write: It is a function pointer returned by the TDM driver to be used to write TDM data to a particular TDM channel. - wakeup_event: It is the address of a wait_queue event on which the client sleeps, and the TDM driver wakes it up periodically. The driver is configured to wake up the client every 10ms.
Once the TDM client gets registered to a TDM driver instance and a TDM device, it interfaces with the driver using tdm_read, tdm_write and wakeup_event. Note: The TDM driver can be used only by kernel level modules. The driver does not expose any file interface for user applications. It can be compared to the SPI driver, which interfaces with the SPI clients (kernel mode clients) through some APIs. This interface can be improved by writing a platform independent TDM layer. Then all the TDM platforms can be supported below this wrapper layer. This is planned to be done later. [PATCH 2/3] arch/ : Platform changes - device tree entries for UCC TDM driver for MPC8323ERDB platform. - QE changes related to TDM, like, 1) Modified ucc_fast_init so that it can be used by fast UCC based TDM driver. Mainly changes have been made to configure TDM clocks and Fsyncs. 2) Modified get_brg_clk so that it can return the input frequency and input source of any BRG by reading the corresponding entries from device tree. 3) Added new nodes brg and clocks in the device tree which represent input clocks for different BRGs. 4) Modified qe_setbrg accordingly. - new device tree entries added for "clocks" and "brg" [PATCH 3/3] Documentation - Modified Documentation to explain the device tree entries related to UCC TDM driver and the new nodes added ("clocks" and "brg") The patch applies over a merge of galak's for-2.6.25 plus for-2.6.24 plus of_doc_update branches. In brief the steps were git clone git://git.kernel.org/pub/scm/linux/kernel/git/galak/powerpc.git powerpc-galak git checkout -b for-2.6.25 origin/for-2.6.25 git checkout -b for-2.6.24 origin/for-2.6.24 git checkout -b of_doc_update origin/of_doc_update git pull . for-2.6.24 # merge the other two git pull . for-2.6.25 git checkout -b tdm # clean slate for tdm rebase work Also, after applying the patches, changes have to be made corresponding to Tabi's patch "qe: add function qe_clock_source".
The driver has been tested with a VoIP stack and application on MPC8323ERDB. With Regards Poonam -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Enhance mmu notifiers to accomplish a lockless implementation (incomplete).
Expand the mmu_notifiers to allow for lockless callers. To accomplish this, the function receiving notifications needs to implement an rmap equivalent. The notification function is also responsible for tracking page dirty state. Version 2 brings with it __xip_unmap and do_wp_page so this is getting to the point where we can start testing. It does compile now. I am traveling tomorrow but should be able to get back to this tomorrow evening or early Friday. Thank you for your attention, Robin Holt Index: mmu_notifiers/include/linux/export_notifier.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ mmu_notifiers/include/linux/export_notifier.h 2008-01-23 21:24:33.0 -0600 @@ -0,0 +1,50 @@ +#ifndef _LINUX_EXPORT_NOTIFIER_H +#define _LINUX_EXPORT_NOTIFIER_H + +#include +#include + +struct export_notifier { + struct hlist_node hlist; + const struct export_notifier_ops *ops; +}; + +struct export_notifier_ops { + /* +* Called with the page lock held after ptes are modified or removed. +* +* Must clear PageExported() +*/ + void (*invalidate_page)(struct export_notifier *em, struct page *page); +}; + +#ifdef CONFIG_EXPORT_NOTIFIER + +extern void export_notifier_register(struct export_notifier *em); +extern void export_notifier_unregister(struct export_notifier *em); + +extern struct hlist_head export_notifier_list; + +#define export_notifier(function, args...) \ + do { \ + struct export_notifier *__em; \ + struct hlist_node *__n; \ + \ + rcu_read_lock(); \ + hlist_for_each_entry_rcu(__em, __n, _notifier_list, \ + hlist) \ + if (__em->ops->function) \ + __em->ops->function(__em, args); \ + rcu_read_unlock(); \ + } while (0); + +#else + +#define export_notifier(function, args...) 
+ +static inline void export_notifier_register(struct export_notifier *em) {} +static inline void export_notifier_unregister(struct export_notifier *em) {} + +#endif + +#endif /* _LINUX_EXPORT_NOTIFIER_H */ Index: mmu_notifiers/include/linux/page-flags.h === --- mmu_notifiers.orig/include/linux/page-flags.h 2008-01-23 21:24:27.0 -0600 +++ mmu_notifiers/include/linux/page-flags.h2008-01-23 21:57:58.0 -0600 @@ -105,6 +105,7 @@ * 64 bit | FIELDS | ?? FLAGS | * 6332 0 */ +#define PG_exported30 /* Page is referenced by something not in the rmaps */ #define PG_uncached31 /* Page has been mapped as uncached */ #endif @@ -260,6 +261,14 @@ static inline void __ClearPageTail(struc #define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) #define ClearPageUncached(page)clear_bit(PG_uncached, &(page)->flags) +#ifdef CONFIG_EXPORT_NOTIFIER +#define PageExported(page) test_bit(PG_exported, &(page)->flags) +#define SetPageExported(page) set_bit(PG_exported, &(page)->flags) +#define ClearPageExported(page)clear_bit(PG_exported, &(page)->flags) +#else +#define PageExported(page) 0 +#endif + struct page; /* forward declaration */ extern void cancel_dirty_page(struct page *page, unsigned int account_size); Index: mmu_notifiers/mm/Kconfig === --- mmu_notifiers.orig/mm/Kconfig 2008-01-23 21:24:27.0 -0600 +++ mmu_notifiers/mm/Kconfig2008-01-23 21:57:58.0 -0600 @@ -197,3 +197,8 @@ config VIRT_TO_BUS config MMU_NOTIFIER def_bool y bool "MMU notifier, for paging KVM/RDMA" + +config EXPORT_NOTIFIER + def_bool y + depends on 64BIT + bool "Export Notifier for notifying subsystems about changes to page mappings" Index: mmu_notifiers/mm/Makefile === --- mmu_notifiers.orig/mm/Makefile 2008-01-23 21:24:27.0 -0600 +++ mmu_notifiers/mm/Makefile 2008-01-23 21:24:33.0 -0600 @@ -31,4 +31,4 @@ obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o - +obj-$(CONFIG_EXPORT_NOTIFIER) += 
export_notifier.o Index:
Re: [kvm-devel] [PATCH] export notifier #1
On Wed, 2008-01-23 at 16:17 +0200, Avi Kivity wrote: > Robin Holt wrote: > > On Wed, Jan 23, 2008 at 01:51:23PM +0100, Gerd Hoffmann wrote: > > > >> Jumping in here, looks like this could develop into a direction useful > >> for Xen. > >> > >> Background: Xen has a mechanism called "grant tables" for page sharing. > >> Guest #1 can issue a "grant" for another guest #2, which in turn then > >> can use that grant to map the page owned by guest #1 into its address > >> space. This is used by the virtual network/disk drivers, i.e. typically > >> Domain-0 (which has access to the real hardware) maps pages of other > >> guests to fill in disk/network data. > >> > > > > This is extremely similar to what XPMEM is providing. > > > > > > I think that in Xen's case the page tables are the normal cpu page > tables, not an external mmu (like RDMA, kvm, and XPMEM). However, that will be useful to the DRI folks as modern video chips are growing MMU with even page fault capabilities. Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [dm-devel] [PATCH 0/2] dm-band: The I/O bandwidth controller: Overview
> Hi, > > > On Wed, Jan 23, 2008 at 09:53:50PM +0900, Ryo Tsuruta wrote: > > > Dm-band gives bandwidth to each job according to its weight, > > > which each job can set its own value to. > > > At this time, a job is a group of processes with the same pid or pgrp or > > > uid. > > > > It seems to rely on 'current' to classify bios and doesn't do it until the > > map > > function is called, possibly in a different process context, so it won't > > always identify the original source of the I/O correctly: > > Yes, this should be mentioned in the document with the current implementation > as you pointed out. > > By the way, I think once a memory controller of cgroup is introduced, it will > help to track down which cgroup is the original source. do you mean to make this a part of the memory subsystem? YAMAMOTO Takashi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG] The kernel thread for md RAID1 could cause a md RAID1 array deadlock
On Tuesday January 15, [EMAIL PROTECTED] wrote: > > This message describes the details about md-RAID1 issue found by > testing the md RAID1 using the SCSI fault injection framework. > > Abstract: > Both the error handler for md RAID1 and write access request to the md RAID1 > use raid1d kernel thread. The nr_pending flag could cause a race condition > in raid1d, results in a raid1d deadlock. Thanks for finding and reporting this. I believe the following patch should fix the deadlock. If you are able to repeat your test and confirm this I would appreciate it. Thanks, NeilBrown Fix deadlock in md/raid1 when handling a read error. When handling a read error, we freeze the array to stop any other IO while attempting to over-write with correct data. This is done in the raid1d thread and must wait for all submitted IO to complete (except for requests that failed and are sitting in the retry queue - these are counted in ->nr_queue and will stay there during a freeze). However write requests need attention from raid1d as bitmap updates might be required. This can cause a deadlock as raid1 is waiting for requests to finish that themselves need attention from raid1d. So we create a new function 'flush_pending_writes' to give that attention, and call it in freeze_array to be sure that we aren't waiting on raid1d. Thanks to "K.Tanaka" <[EMAIL PROTECTED]> for finding and reporting this problem. Cc: "K.Tanaka" <[EMAIL PROTECTED]> Signed-off-by: Neil Brown <[EMAIL PROTECTED]> ### Diffstat output ./drivers/md/raid1.c | 66 ++- 1 file changed, 45 insertions(+), 21 deletions(-) diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c --- .prev/drivers/md/raid1.c2008-01-18 11:19:09.0 +1100 +++ ./drivers/md/raid1.c2008-01-24 14:21:55.0 +1100 @@ -592,6 +592,37 @@ static int raid1_congested(void *data, i } +static int flush_pending_writes(conf_t *conf) +{ + /* Any writes that have been queue but are awaiting +* bitmap updates get flushed here. 
+* We return 1 if any requests were actually submitted. +*/ + int rv = 0; + + spin_lock_irq(>device_lock); + + if (conf->pending_bio_list.head) { + struct bio *bio; + bio = bio_list_get(>pending_bio_list); + blk_remove_plug(conf->mddev->queue); + spin_unlock_irq(>device_lock); + /* flush any pending bitmap writes to +* disk before proceeding w/ I/O */ + bitmap_unplug(conf->mddev->bitmap); + + while (bio) { /* submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + rv = 1; + } else + spin_unlock_irq(>device_lock); + return rv; +} + /* Barriers * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -678,10 +709,14 @@ static void freeze_array(conf_t *conf) spin_lock_irq(>resync_lock); conf->barrier++; conf->nr_waiting++; + spin_unlock_irq(>resync_lock); + + spin_lock_irq(>resync_lock); wait_event_lock_irq(conf->wait_barrier, conf->barrier+conf->nr_pending == conf->nr_queued+2, conf->resync_lock, - raid1_unplug(conf->mddev->queue)); + ({ flush_pending_writes(conf); + raid1_unplug(conf->mddev->queue); })); spin_unlock_irq(>resync_lock); } static void unfreeze_array(conf_t *conf) @@ -907,6 +942,9 @@ static int make_request(struct request_q blk_plug_device(mddev->queue); spin_unlock_irqrestore(>device_lock, flags); + /* In case raid1d snuck into freeze_array */ + wake_up(>wait_barrier); + if (do_sync) md_wakeup_thread(mddev->thread); #if 0 @@ -1473,28 +1511,14 @@ static void raid1d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; - spin_lock_irqsave(>device_lock, flags); - - if (conf->pending_bio_list.head) { - bio = bio_list_get(>pending_bio_list); - blk_remove_plug(mddev->queue); - spin_unlock_irqrestore(>device_lock, flags); - /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - bitmap_unplug(mddev->bitmap); - - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = 
NULL; - generic_make_request(bio); - bio = next; - } -
RE: fixed a bug of adma in rhel4u5 with HDS7250SASUN500G.
Robert wrote: > > Kuan, does this patch (using the notifiers to see if the command is > really done) still work if one port on the controller has > ADMA disabled > because it's in ATAPI mode? I seem to recall Allen Martin mentioning > that notifiers wouldn't work in this case. > I just tried the 2.6.24-rc7 sata_nv driver with one hd and one cdrom in the same controller. I ran mkfs on the hd and mounted the cdrom, and no error happened. Allen, is there anything about the notifier that we should pay attention to? > > > > > * it sure seems like there are other open sata_nv ADMA > issues -- can we > > hard-confirm or deny this? bugzilla wasn't very helpful > for me. It > > doesn't seem like we can disable ADMA (to solve those > issues) and get > > enough test time in (which is what I said a week (or more?) > ago too...) > > The NCQ/non-NCQ command switching issue is still hitting some people > (last I heard Kuan was looking into this), also there's a > hotplug issue > that Tejun reported.. > I have not yet reproduced the switching issue, even though I removed the udelay function according to your method. I tried 2.6.24-rc7. I don't know which kernel version can easily reproduce the issue, or maybe I omitted some steps during the test. --- This email message is for the sole use of the intended recipient(s) and may contain confidential information. Any unauthorized review, use, disclosure or distribution is prohibited. If you are not the intended recipient, please contact the sender by reply email and destroy all copies of the original message. --- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/2] Relax restrictions on setting CONFIG_NUMA on x86
Hi > To rule it out, can you also try with the patch below applied please? It > should only make a difference on sparsemem so if discontigmem is still > crashing, there is likely another problem. Assuming it crashes, Aaah, sorry. I can't test again until next week. I will repost at that time... > please > post the full dmesg output with loglevel=8 on the command line. Thanks You are right.. I omitted it in my previous mail, sorry, because picking up dmesg is very difficult when the crash happens at boot time. ;-) - kosaki -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/30] ocfs2: Add group extend for online resize
On Wed, Jan 23, 2008 at 02:05:48PM -0800, Andrew Morton wrote: > > On Thu, 17 Jan 2008 14:35:38 -0800 Mark Fasheh <[EMAIL PROTECTED]> wrote: > > From: Tao Ma <[EMAIL PROTECTED]> > > > > This patch adds the ability for a userspace program to request an extend of > > last cluster group on an Ocfs2 file system. The request is made via ioctl, > > OCFS2_IOC_GROUP_EXTEND. This is derived from EXT3_IOC_GROUP_EXTEND, but is > > obviously Ocfs2 specific. > > > > tunefs.ocfs2 would call this for an online-resize operation if the last > > cluster group isn't full. > > > > ... > > > > +/* Check whether the blkno is the super block or one of the backups. */ > > +static inline void ocfs2_check_super_or_backup(struct super_block *sb, > > + sector_t blkno) > > +{ > > + int i; > > + u64 backup_blkno; > > + > > + if (blkno == OCFS2_SUPER_BLOCK_BLKNO) > > + return; > > + > > + for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { > > + backup_blkno = ocfs2_backup_super_blkno(sb, i); > > + if (backup_blkno == blkno) > > + return; > > + } > > + > > + BUG(); > > ow, harsh. > > > +} > > ocfs2_check_super_or_backup() is too large to inline. As is > ocfs2_backup_super_blkno() and probably lots of other stuff. Ok, I added a patch to un-inline ocfs2_check_super_or_backup(). Give me some time to fix up ocfs2_backup_super_blkno() - I'll need to update the ocfs2-tools tree accordingly. > Should ocfs2_backup_super_blkno() return sector_t? Possibly? I think we're safe as-is because a 32 bit machine should not be able to mount a file system where overflow of this could happen. Internally, ocfs2 uses u64 to describe file system blocks, which is why ocfs2_backup_super_blkno() returns that. > > + * so we don't need to lock ip_io_mutex and inode doesn't need to bea > > passed > > + * into this function. 
> > + */ > > +int ocfs2_write_super_or_backup(struct ocfs2_super *osb, > > + struct buffer_head *bh) > > +{ > > + int ret = 0; > > + > > + mlog_entry_void(); > > + > > + BUG_ON(buffer_jbd(bh)); > > + ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); > > + > > + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { > > + ret = -EROFS; > > + goto out; > > + } > > + > > + lock_buffer(bh); > > + set_buffer_uptodate(bh); > > + > > + /* remove from dirty list before I/O. */ > > + clear_buffer_dirty(bh); > > + > > + get_bh(bh); /* for end_buffer_write_sync() */ > > + bh->b_end_io = end_buffer_write_sync; > > + submit_bh(WRITE, bh); > > + > > + wait_on_buffer(bh); > > + > > + if (!buffer_uptodate(bh)) { > > + ret = -EIO; > > + brelse(bh); > > Only use brelse() when the bh might be NULL. put_bh() is cleaner and quicker. Ok, fixed. There was other places in fs/ocfs2/buffer_head_io.c which got the same cleanup. I also took care of the new: if (bh) brelse(bh); patterns in resize.c. Unfortunately, Ocfs2 is littered with this code pattern. > > + } > > + > > +out: > > + mlog_exit(ret); > > + return ret; > > +} > > Did we just reimplement sync_dirty_buffer()? For a bit of background: What Tao did was copy a small core from the other I/O functions into a new one for backup super block writes. This was done for cleanliness. The actual method used for buffer_head I/O in Ocfs2 has been unchanged for years. At first glance, there *does* seem to be some similarity to sync_dirty_buffer(), but there are key differences in that dirty/uptodate state is forced in the ocfs2/buffer_head_io.c case. As such, I'm not sure that sync_dirty_buffer() is a drop-in replacement. > > +first_new_cluster, > > +cl_cpg, 1); > > + le16_add_cpu(>bg_free_bits_count, -1 * backups); > > + } > > + > > + ret = ocfs2_journal_dirty(handle, group_bh); > > + if (ret < 0) { > > + mlog_errno(ret); > > + goto out_rollback; > > + } > > + > > + /* update the inode accordingly. 
*/ > > + ret = ocfs2_journal_access(handle, bm_inode, bm_bh, > > + OCFS2_JOURNAL_ACCESS_WRITE); > > + if (ret < 0) { > > + mlog_errno(ret); > > + goto out_rollback; > > + } > > + > > + chain = le16_to_cpu(group->bg_chain); > > + cr = (>cl_recs[chain]); > > + le32_add_cpu(>c_total, num_bits); > > + le32_add_cpu(>c_free, num_bits); > > + le32_add_cpu(>id1.bitmap1.i_total, num_bits); > > + le32_add_cpu(>i_clusters, new_clusters); > > + > > + if (backups) { > > + le32_add_cpu(>c_free, -1 * backups); > > + le32_add_cpu(>id1.bitmap1.i_used, backups); > > + } > > + > > + spin_lock(_I(bm_inode)->ip_lock); > > + OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); > > + le64_add_cpu(>i_size, new_clusters << osb->s_clustersize_bits); > > +
Re: [PATCH] Fix boot problem in situations where the boot CPU is running on a memoryless node
On Wed, 23 Jan 2008, Nishanth Aravamudan wrote: > Right, so it might have functioned before, but the correctness was > wobbly at best... Certainly the memoryless patch series has tightened > that up, but we missed these SLAB issues. > > I see that your patch fixed Olaf's machine, Pekka. Nice work on > everyone's part tracking this stuff down. Another important result is that I found that GFP_THISNODE is actually required for proper SLAB operation and not only an optimization. Fallback can lead to very bad results. I have two customer reported instances of SLAB corruption here that can be explained now due to fallback to another node. Foreign objects enter the per cpu queue. The wrong node lock is taken during cache_flusharray(). Fields in the struct slab can become corrupted. It typically hits the list field and the inuse field. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
bluetooth : lockdep warning on rfcomm
= [ INFO: possible recursive locking detected ] 2.6.24-rc8-mm1 #8 - bluepush/3213 is trying to acquire lock: (sk_lock-AF_BLUETOOTH){--..}, at: [] l2cap_sock_bind+0x40/0x100 [l2cap] but task is already holding lock: (sk_lock-AF_BLUETOOTH){--..}, at: [] rfcomm_sock_connect+0x3e/0xe0 [rfcomm] other info that might help us debug this: 2 locks held by bluepush/3213: #0: (sk_lock-AF_BLUETOOTH){--..}, at: [] rfcomm_sock_connect+0x3e/0xe0 [rfcomm] #1: (rfcomm_mutex){--..}, at: [] rfcomm_dlc_open+0x26/0x60 [rfcomm] stack backtrace: Pid: 3213, comm: bluepush Not tainted 2.6.24-rc8-mm1 #8 [] ? printk+0x18/0x20 [] print_deadlock_bug+0xc7/0xe0 [] check_deadlock+0x6c/0x80 [] validate_chain+0x14c/0x320 [] __lock_acquire+0x1c1/0x730 [] lock_acquire+0x79/0xb0 [] ? l2cap_sock_bind+0x40/0x100 [l2cap] [] lock_sock_nested+0x55/0x70 [] ? l2cap_sock_bind+0x40/0x100 [l2cap] [] l2cap_sock_bind+0x40/0x100 [l2cap] [] kernel_bind+0xa/0x10 [] rfcomm_session_create+0x4c/0x110 [rfcomm] [] __rfcomm_dlc_open+0x129/0x150 [rfcomm] [] rfcomm_dlc_open+0x38/0x60 [rfcomm] [] rfcomm_sock_connect+0xb6/0xe0 [rfcomm] [] sys_connect+0x99/0xd0 [] ? cache_add_dev+0x39/0x1a0 [] ? put_lock_stats+0xd/0x30 [] ? lock_release_holdtime+0x60/0x80 [] ? fget+0x7c/0x100 [] ? __lock_release+0x47/0x70 [] ? fget+0x7c/0x100 [] ? copy_from_user+0x37/0x70 [] sys_socketcall+0xa5/0x230 [] ? trace_hardirqs_on+0xb9/0x130 [] ? restore_nocheck+0x12/0x15 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 13/30] ocfs2: Implement group add for online resize
On Wed, Jan 23, 2008 at 02:05:54PM -0800, Andrew Morton wrote: > > On Thu, 17 Jan 2008 14:35:39 -0800 Mark Fasheh <[EMAIL PROTECTED]> wrote: > > From: Tao Ma <[EMAIL PROTECTED]> > > > > This patch adds the ability for a userspace program to request that a > > properly formatted cluster group be added to the main allocation bitmap for > > an Ocfs2 file system. The request is made via an ioctl, OCFS2_IOC_GROUP_ADD. > > On a high level, this is similar to ext3, but we use a different ioctl as > > the structure which has to be passed through is different. > > > > During an online resize, tunefs.ocfs2 will format any new cluster groups > > which must be added to complete the resize, and call OCFS2_IOC_GROUP_ADD on > > each one. Kernel verifies that the core cluster group information is valid > > and then does the work of linking it into the global allocation bitmap. > > > > ... > > > > +/* Used to pass group descriptor data when online resize is done */ > > +struct ocfs2_new_group_input { > > + __u64 group;/* Group descriptor's blkno. */ > > + __u32 clusters; /* Total number of clusters in this group */ > > + __u32 frees;/* Total free clusters in this group */ > > + __u16 chain;/* Chain for this group */ > > + __u16 reserved1; > > + __u32 reserved2; > > +}; > > Are we sure that all architectures will lay this out in the same way with > both 32-bit and 64-bit userspace? I looked it over several times and haven't been able to find a problem - everything is aligned at 8 byte boundaries. Do you see anything that might be problematic? > > +/* Add a new group descriptor to global_bitmap. 
*/ > > +int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input > > *input) > > +{ > > + int ret; > > + handle_t *handle; > > + struct buffer_head *main_bm_bh = NULL; > > + struct inode *main_bm_inode = NULL; > > + struct ocfs2_dinode *fe = NULL; > > + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); > > + struct buffer_head *group_bh = NULL; > > + struct ocfs2_group_desc *group = NULL; > > + struct ocfs2_chain_list *cl; > > + struct ocfs2_chain_rec *cr; > > + u16 cl_bpc; > > + > > + mlog_entry_void(); > > + > > + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) > > + return -EROFS; > > + > > + main_bm_inode = ocfs2_get_system_file_inode(osb, > > + GLOBAL_BITMAP_SYSTEM_INODE, > > + OCFS2_INVALID_SLOT); > > + if (!main_bm_inode) { > > + ret = -EINVAL; > > + mlog_errno(ret); > > + goto out; > > + } > > + > > + mutex_lock(_bm_inode->i_mutex); > > + > > + ret = ocfs2_inode_lock(main_bm_inode, _bm_bh, 1); > > + if (ret < 0) { > > + mlog_errno(ret); > > + goto out_mutex; > > + } > > + > > + fe = (struct ocfs2_dinode *)main_bm_bh->b_data; > > + > > + if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != > > +ocfs2_group_bitmap_size(osb->sb) * 8) { > > + mlog(ML_ERROR, "The disk is too old and small." > > +" Force to do offline resize."); > > + ret = -EINVAL; > > + goto out_unlock; > > + } > > + > > + ret = ocfs2_read_block(osb, input->group, _bh, 0, NULL); > > + if (ret < 0) { > > + mlog(ML_ERROR, "Can't read the group descriptor # %llu " > > +"from the device.", input->group); > > + goto out; > > Bug: goto wrong_place. (Points at fault-injection code) Ooof, good catch - thanks. A fix for this is attached to this e-mail, and of course will be in ocfs2.git. 
> > + } > > + > > + ocfs2_set_new_buffer_uptodate(inode, group_bh); > > + > > + ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); > > + if (ret) { > > + mlog_errno(ret); > > + goto out_unlock; > > + } > > + > > + mlog(0, "Add a new group %llu in chain = %u, length = %u\n", > > +input->group, input->chain, input->clusters); > > + > > + handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS); > > + if (IS_ERR(handle)) { > > + mlog_errno(PTR_ERR(handle)); > > + ret = -EINVAL; > > + goto out_unlock; > > + } > > + > > + cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); > > + cl = >id2.i_chain; > > + cr = >cl_recs[input->chain]; > > + > > + ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, > > + OCFS2_JOURNAL_ACCESS_WRITE); > > + if (ret < 0) { > > + mlog_errno(ret); > > + goto out_commit; > > + } > > + > > + group = (struct ocfs2_group_desc *)group_bh->b_data; > > + group->bg_next_group = cr->c_blkno; > > + > > + ret = ocfs2_journal_dirty(handle, group_bh); > > + if (ret < 0) { > > + mlog_errno(ret); > > + goto out_commit; > > + } > > + > > + ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, >
Re: 2.6.24-rc8-mm1 : net tcp_input.c warnings
On Jan 23, 2008 7:01 PM, Ilpo Järvinen <[EMAIL PROTECTED]> wrote: > On Wed, 23 Jan 2008, Ilpo Järvinen wrote: > > > On Wed, 23 Jan 2008, Dave Young wrote: > > > > > On Jan 23, 2008 3:41 PM, Ilpo Järvinen <[EMAIL PROTECTED]> wrote: > > > > > > > > On Tue, 22 Jan 2008, David Miller wrote: > > > > > > > > > From: "Dave Young" <[EMAIL PROTECTED]> > > > > > Date: Wed, 23 Jan 2008 09:44:30 +0800 > > > > > > > > > > > On Jan 22, 2008 6:47 PM, Ilpo Järvinen <[EMAIL PROTECTED]> wrote: > > > > > > > [PATCH] [TCP]: debug S+L > > > > > > > > > > > > Thanks, If there's new findings I will let you know. > > > > > > > > > > Thanks for helping with this bug Dave. > > > > > > > > I noticed btw that there thing might (is likely to) spuriously trigger > > > > at > > > > WARN_ON(sacked != tp->sacked_out); because those won't be equal when > > > > SACK > > > > is not enabled. If that does happen too often, I send a fixed patch for > > > > it, yet, the fact that I print print tp->rx_opt.sack_ok allows > > > > identification of those cases already as it's zero when SACK is not > > > > enabled. > > > > > > > > Just ask if you need the updated debug patch. > > > > > > Thanks, please send, I would like to get it. > > > > There you go. I fixed non-SACK case by adding tcp_is_sack checks there and > > also added two verifys to tcp_ack to see if there's corruption outside of > > TCP. > > There's some discussion about a problem that is very likely the same as in > here (sorry for not remembering to cc you in there due to rapid progress): > >http://marc.info/?t=12010717423=1=2 Thanks. 
New warning trigged with your debug patch: ACPI: PCI Interrupt :00:1b.0[A] -> GSI 16 (level, low) -> IRQ 16 PCI: Setting latency timer of device :00:1b.0 to 64 e100: Intel(R) PRO/100 Network Driver, 3.5.23-k4-NAPI e100: Copyright(c) 1999-2006 Intel Corporation ACPI: PCI Interrupt :03:08.0[A] -> GSI 20 (level, low) -> IRQ 20 e100: eth1: e100_probe: addr 0xefaff000, irq 20, MAC addr 00:13:72:e7:4d:66 eth0: setting full-duplex. [ cut here ] WARNING: at net/ipv4/tcp_ipv4.c:197 tcp_verify_wq+0x1b6/0x1c0() Modules linked in: snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss eeprom e100 psmouse snd_hda_intel snd_pcm snd_timer btusb bluetooth serio_raw snd 3c59x sg evdev thermal soundcore rtc_cmos snd_page_alloc rtc_core rtc_lib i2c_i801 processor button intel_agp dcdbas pcspkr agpgart Pid: 0, comm: swapper Not tainted 2.6.24-rc8-mm1 #8 [] ? have_callable_console+0x20/0x30 [] warn_on_slowpath+0x54/0x80 [] ? timer_list_show_tickdevices+0xf0/0x110 [] ? native_sched_clock+0x85/0xe0 [] ? put_lock_stats+0x21/0x30 [] ? lock_release_holdtime+0x60/0x80 [] ? check_bytes_and_report+0x24/0xc0 [] ? check_bytes_and_report+0x24/0xc0 [] ? check_pad_bytes+0x61/0x80 [] tcp_verify_wq+0x1b6/0x1c0 [] ? tcp_clean_rtx_queue+0x2d9/0x5b0 [] tcp_add_reno_sack+0x30/0x50 [] tcp_fastretrans_alert+0x3d2/0x700 [] tcp_ack+0x1b3/0x3a0 [] tcp_rcv_established+0x3eb/0x710 [] tcp_v4_do_rcv+0xe5/0x100 [] tcp_v4_rcv+0x5db/0x660 [] ? tcp_v4_rcv+0x387/0x660 [] ? ip_local_deliver_finish+0x2d/0x1d0 [] ip_local_deliver_finish+0x84/0x1d0 [] ? ip_local_deliver_finish+0x2d/0x1d0 [] ? __lock_release+0x47/0x70 [] ip_local_deliver+0xb7/0xc0 [] ip_rcv_finish+0xb2/0x3c0 [] ? sock_def_readable+0x48/0xa0 [] ? sock_queue_rcv_skb+0xb1/0x1a0 [] ? sock_queue_rcv_skb+0xf7/0x1a0 [] ip_rcv+0x18f/0x290 [] ? packet_rcv_spkt+0xd0/0x130 [] netif_receive_skb+0x2b6/0x330 [] ? netif_receive_skb+0x127/0x330 [] ? 
process_backlog+0x83/0x100 [] process_backlog+0x8e/0x100 [] net_rx_action+0x13c/0x230 [] ? net_rx_action+0x59/0x230 [] ? __do_softirq+0x6e/0x120 [] __do_softirq+0x93/0x120 [] do_softirq+0x7a/0x80 [] irq_exit+0x65/0x90 [] do_IRQ+0x41/0x80 [] ? trace_hardirqs_on+0xb9/0x130 [] common_interrupt+0x2e/0x34 [] ? mwait_idle_with_hints+0x40/0x50 [] ? mwait_idle+0x0/0x20 [] mwait_idle+0x12/0x20 [] cpu_idle+0x61/0x110 [] rest_init+0x5d/0x60 [] start_kernel+0x1fa/0x260 [] ? unknown_bootoption+0x0/0x130 === ---[ end trace 14b601818e6903ac ]--- P: 5 L: 0 vs 0 S: 0 vs 1 w: 2044790889-2044796616 (0) TCP wq(s) < TCP wq(h) +++h+< l0 s1 f0 p5 seq: su2044790889 hs2044795029 sn2044796616 [ cut here ] WARNING: at net/ipv4/tcp_ipv4.c:197 tcp_verify_wq+0x1b6/0x1c0() Modules linked in: snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss eeprom e100 psmouse snd_hda_intel snd_pcm snd_timer btusb bluetooth serio_raw snd 3c59x sg evdev thermal soundcore rtc_cmos snd_page_alloc rtc_core rtc_lib i2c_i801 processor button intel_agp dcdbas pcspkr agpgart Pid: 0, comm: swapper Not tainted 2.6.24-rc8-mm1 #8 [] ? have_callable_console+0x20/0x30 [] warn_on_slowpath+0x54/0x80 [] ? generic_make_request+0x1c0/0x2e0 [] ? printk+0x18/0x20 [] ? tcp_print_queue+0x1a4/0x230 [] ? vprintk+0x308/0x320 [] tcp_verify_wq+0x1b6/0x1c0
[PATCH] correct inconsistent ntp interval/tick_length usage
I recently noticed on one of my boxes that when synched with an NTP server, the drift value reported for the system was ~283ppm. While in some cases, clock hardware can be that bad, it struck me as unusual as the system was using the acpi_pm clocksource, which is one of the more trustworthy and accurate clocksources on x86 hardware. I brought up another system and let it sync to the same NTP server, and I noticed a similar 280some ppm drift. In looking at the code, I found that the acpi_pm's constant frequency was being computed correctly at boot-up, however once the system was up, even without the ntp daemon running, the clocksource's frequency was being modified by the clocksource_adjust() function. Digging deeper, I realized that in the code that keeps track of how much the clocksource is skewing from the ntp desired time, we were using different lengths to establish how long a time interval was. The clocksource was being set up with the following interval: NTP_INTERVAL_LENGTH = NSEC_PER_SEC/NTP_INTERVAL_FREQ While the ntp code was using the tick_length_base value: tick_length_base ~= (tick_usec * NSEC_PER_USEC * USER_HZ) /NTP_INTERVAL_FREQ The subtle difference is: (tick_usec * NSEC_PER_USEC * USER_HZ) != NSEC_PER_SEC This difference in calculation was causing the clocksource correction code to apply a correction factor to the clocksource so the two intervals were the same, however this results in the actual frequency of the clocksource being made incorrect. I believe this difference would affect all clocksources, although to differing degrees depending on the clocksource resolution. The issue was introduced when my HZ free ntp patch landed in 2.6.21-rc1, so my apologies for the mistake, and for not noticing it until now. The following patch corrects the clocksource's initialization code so it uses the same interval length as the code in ntp.c. After applying this patch, the drift value for the same system went from ~283ppm to only 2.635ppm. 
I believe this patch to be good, however it does affect all arches and I've only tested on x86, so some caution is advised. I do think it would be a likely candidate for a stable 2.6.24.x release. Any thoughts or feedback would be appreciated. Signed-off-by: John Stultz <[EMAIL PROTECTED]> Index: linux/kernel/time/timekeeping.c === --- linux.orig/kernel/time/timekeeping.c +++ linux/kernel/time/timekeeping.c @@ -208,7 +208,8 @@ static void change_clocksource(void) clock->error = 0; clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); + clocksource_calculate_interval(clock, + (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); tick_clock_notify(); @@ -265,7 +266,8 @@ void __init timekeeping_init(void) ntp_clear(); clock = clocksource_get_next(); - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); + clocksource_calculate_interval(clock, + (unsigned long)(current_tick_length()>>TICK_LENGTH_SHIFT)); clock->cycle_last = clocksource_read(clock); xtime.tv_sec = sec; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]PCIE ASPM support - takes 3
On Wed, 2008-01-23 at 10:26 -0800, Greg KH wrote: > On Wed, Jan 23, 2008 at 10:20:54AM +0800, Shaohua Li wrote: > > > > On Tue, 2008-01-22 at 14:58 -0800, Greg KH wrote: > > > On Fri, Jan 18, 2008 at 09:56:28AM +0800, Shaohua Li wrote: > > > > v3->v2, fixed the issues Matthew Wilcox raised. > > > > > > > > PCI Express ASPM defines a protocol for PCI Express components in the D0 > > > > state to reduce Link power by placing their Links into a low power state > > > > and instructing the other end of the Link to do likewise. This > > > > capability allows hardware-autonomous, dynamic Link power reduction > > > > beyond what is achievable by software-only controlled power management. > > > > However, The device should be configured by software appropriately. > > > > Enabling ASPM will save power, but will introduce device latency. > > > > > > > > This patch adds ASPM support in Linux. It introduces a global policy for > > > > ASPM, a sysfs file /sys/module/pcie_aspm/parameters/policy can control > > > > it. The interface can be used as a boot option too. Currently we have > > > > below setting: > > > > -default, BIOS default setting > > > > -powersave, highest power saving mode, enable all available ASPM > > > > state > > > > and clock power management > > > > -performance, highest performance, disable ASPM and clock power > > > > management > > > > By default, the 'default' policy is used currently. > > > > > > > > In my test, power difference between powersave mode and performance mode > > > > is about 1.3w in a system with 3 PCIE links. > > > > > > > > please review, any comments will be appreciated. > > > > > > Can you please fix up all of the warnings that checkpatch.pl and sparse > > > produce from this patch? 
> > > > > > Also, one small thing: > > > > > > > --- linux.orig/include/linux/pci.h 2008-01-16 15:59:42.0 > > > > +0800 > > > > +++ linux/include/linux/pci.h 2008-01-18 09:41:20.0 +0800 > > > > @@ -164,6 +164,10 @@ struct pci_dev { > > > >this is D0-D3, D0 being > > > > fully functional, > > > >and D3 being off. */ > > > > > > > > +#ifdef CONFIG_PCIEASPM > > > > + void*link_state;/* ASPM link state. */ > > > > +#endif > > > > > > Can we make this a "real" pointer to a structure? I note that you use > > > two different structures here in this pointer, should you really do > > > that? It's good to get type-checks whereever possible. > > The structure is just for internal use of ASPM, just don't want make it > > global. > > Yes, you don't need to expose the structure type, just name it, and then > define it in the code itself. > > But using a void pointer as you have here, allows you to assign two > different types of structures to this pointer. Are you sure that you > always get this right? :) > > Please, let's try to inforce type-saftey and set this to be a specific > type of pointer to a structure. That will require you to possibly merge > the two structures, which will require some code changes. Ok, fixed. PCI Express ASPM defines a protocol for PCI Express components in the D0 state to reduce Link power by placing their Links into a low power state and instructing the other end of the Link to do likewise. This capability allows hardware-autonomous, dynamic Link power reduction beyond what is achievable by software-only controlled power management. However, The device should be configured by software appropriately. Enabling ASPM will save power, but will introduce device latency. This patch adds ASPM support in Linux. It introduces a global policy for ASPM, a sysfs file /sys/module/pcie_aspm/parameters/policy can control it. The interface can be used as a boot option too. 
Currently we have below setting: -default, BIOS default setting -powersave, highest power saving mode, enable all available ASPM state and clock power management -performance, highest performance, disable ASPM and clock power management By default, the 'default' policy is used currently. In my test, power difference between powersave mode and performance mode is about 1.3w in a system with 3 PCIE links. Signed-off-by: Shaohua Li <[EMAIL PROTECTED]> --- drivers/pci/pci-sysfs.c |5 drivers/pci/pci.c |4 drivers/pci/pcie/Kconfig | 20 + drivers/pci/pcie/Makefile |3 drivers/pci/pcie/aspm.c | 802 ++ drivers/pci/probe.c |5 drivers/pci/remove.c |4 include/linux/aspm.h | 44 ++ include/linux/pci.h |5 include/linux/pci_regs.h |8 10 files changed, 900 insertions(+) Index: linux/drivers/pci/pcie/Makefile === --- linux.orig/drivers/pci/pcie/Makefile2008-01-23 10:22:14.0 +0800 +++ linux/drivers/pci/pcie/Makefile 2008-01-24
[PATCH] W1: w1_therm.c standardize units to millidegrees C
Here is the patch to standardize the temperature units to millidegrees C for the two sensor conversion routines. Previously the routines were, w1_DS18B20_convert_temp degrees C w1_DS18S20_convert_temp millidegrees C Unfortunately this will break any program using the ds18b20 value as it will now be 1000 times bigger. Fortunately there can't be that many users out there, or some of these bugs will have been fixed by now, such as the negative C error (see previous patch) that makes me think the ds18b20 is the better choice to change because of the current bugs. Signed-off-by: David Fries <[EMAIL PROTECTED]> Index: slaves/w1_therm.c === RCS file: /home/david/kernel/k/spacedout/patches/linux/drivers/w1/slaves/w1_therm.c,v retrieving revision 1.5 diff -u -p -r1.5 w1_therm.c --- slaves/w1_therm.c 24 Jan 2008 01:23:46 - 1.5 +++ slaves/w1_therm.c 24 Jan 2008 01:57:39 - @@ -92,6 +92,7 @@ struct w1_therm_family_converter int (*convert)(u8 rom[9]); }; +/* The return value is millidegrees Centigrade. */ static inline int w1_DS18B20_convert_temp(u8 rom[9]); static inline int w1_DS18S20_convert_temp(u8 rom[9]); @@ -113,7 +114,7 @@ static struct w1_therm_family_converter static inline int w1_DS18B20_convert_temp(u8 rom[9]) { s16 t = (rom[1] << 8) | rom[0]; - t /= 16; + t = t*1000/16; return t; } -- David Fries <[EMAIL PROTECTED]> http://fries.net/~david/ (PGP encryption key available) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Enhance mmu notifiers to accomplish a lockless implementation (incomplete).
Expand the mmu_notifiers to allow for lockless callers. To accomplish this, the function receiving notifications needs to implement an rmap equivalent. The notification function is also responsible for tracking page dirty state. With this patch, I am getting fairly close to not needing the invalidate_page mmu_notifier. The combination of invalidate_range and this export_notifier covers all the paths I can see so far except __xip_unmap and do_wp_page. __xip_unmap is not so much of a concern, but I would like to handle it as well. The one that really concerns me is do_wp_page. I am having difficulty figuring out a way to handle this without holding locks. For either of these callers of ptep_clear_flush, I welcome suggestions on methods to call a notifier without holding any non-sleepable locks. I am traveling tomorrow but should be able to get back to this tomorrow evening or early Friday. This has not even been compiled yet. Just marking it up for now. Thank you for your attention, Robin Holt Index: mmu_notifiers/include/linux/export_notifier.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ mmu_notifiers/include/linux/export_notifier.h 2008-01-23 19:46:05.0 -0600 @@ -0,0 +1,48 @@ +#ifndef _LINUX_EXPORT_NOTIFIER_H +#define _LINUX_EXPORT_NOTIFIER_H + +#include +#include + +struct export_notifier { + struct hlist_node list; + const struct export_notifier_ops *ops; +}; + +struct export_notifier_ops { + /* +* Called with the page lock held after ptes are modified or removed. +* +* Must clear PageExported() +*/ + void (*invalidate_page)(struct export_notifier *em, struct page *page); +}; + +#ifdef CONFIG_EXPORT_NOTIFIER + +extern void export_notifier_register(struct export_notifier *em); +extern void export_notifier_unregister(struct export_notifier *em); + +extern struct hlist_head export_notifier_list; + +#define export_notifier(function, args...) 
\ + do { \ + struct export_notifier *__em; \ + \ + rcu_read_lock(); \ + hlist_for_each_entry_rcu(__em, &export_notifier_list, list) \ + if (__em->ops->function) \ + __em->ops->function(__em, args); \ + rcu_read_unlock(); \ + } while (0); + +#else + +#define export_notifier(function, args...) + +static inline void export_notifier_register(struct export_notifier *em) {} +static inline void export_notifier_unregister(struct export_notifier *em) {} + +#endif + +#endif /* _LINUX_EXPORT_NOTIFIER_H */ Index: mmu_notifiers/include/linux/page-flags.h === --- mmu_notifiers.orig/include/linux/page-flags.h 2008-01-23 19:44:40.0 -0600 +++ mmu_notifiers/include/linux/page-flags.h2008-01-23 19:46:05.0 -0600 @@ -105,6 +105,7 @@ * 64 bit | FIELDS | ?? FLAGS | * 6332 0 */ +#define PG_exported30 /* Page is referenced by something not in the rmaps */ #define PG_uncached31 /* Page has been mapped as uncached */ #endif @@ -260,6 +261,14 @@ static inline void __ClearPageTail(struc #define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) #define ClearPageUncached(page)clear_bit(PG_uncached, &(page)->flags) +#ifdef CONFIG_EXPORT_NOTIFIER +#define PageExported(page) test_bit(PG_exported, &(page)->flags) +#define SetPageExported(page) set_bit(PG_exported, &(page)->flags) +#define ClearPageExported(page)clear_bit(PG_exported, &(page)->flags) +#else +#define PageExported(page) 0 +#endif + struct page; /* forward declaration */ extern void cancel_dirty_page(struct page *page, unsigned int account_size); Index: mmu_notifiers/mm/Kconfig === --- mmu_notifiers.orig/mm/Kconfig 2008-01-23 19:44:39.0 -0600 +++ mmu_notifiers/mm/Kconfig2008-01-23 19:46:06.0 -0600 @@ -197,3 +197,8 @@ config VIRT_TO_BUS config MMU_NOTIFIER def_bool y bool "MMU notifier, for paging KVM/RDMA" + +config EXPORT_NOTIFIER + def_bool y + depends on 64BIT + bool "Export Notifier for notifying subsystems about changes to page mappings" Index: mmu_notifiers/mm/Makefile === --- mmu_notifiers.orig/mm/Makefile 2008-01-23
Re: [PATCH] [POWERPC] Add fixed-phy support for fs_enet
On Tue, 18 Dec 2007, Jochen Friedrich wrote: > This patch adds support to use the fixed-link property > of an ethernet node to fs_enet for the > CONFIG_PPC_CPM_NEW_BINDING case. > > Signed-off-by: Jochen Friedrich <[EMAIL PROTECTED]> > Acked-by: Jeff Garzik <[EMAIL PROTECTED]> > Acked-by: Vitali Bordug <[EMAIL PROTECTED]> > --- > drivers/net/fs_enet/fs_enet-main.c |9 - > 1 files changed, 8 insertions(+), 1 deletions(-) > applied. - k -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: sata_nv and 2.6.24 (was Re: fixed a bug of adma in rhel4u5 with HDS7250SASUN500G.)
Jeff Garzik wrote: Robert Hancock wrote: Jeff Garzik wrote: Ping... sata_nv status is still a bit open for 2.6.24, and I would like to move us forward a bit. * Kuan's patch... it has been confirmed (and is needed), correct? can someone work up a good patch for 2.6.24? The only one I ever received was badly word-wrapped, and at the time, Robert seemed uncertain of it, so I waited. I can get you one later today hopefully. A question came up on this patch, whether it will cause problems with ATAPI mode - waiting for a response from the NVIDIA guys. * ADMA ATAPI 4GB issues... playing tricks with the ordering of allocations and DMA masks is just way too fragile. We just cannot guarantee that all allocators work that way. The obvious solution to me seems to be hardcoding the consistent DMA mask to 32-bit, but using 64-bit for regular dma mask if-and-only-if ADMA is enabled. That's not enough to fix the problem since there's issues with actual transfer data being allocated above 4GB as well, not just the consistent allocations (it appears that blk_queue_bounce_limit setting to 32-bit doesn't prevent this on x86_64). Either we play some funky games with changing the DMA mask of the entire device to 32-bit if either port is in ATAPI mode (which blew up when I tried it) or we add the ability to set the DMA mask independently on each port (like by setting the mask on the SCSI device and using that for DMA mapping instead) which requires core changes. Its all funky games that no other driver is doing... There is one guaranteed to work scenario -- set all masks and bounce limits etc. to 32-bit. There is also one highly-likely-to-work scenario, disabling ADMA by default. Sure, if you don't mind a potentially significant performance regression. All the DMA mask problems are due to the fact that the mask settings for both ports are ganged together on the PCI device. 
If we could set the DMA masks on the SCSI device or something else that was port-specific, and do the command DMA mapping against that device, then most of the wierdness goes away. It does seem like we're starting to get a bit of NVIDIA interest in looking into ADMA issues, which is definitely welcome. * it sure seems like there are other open sata_nv ADMA issues -- can we hard-confirm or deny this? bugzilla wasn't very helpful for me. It doesn't seem like we can disable ADMA (to solve those issues) and get enough test time in (which is what I said a week (or more?) ago too...) The NCQ/non-NCQ command switching issue is still hitting some people (last I heard Kuan was looking into this), also there's a hotplug issue that Tejun reported.. The former implies we need to disable swncq for 2.6.24, if it's not stable yet. Huh? Nothing to do with SWNCQ, which last I checked was still off by default. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: Usb generic serial.
On Thu, Jan 10, 2008 at 01:29:27PM +0100, Paul Chavent wrote: > Hello and happy new year 2008. > > In drivers/usb/serial/usb-serial.c there is a function (fixup_generic / > set_to_generic_if_null) that hooks null fops to the generic driver ones. > > The drivers/usb/serial/generic.c implements throttle, unthrottle and > resume, but those aren't used by default in fixup_generic. Is there any > reasons ? Could we submit a patch for that ? Do you have a driver that needs to use these throttle/unthrottle and resume functions? If so, we can change things to use them, but so far, it hasn't been needed. thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [WARNING -rc8] at fs/sysfs/dir.c:424 sysfs_add_one(), related with processor (ACPI)
On Wed, Jan 23, 2008 at 02:06:43PM -0800, Andrew Morton wrote: > > On Mon, 21 Jan 2008 18:53:18 +0100 "Miguel Ojeda" <[EMAIL PROTECTED]> wrote: > > Booting 2.6.24-rc8 I get this: > > > > > > sysfs: duplicate filename 'fan' can not be created > > WARNING: at fs/sysfs/dir.c:424 sysfs_add_one() > > Pid: 819, comm: modprobe Not tainted 2.6.24-rc8 #2 > > [] sysfs_add_one+0x9f/0xe0 > > [] create_dir+0x48/0x90 > > [] sysfs_create_dir+0x29/0x50 > > [] kobject_get+0xf/0x20 > > [] kobject_add+0x8f/0x1b0 > > [] kobject_register+0x21/0x50 > > [] bus_add_driver+0x71/0x1e0 > > [] acpi_fan_init+0x2f/0x4d [fan] > > [] sys_init_module+0x126/0x19b0 > > [] rb_insert_color+0xb7/0xe0 > > [] acpi_bus_register_driver+0x0/0x38 > > [] syscall_call+0x7/0xb > > === > > kobject_add failed for fan with -EEXIST, don't try to register things > > with the same name in the same directory. > > Pid: 819, comm: modprobe Not tainted 2.6.24-rc8 #2 > > [] kobject_add+0x111/0x1b0 > > [] kobject_register+0x21/0x50 > > [] bus_add_driver+0x71/0x1e0 > > [] acpi_fan_init+0x2f/0x4d [fan] > > [] sys_init_module+0x126/0x19b0 > > [] rb_insert_color+0xb7/0xe0 > > [] acpi_bus_register_driver+0x0/0x38 > > [] syscall_call+0x7/0xb > > === > > processor: exports duplicate symbol acpi_processor_set_thermal_limit > > (owned by kernel) > > Could apply following debug patch and see the result? 
diff -upr linux/fs/sysfs/dir.c linux.new/fs/sysfs/dir.c --- linux/fs/sysfs/dir.c2008-01-23 09:56:24.0 +0800 +++ linux.new/fs/sysfs/dir.c2008-01-23 09:59:12.0 +0800 @@ -418,6 +418,8 @@ void sysfs_addrm_start(struct sysfs_addr */ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { + if (!strcmp(sd->s_name, "fan")) + dump_stack(); if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) { printk(KERN_WARNING "sysfs: duplicate filename '%s' " "can not be created\n", sd->s_name); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: sata_nv and 2.6.24 (was Re: fixed a bug of adma in rhel4u5 with HDS7250SASUN500G.)
Robert Hancock wrote: Jeff Garzik wrote: Ping... sata_nv status is still a bit open for 2.6.24, and I would like to move us forward a bit. * Kuan's patch... it has been confirmed (and is needed), correct? can someone work up a good patch for 2.6.24? The only one I ever received was badly word-wrapped, and at the time, Robert seemed uncertain of it, so I waited. I can get you one later today hopefully. * ADMA ATAPI 4GB issues... playing tricks with the ordering of allocations and DMA masks is just way too fragile. We just cannot guarantee that all allocators work that way. The obvious solution to me seems to be hardcoding the consistent DMA mask to 32-bit, but using 64-bit for regular dma mask if-and-only-if ADMA is enabled. That's not enough to fix the problem since there's issues with actual transfer data being allocated above 4GB as well, not just the consistent allocations (it appears that blk_queue_bounce_limit setting to 32-bit doesn't prevent this on x86_64). Either we play some funky games with changing the DMA mask of the entire device to 32-bit if either port is in ATAPI mode (which blew up when I tried it) or we add the ability to set the DMA mask independently on each port (like by setting the mask on the SCSI device and using that for DMA mapping instead) which requires core changes. Its all funky games that no other driver is doing... There is one guaranteed to work scenario -- set all masks and bounce limits etc. to 32-bit. There is also one highly-likely-to-work scenario, disabling ADMA by default. * it sure seems like there are other open sata_nv ADMA issues -- can we hard-confirm or deny this? bugzilla wasn't very helpful for me. It doesn't seem like we can disable ADMA (to solve those issues) and get enough test time in (which is what I said a week (or more?) ago too...) The NCQ/non-NCQ command switching issue is still hitting some people (last I heard Kuan was looking into this), also there's a hotplug issue that Tejun reported.. 
The former implies we need to disable swncq for 2.6.24, if it's not stable yet. Jeff -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -v8 3/4] Enable the MS_ASYNC functionality in sys_msync()
On Thursday 24 January 2008 04:05, Linus Torvalds wrote: > On Wed, 23 Jan 2008, Anton Salikhmetov wrote: > > + > > + if (pte_dirty(*pte) && pte_write(*pte)) { > > Not correct. > > You still need to check "pte_present()" before you can test any other > bits. For a non-present pte, none of the other bits are defined, and for > all we know there might be architectures out there that require them to > be non-dirty. > > As it is, you just possibly randomly corrupted the pte. > > Yeah, on all architectures I know of, if the pte is clear, neither of > those tests will trigger, so it just happens to work, but it's still > wrong. Probably it can fail for !present nonlinear mappings on many architectures. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Missing usb_find_device symbol from usb.c
On Tue, Jan 22, 2008 at 08:18:32PM +0100, Wilco Beekhuizen wrote: > 2.6.24-rc8 (possibly lower rc's too) has usb_find_device removed from > usb.c. This causes problems when compiling modules that need > usb_find_device. > This patch puts the symbol back in place. Unless someone has a good > reason not to include usb_find_device? I don't see any users in the kernel tree of this function that can be built as a module. Do you know of any external modules that need it? Any pointers to the code anywhere? thanks, greg k-h -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] bluetooth : move children of connection device to NULL before connection down
On Thu, Jan 24, 2008 at 09:19:26AM +0800, Dave Young wrote: > On Wed, Jan 23, 2008 at 02:06:29PM -0800, Andrew Morton wrote: > > > On Tue, 22 Jan 2008 07:18:16 +0100 Marcel Holtmann <[EMAIL PROTECTED]> > > > wrote: > > > Hi Dave, > > > > > > > > Add people missed in cc-list. > > > > > > > > Thanks Dave for your continued efforts on Bluetooth bugs like this. > > > > > > > > Marcel, are you going to review/ACK/integrate/push-upstream/whatever > > > > any of these Bluetooth patches? > > > > > > > > It hasn't been getting much love from you as of late, you are one of > > > > the listed maintainers, and I don't want to lose any of Dave's > > > > valuable bug fixing work. > > > > > > I will be fully back in business next week. Just got stuck in a project > > > that needed 200% of my time to get it going. > > > > > > > These patches in -mm: > > > > bluetooth-hidp_process_hid_control-remove-unnecessary-parameter-dealing.patch > > bluetooth-uninlining.patch > > drivers-bluetooth-bpa10xc-fix-memleak.patch > > drivers-bluetooth-btsdioc-fix-double-free.patch > > bluetooth-blacklist-another-broadcom-bcm2035-device.patch > > bluetooth-rfcomm-tty_close-before-destruct.patch > > hci_ldisc-fix-null-pointer-deref.patch > > > > could benefit from some attention please. > > Hi, andrew > > For the patch bluetooth-rfcomm-tty_close-before-destruct.patch I have to > rethinkabout it. > > 1. The subject is not correct, should be rfcomm-tty-destroy-before-tty_close. > 2. Don't know what I was thinking that time, could you replace it with the > following better one? Sorry for that. > > --- > rfcomm dev could be deleted in tty_hangup, so we must not call rfcomm_dev_del > again to prevent from destroying rfcomm dev before tty close. 
> > Signed-off-by: Dave Young <[EMAIL PROTECTED]> > > --- > net/bluetooth/rfcomm/tty.c |2 ++ > 1 file changed, 2 insertions(+) > > diff -upr a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c > --- a/net/bluetooth/rfcomm/tty.c 2008-01-24 09:03:59.0 +0800 > +++ b/net/bluetooth/rfcomm/tty.c 2008-01-24 09:03:59.0 +0800 > @@ -429,6 +429,8 @@ static int rfcomm_release_dev(void __use > if (dev->tty) > tty_vhangup(dev->tty); > > + if (!test_bit(RFCOMM_RELEASE_ONHUP, >flags)) > + rfcomm_dev_del(dev); > rfcomm_dev_del(dev); ~ > rfcomm_dev_put(dev); > return 0; Please ignore the previous silly one, now resubmit : -- rfcomm dev could be deleted in tty_hangup, so we must not call rfcomm_dev_del again to prevent from destroying rfcomm dev before tty close. Signed-off-by: Dave Young <[EMAIL PROTECTED]> --- net/bluetooth/rfcomm/tty.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff -upr a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c --- a/net/bluetooth/rfcomm/tty.c2008-01-24 09:21:56.0 +0800 +++ b/net/bluetooth/rfcomm/tty.c2008-01-24 09:21:56.0 +0800 @@ -429,7 +429,8 @@ static int rfcomm_release_dev(void __use if (dev->tty) tty_vhangup(dev->tty); - rfcomm_dev_del(dev); + if (!test_bit(RFCOMM_RELEASE_ONHUP, >flags)) + rfcomm_dev_del(dev); rfcomm_dev_put(dev); return 0; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH][ppc] logical/bitand typo in powerpc/boot/4xx.c
On Wed, 23 Jan 2008 23:37:33 +0100 Roel Kluin <[EMAIL PROTECTED]> wrote: > logical/bitand typo > > Signed-off-by: Roel Kluin <[EMAIL PROTECTED]> > --- > diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c > index ebf9e21..dcfb459 100644 > --- a/arch/powerpc/boot/4xx.c > +++ b/arch/powerpc/boot/4xx.c > @@ -104,7 +104,7 @@ void ibm4xx_denali_fixup_memsize(void) > val = DDR_GET_VAL(val, DDR_CS_MAP, DDR_CS_MAP_SHIFT); > cs = 0; > while (val) { > - if (val && 0x1) > + if (val & 0x1) > cs++; > val = val >> 1; > } Hm, good catch. Stefan, have you had problems with this code at all? josh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 08/30] ocfs2: Readpages support
On Wed, Jan 23, 2008 at 02:05:41PM -0800, Andrew Morton wrote: > > On Thu, 17 Jan 2008 14:35:34 -0800 Mark Fasheh <[EMAIL PROTECTED]> wrote: > > Add ->readpages support to Ocfs2. This is rather trivial - all it required > > is a small update to ocfs2_get_block (for mapping full extents via b_size) > > and an ocfs2_readpages() function which partially mirrors ocfs2_readpage(). > > > > Was there any performance improvement? To be honest, most of my testing was aimed at validating the correctness of this patch. I think there's a pretty good case to be made for having an ocfs2_readpages() method though. We get to avoid expensive cluster locking during readahead, which would normally make us bounce data around the cluster. Also, we save some cpu by avoiding the entire operation for in-inode data. > > + * siutations are safe to ignore. > > Cnat tpye. Ok, point takne ;) I fixed this and the other typo you pointed out. --Mark -- Mark Fasheh Principal Software Developer, Oracle [EMAIL PROTECTED] From: Mark Fasheh <[EMAIL PROTECTED]> ocfs2: fix minor typos Signed-off-by: Mark Fasheh <[EMAIL PROTECTED]> --- fs/ocfs2/aops.c |2 +- fs/ocfs2/buffer_head_io.c |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 865684f..bc7b4cb 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -332,7 +332,7 @@ out: /* * This is used only for read-ahead. Failures or difficult to handle - * siutations are safe to ignore. + * situations are safe to ignore. 
* * Right now, we don't bother with BH_Boundary - in-inode extent lists * are quite large (243 extents on 4k blocks), so most inodes don't diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 6eaa67f..ab3a41c 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -301,7 +301,7 @@ static inline void ocfs2_check_super_or_backup(struct super_block *sb, } /* - * Write super block and bakcups doesn't need to collaborate with journal, + * Write super block and backups doesn't need to collaborate with journal, * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed * into this function. */ -- 1.5.3.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] bluetooth : move children of connection device to NULL before connection down
On Wed, Jan 23, 2008 at 02:06:29PM -0800, Andrew Morton wrote: > > On Tue, 22 Jan 2008 07:18:16 +0100 Marcel Holtmann <[EMAIL PROTECTED]> > > wrote: > > Hi Dave, > > > > > > Add people missed in cc-list. > > > > > > Thanks Dave for your continued efforts on Bluetooth bugs like this. > > > > > > Marcel, are you going to review/ACK/integrate/push-upstream/whatever > > > any of these Bluetooth patches? > > > > > > It hasn't been getting much love from you as of late, you are one of > > > the listed maintainers, and I don't want to lose any of Dave's > > > valuable bug fixing work. > > > > I will be fully back in business next week. Just got stuck in a project > > that needed 200% of my time to get it going. > > > > These patches in -mm: > > bluetooth-hidp_process_hid_control-remove-unnecessary-parameter-dealing.patch > bluetooth-uninlining.patch > drivers-bluetooth-bpa10xc-fix-memleak.patch > drivers-bluetooth-btsdioc-fix-double-free.patch > bluetooth-blacklist-another-broadcom-bcm2035-device.patch > bluetooth-rfcomm-tty_close-before-destruct.patch > hci_ldisc-fix-null-pointer-deref.patch > > could benefit from some attention please. Hi, andrew For the patch bluetooth-rfcomm-tty_close-before-destruct.patch I have to rethink about it. 1. The subject is not correct, should be rfcomm-tty-destroy-before-tty_close. 2. Don't know what I was thinking that time, could you replace it with the following better one? Sorry for that. --- rfcomm dev could be deleted in tty_hangup, so we must not call rfcomm_dev_del again to prevent from destroying rfcomm dev before tty close. 
Signed-off-by: Dave Young <[EMAIL PROTECTED]> --- net/bluetooth/rfcomm/tty.c |2 ++ 1 file changed, 2 insertions(+) diff -upr a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c --- a/net/bluetooth/rfcomm/tty.c2008-01-24 09:03:59.0 +0800 +++ b/net/bluetooth/rfcomm/tty.c2008-01-24 09:03:59.0 +0800 @@ -429,6 +429,8 @@ static int rfcomm_release_dev(void __use if (dev->tty) tty_vhangup(dev->tty); + if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) + rfcomm_dev_del(dev); rfcomm_dev_del(dev); rfcomm_dev_put(dev); return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: kernel bug report 2.6.24-rc8 on core2quad q6600 with debian unstable amd64
> drivers/acpi/pci_bind.c: In function 'acpi_pci_unbind'::0: > internal compiler error: Segmentation fault > Depending on which kernel i build, it fails on different things. Seems like a classic symptom of some sort of hardware fault/memory corruption... > I tried disabling HPET and VT in the BIOS but that didn't help. I also > ruled out any memory problems with memtest86+.Don't know what else to do. So your memory may be OK... (memtest86 does miss some problem sometimes) > [8.6.] SCSI information (from /proc/scsi/scsi) > Attached devices: > Host: scsi0 Channel: 00 Id: 01 Lun: 00 > Vendor: IBM-PCCO Model: DDRS-34560W !# Rev: S97B > Type: Direct-AccessANSI SCSI revision: 02 > Host: scsi0 Channel: 00 Id: 05 Lun: 00 > Vendor: NEC Model: CD-ROM DRIVE:464 Rev: 1.14 > Type: CD-ROM ANSI SCSI revision: 02 > Host: scsi1 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6V250F0 Rev: VA11 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi2 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6V250F0 Rev: VA11 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi3 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6V250F0 Rev: VA11 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi4 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6V250F0 Rev: VA11 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi5 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6V250F0 Rev: VA11 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi6 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6V250F0 Rev: VA11 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi7 Channel: 00 Id: 00 Lun: 00 > Vendor: ATA Model: Maxtor 6L250R0 Rev: BAJ4 > Type: Direct-AccessANSI SCSI revision: 05 > Host: scsi7 Channel: 00 Id: 01 Lun: 00 > Vendor: ATA Model: Maxtor 6L250R0 Rev: BAH4 > Type: Direct-AccessANSI SCSI revision: 05 That is a lot of hard drives... 
my guess would be that your power supply is marginal, a cable/adapter card isn't seated perfectly somewhere, something is overheating, or something like that. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/1] x86: early cpu_to_node fix in numa_64.c
Both of these references to cpu_to_node() can potentially occur before the "late" cpu_to_node map is set up. Therefore, they should be changed to use early_cpu_to_node(). Signed-off-by: Mike Travis <[EMAIL PROTECTED]> --- arch/x86/mm/numa_64.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -251,7 +251,7 @@ void __init numa_init_array(void) rr = first_node(node_online_map); for (i = 0; i < NR_CPUS; i++) { - if (cpu_to_node(i) != NUMA_NO_NODE) + if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); rr = next_node(rr, node_online_map); @@ -528,7 +528,8 @@ void __init numa_initmem_init(unsigned l __cpuinit void numa_add_cpu(int cpu) { - set_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); + set_bit(cpu, + (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]); } void __cpuinit numa_set_node(int cpu, int node) -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: kernel bug report 2.6.24-rc8 on core2quad q6600 with debian unstable amd64
On Thu, 24 Jan 2008 01:14:35 +0100 Tobias Winter <[EMAIL PROTECTED]> wrote: > -BEGIN PGP SIGNED MESSAGE- > Hash: SHA512 > > Hi folks, I'm sending the bug report here, since i have no idea where > else to send it. I hope it is helping. > > > [1.] One line summary of the problem: > The kernel panics under high CPU/IO load, compilers segfault, mdadm > used to do so, too. > > [2.] Full description of the problem/report: > I upgraded my system to fairly new hardware and find myself unable to > compile any kernel source and the system panics under high load e.g. > resyncing a RAID or building a kernel. Building kernel is 100% sure to > result in a kernel panic. Sounds like bad memory. Run memtest for at least 24hrs. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] x86: Construct 32 bit boot time page tables in native format.
Ian Campbell wrote: FYI, CONFIG_DEBUG_PAGEALLOC+PAE is broken. I'll dig in but it might be the weekend before I get a chance (there's a beer festival in town ;-)). I'm poking around trying to get Xen working again as well; I may end up fixing it in passing. At the moment I've got a problem with early_ioremap's bt_pte[] array ending up hanging around in init's pagetable, which Xen is most unhappy about. J -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 03/30] ocfs2: Remove mount/unmount votes
Andrew, thanks for the review. On Wed, Jan 23, 2008 at 02:05:09PM -0800, Andrew Morton wrote: > > On Thu, 17 Jan 2008 14:35:29 -0800 Mark Fasheh <[EMAIL PROTECTED]> wrote: > > The node maps that are set/unset by these votes are no longer relevant, thus > > we can remove the mount and umount votes. Since those are the last two > > remaining votes, we can also remove the entire vote infrastructure. > > > > The vote thread has been renamed to the downconvert thread, and the small > > amount of functionality related to managing it has been moved into > > fs/ocfs2/dlmglue.c. All references to votes have been removed or updated. > > > > Locking looks fishy. Btw, the downconvert code is functionally identical from what was in vote.c for years now. It just got moved into dlmglue.c and renamed. > > +static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) > > +{ > > + unsigned long processed; > > + struct ocfs2_lock_res *lockres; > > + > > + mlog_entry_void(); > > + > > + spin_lock(&osb->dc_task_lock); > > + /* grab this early so we know to try again if a state change and > > +* wake happens part-way through our work */ > > + osb->dc_work_sequence = osb->dc_wake_sequence; > > + > > + processed = osb->blocked_lock_count; > > + while (processed) { > > + BUG_ON(list_empty(&osb->blocked_lock_list)); > > + > > + lockres = list_entry(osb->blocked_lock_list.next, > > +struct ocfs2_lock_res, l_blocked_list); > > + list_del_init(&lockres->l_blocked_list); > > + osb->blocked_lock_count--; > > + spin_unlock(&osb->dc_task_lock); > > + > > + BUG_ON(!processed); > > + processed--; > > + > > + ocfs2_process_blocked_lock(osb, lockres); > > + > > + spin_lock(&osb->dc_task_lock); > > + } > > + spin_unlock(&osb->dc_task_lock); > > Once the lock has been dropped there is (apparently) nothing to prevent > alteration of the list and of ->blocked_lock_count. If this happens, > either items will be missed or we go BUG. The rule is that only the downconvert thread is allowed to remove locks from that list. 
Everyone else just decides a lock needs downconverting and queues it. Attached to this e-mail is a patch to better document this. > > + mlog_exit_void(); > > +} > > + > > +static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) > > +{ > > + int empty = 0; > > + > > + spin_lock(&osb->dc_task_lock); > > + if (list_empty(&osb->blocked_lock_list)) > > + empty = 1; > > + > > + spin_unlock(&osb->dc_task_lock); > > + return empty; > > +} > > This function appears to be returning a value which is unreliable once the > lock was dropped. In this case that's ok. It's only used during dlmglue shutdown when no new locks should ever be queued. > > +static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) > > +{ > > + int should_wake = 0; > > + > > + spin_lock(&osb->dc_task_lock); > > + if (osb->dc_work_sequence != osb->dc_wake_sequence) > > + should_wake = 1; > > + spin_unlock(&osb->dc_task_lock); > > + > > + return should_wake; > > +} > > Ditto. Another case where it's ok :) > > +int ocfs2_downconvert_thread(void *arg) > > +{ > > + int status = 0; > > + struct ocfs2_super *osb = arg; > > + > > + /* only quit once we've been asked to stop and there is no more > > +* work available */ > > + while (!(kthread_should_stop() && > > +ocfs2_downconvert_thread_lists_empty(osb))) { > > Extra whitespace Ok, a cleanup of this and the other whitespace problems you pointed out (in resize.c) have been committed to the tree. --Mark -- Mark Fasheh Principal Software Developer, Oracle [EMAIL PROTECTED] From: Mark Fasheh <[EMAIL PROTECTED]> ocfs2: document access rules for blocked_lock_list ocfs2_super->blocked_lock_list and ocfs2_super->blocked_lock_count have some usage restrictions which aren't immediately obvious to anyone reading the code. It's a good idea to document this so that we avoid making costly mistakes in the future. 
Signed-off-by: Mark Fasheh <[EMAIL PROTECTED]> --- fs/ocfs2/ocfs2.h |6 ++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 22e334d..d084805 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -262,6 +262,12 @@ struct ocfs2_super unsigned long dc_wake_sequence; unsigned long dc_work_sequence; + /* +* Any thread can add locks to the list, but the downconvert +* thread is the only one allowed to remove locks. Any change +* to this rule requires updating +* ocfs2_downconvert_thread_do_work(). +*/ struct list_head blocked_lock_list; unsigned long blocked_lock_count; -- 1.5.3.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][ppc] logical/bitand typo in powerpc/boot/4xx.c
On Thu, 2008-01-24 at 01:18 +0100, Roel Kluin wrote: > when you use git-grep -n "\(&&\|||\)${s}0x\([A-Z0-9]*\|[a-z0-9]*\)", > (with s="[[:space:]]*") there will be false positives [] > so i'd propose to change that to > +# Check for bitwise tests written as boolean > + if ($line =~ /(?:(?:\(|\&\&|\|\|)\s*0[xX]\s*(?:&&|\|\|)| > + (?:\&\&|\|\|)\s*0[xX]\s*(?:\)|&&|\|\|))/) { > + WARN("boolean test with hexadecimal, perhaps just 1 \& > or \|?\n" . $herecurr); > + } > + All 13 false positives are "hex_constant logical_test variable". I think that Linus would say that they are all poor style. Still, fine by me. cheers, Joe -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/4] firewire: fw-core: react on bus resets while the config ROM is being fetched
read_rom() obtained a fresh new fw_device.generation for each read transaction. Hence it was able to continue reading in the middle of the ROM even if a bus reset happened. However the device may have modified the ROM during the reset. We would end up with a corrupt fetched ROM image then. Although all of this is quite unlikely, it is not impossible. Therefore we now restart reading the ROM if the bus generation changed. Side note: The barrier in read_rom(), inserted by patch "firewire: enforce access order between generation and node ID" is not necessary anymore because the sequence of calls fw_device_init() -> read_bus_info_block() -> read_rom() read_rom() read_rom() ... will take care that generation is read before node_id, won't it? Signed-off-by: Stefan Richter <[EMAIL PROTECTED]> --- Refreshed version of the patch from November 1 2007. drivers/firewire/fw-device.c | 28 +--- 1 file changed, 17 insertions(+), 11 deletions(-) Index: linux/drivers/firewire/fw-device.c === --- linux.orig/drivers/firewire/fw-device.c +++ linux/drivers/firewire/fw-device.c @@ -388,15 +388,12 @@ complete_transaction(struct fw_card *car complete(_data->done); } -static int read_rom(struct fw_device *device, int index, u32 * data) +static int +read_rom(struct fw_device *device, int generation, int index, u32 *data) { struct read_quadlet_callback_data callback_data; struct fw_transaction t; u64 offset; - int generation = device->generation; - - /* device->node_id, accessed below, must not be older than generation */ - smp_rmb(); init_completion(_data.done); @@ -412,7 +409,14 @@ static int read_rom(struct fw_device *de return callback_data.rcode; } -static int read_bus_info_block(struct fw_device *device) +/* + * Read the bus info block, perform a speed probe, and read all of the rest of + * the config ROM. We do all this with a cached bus generation. If the bus + * generation changes under us, read_bus_info_block will fail and get retried. 
+ * It's better to start all over in this case because the node from which we + * are reading the ROM may have changed the ROM during the reset. + */ +static int read_bus_info_block(struct fw_device *device, int generation) { static u32 rom[256]; u32 stack[16], sp, key; @@ -422,7 +426,7 @@ static int read_bus_info_block(struct fw /* First read the bus info block. */ for (i = 0; i < 5; i++) { - if (read_rom(device, i, [i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, [i]) != RCODE_COMPLETE) return -1; /* * As per IEEE1212 7.2, during power-up, devices can @@ -457,7 +461,8 @@ static int read_bus_info_block(struct fw device->max_speed = device->card->link_speed; while (device->max_speed > SCODE_100) { - if (read_rom(device, 0, ) == RCODE_COMPLETE) + if (read_rom(device, generation, 0, ) == + RCODE_COMPLETE) break; device->max_speed--; } @@ -490,7 +495,7 @@ static int read_bus_info_block(struct fw return -1; /* Read header quadlet for the block to get the length. */ - if (read_rom(device, i, [i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, [i]) != RCODE_COMPLETE) return -1; end = i + (rom[i] >> 16) + 1; i++; @@ -509,7 +514,8 @@ static int read_bus_info_block(struct fw * it references another block, and push it in that case. */ while (i < end) { - if (read_rom(device, i, [i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, [i]) != + RCODE_COMPLETE) return -1; if ((key >> 30) == 3 && (rom[i] >> 30) > 1 && sp < ARRAY_SIZE(stack)) @@ -656,7 +662,7 @@ static void fw_device_init(struct work_s * device. */ - if (read_bus_info_block(device) < 0) { + if (read_bus_info_block(device, device->generation) < 0) { if (device->config_rom_retries < MAX_RETRIES) { device->config_rom_retries++; schedule_delayed_work(>work, RETRY_DELAY); -- Stefan Richter -=-==--- ---= ==--- http://arcgraph.de/sr/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at
[PATCH 2/4] firewire: fw-cdev: use device generation, not card generation
We have to use the fw_device.generation here, not the fw_card.generation, because the generation must never be newer than the node ID when we emit a transaction. This cannot be guaranteed with fw_card.generation. Signed-off-by: Stefan Richter <[EMAIL PROTECTED]> --- This code also needs barriers to work precisely as intended. They will be added by a subsequent patch which consistently updates readers and writers of .generation and .node_id. drivers/firewire/fw-cdev.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) Index: linux/drivers/firewire/fw-cdev.c === --- linux.orig/drivers/firewire/fw-cdev.c +++ linux/drivers/firewire/fw-cdev.c @@ -206,12 +206,12 @@ fill_bus_reset_event(struct fw_cdev_even event->closure = client->bus_reset_closure; event->type = FW_CDEV_EVENT_BUS_RESET; + event->generation= client->device->generation; event->node_id = client->device->node_id; event->local_node_id = card->local_node->node_id; event->bm_node_id= 0; /* FIXME: We don't track the BM. */ event->irm_node_id = card->irm_node->node_id; event->root_node_id = card->root_node->node_id; - event->generation= card->generation; } static void -- Stefan Richter -=-==--- ---= ==--- http://arcgraph.de/sr/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/4] firewire: enforce access order between generation and node ID
fw_device.node_id and fw_device.generation are accessed without mutexes. We have to ensure that all readers will get to see node_id updates before generation updates. An earlier incarnation of this patch fixes an inability to recognize devices after "giving up on config rom", https://bugzilla.redhat.com/show_bug.cgi?id=429950 Signed-off-by: Stefan Richter <[EMAIL PROTECTED]> --- Rework of patches firewire: fw-core: enforce write order when updating fw_device.generation and parts of firewire: fw-core: react on bus resets while the config ROM is being fetched firewire: fw-sbp2: enforce read order of device generation and node ID from November 1 2007. Update: - write site and read sites folded into one patch - added fix to fw_device_enable_phys_dma() and fill_bus_reset_event() - smp_ barriers are sufficient - comments, changelog drivers/firewire/fw-cdev.c |1 + drivers/firewire/fw-device.c | 13 +++-- drivers/firewire/fw-device.h | 12 drivers/firewire/fw-sbp2.c |2 ++ drivers/firewire/fw-topology.c |5 + 5 files changed, 31 insertions(+), 2 deletions(-) Index: linux/drivers/firewire/fw-device.c === --- linux.orig/drivers/firewire/fw-device.c +++ linux/drivers/firewire/fw-device.c @@ -182,9 +182,13 @@ static void fw_device_release(struct dev int fw_device_enable_phys_dma(struct fw_device *device) { + int generation = device->generation; + + /* device->node_id, accessed below, must not be older than generation */ + smp_rmb(); return device->card->driver->enable_phys_dma(device->card, device->node_id, -device->generation); +generation); } EXPORT_SYMBOL(fw_device_enable_phys_dma); @@ -389,12 +393,16 @@ static int read_rom(struct fw_device *de struct read_quadlet_callback_data callback_data; struct fw_transaction t; u64 offset; + int generation = device->generation; + + /* device->node_id, accessed below, must not be older than generation */ + smp_rmb(); init_completion(_data.done); offset = 0xf400ULL + index * 4; fw_send_request(device->card, , TCODE_READ_QUADLET_REQUEST, - 
device->node_id, device->generation, device->max_speed, + device->node_id, generation, device->max_speed, offset, NULL, 4, complete_transaction, &callback_data); wait_for_completion(&callback_data.done); @@ -801,6 +809,7 @@ void fw_node_event(struct fw_card *card, device = node->data; device->node_id = node->node_id; + smp_wmb(); /* update node_id before generation */ device->generation = card->generation; if (atomic_read(&device->state) == FW_DEVICE_RUNNING) { PREPARE_DELAYED_WORK(&device->work, fw_device_update); Index: linux/drivers/firewire/fw-topology.c === --- linux.orig/drivers/firewire/fw-topology.c +++ linux/drivers/firewire/fw-topology.c @@ -518,6 +518,11 @@ fw_core_handle_bus_reset(struct fw_card card->bm_retries = 0; card->node_id = node_id; + /* +* Update node_id before generation to prevent anybody from using +* a stale node_id together with a current generation. +*/ + smp_wmb(); card->generation = generation; card->reset_jiffies = jiffies; schedule_delayed_work(&card->work, 0); Index: linux/drivers/firewire/fw-sbp2.c === --- linux.orig/drivers/firewire/fw-sbp2.c +++ linux/drivers/firewire/fw-sbp2.c @@ -672,6 +672,7 @@ static void sbp2_login(struct work_struc int generation, node_id, local_node_id; generation= device->generation; + smp_rmb();/* node_id must not be older than generation */ node_id = device->node_id; local_node_id = device->card->node_id; @@ -922,6 +923,7 @@ static void sbp2_reconnect(struct work_s int generation, node_id, local_node_id; generation= device->generation; + smp_rmb();/* node_id must not be older than generation */ node_id = device->node_id; local_node_id = device->card->node_id; Index: linux/drivers/firewire/fw-device.h === --- linux.orig/drivers/firewire/fw-device.h +++ linux/drivers/firewire/fw-device.h @@ -35,6 +35,18 @@ struct fw_attribute_group { struct attribute *attrs[11]; }; +/* + * Note, fw_device.generation always has to be read before fw_device.node_id. + * Use SMP memory barriers to ensure this. Otherwise requests will be sent + * to an outdated
[PATCH 1/4] firewire: fw-sbp2: use device generation, not card generation
There was a small window where a login or reconnect job could use an already updated card generation with an outdated node ID. We have to use the fw_device.generation here, not the fw_card.generation, because the generation must never be newer than the node ID when we emit a transaction. This cannot be guaranteed with fw_card.generation. Furthermore, the target's and initiator's node IDs can be obtained from fw_device and fw_card. Dereferencing their underlying topology objects is not necessary. Signed-off-by: Stefan Richter <[EMAIL PROTECTED]> --- Rework of patch "firewire: fw-sbp2: enforce read order of device generation and node ID" from November 1 2007. This code also needs barriers to work precisely as intended. They will be added by a subsequent patch which consistently updates readers and writers of .generation and .node_id. drivers/firewire/fw-sbp2.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) Index: linux/drivers/firewire/fw-sbp2.c === --- linux.orig/drivers/firewire/fw-sbp2.c +++ linux/drivers/firewire/fw-sbp2.c @@ -671,9 +671,9 @@ static void sbp2_login(struct work_struc struct sbp2_login_response response; int generation, node_id, local_node_id; - generation= device->card->generation; - node_id = device->node->node_id; - local_node_id = device->card->local_node->node_id; + generation= device->generation; + node_id = device->node_id; + local_node_id = device->card->node_id; if (sbp2_send_management_orb(lu, node_id, generation, SBP2_LOGIN_REQUEST, lu->lun, ) < 0) { @@ -921,9 +921,9 @@ static void sbp2_reconnect(struct work_s struct fw_device *device = fw_device(unit->device.parent); int generation, node_id, local_node_id; - generation= device->card->generation; - node_id = device->node->node_id; - local_node_id = device->card->local_node->node_id; + generation= device->generation; + node_id = device->node_id; + local_node_id = device->card->node_id; if (sbp2_send_management_orb(lu, node_id, generation, SBP2_RECONNECT_REQUEST, -- Stefan 
Richter -=-==--- ---= ==--- http://arcgraph.de/sr/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/4] firewire: order of memory accesses (bus generation vs. node ID)
I have now updated some patches from November according to comments I received from Nick back then. As Jarod has now found out while testing the November versions, parts of these patches actually fix bugs that have been reported multiple times. While updating this stuff, I also found two more read sites where the read order was not properly enforced. Incoming: 1/4 firewire: fw-sbp2: use device generation, not card generation 2/4 firewire: fw-cdev: use device generation, not card generation 3/4 firewire: enforce access order between generation and node ID 4/4 firewire: fw-core: react on bus resets while the config ROM is being fetched drivers/firewire/fw-cdev.c |3 ++- drivers/firewire/fw-device.c | 33 - drivers/firewire/fw-device.h | 12 drivers/firewire/fw-sbp2.c | 14 -- drivers/firewire/fw-topology.c |5 + 5 files changed, 51 insertions(+), 16 deletions(-) -- Stefan Richter -=-==--- ---= ==--- http://arcgraph.de/sr/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: fixed a bug of adma in rhel4u5 with HDS7250SASUN500G.
Kuan Luo wrote: First, thanks to davide for helping to send the attachment. Robert, the patch is meant to solve the error message "ata1: CPB flags CMD err, flags=0x11" seen when testing HDS7250SASUN500G in rhel4u5. I also tested this hd in 2.6.24-rc7, which required removing the mask in the blacklist to run NCQ, and the same error showed up. I traced the bug and found that the interrupt handler finished a command (for example, tag=0) when the driver saw that the ADMA status was NV_ADMA_STAT_DONE and cpb->resp_flags was NV_CPB_RESP_DONE. However, for this hd, the drive may not yet have cleared bit 0 at that moment. That meant the hardware had not completely finished the command. If at the same time the driver freed the command (tag 0) and sent another command (tag 0), the error happened. The notifier register is a 32-bit register containing the notifier value. The value is a bit vector containing one bit per tag number (0-31) in the corresponding bit positions (bit 0 is for tag 0, etc). When a bit is set, ADMA indicates that the command with the corresponding tag number has completed execution. So I added the notifier check code. Sometimes I saw that the notifier register had some bits set, but the ADMA status was NV_ADMA_STAT_CMD_COMPLETE, not NV_ADMA_STAT_DONE. So I added the NV_ADMA_STAT_CMD_COMPLETE check code. Kuan, does this patch (using the notifiers to see if the command is really done) still work if one port on the controller has ADMA disabled because it's in ATAPI mode? I seem to recall Allen Martin mentioning that notifiers wouldn't work in this case. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
kernel bug report 2.6.24-rc8 on core2quad q6600 with debian unstable amd64
-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA512 Hi folks, I'm sending the bug report here, since I have no idea where else to send it. I hope it helps. [1.] One line summary of the problem: The kernel panics under high CPU/IO load, compilers segfault, mdadm used to do so, too. [2.] Full description of the problem/report: I upgraded my system to fairly new hardware and find myself unable to compile any kernel source, and the system panics under high load, e.g. when resyncing a RAID or building a kernel. Building a kernel is 100% sure to result in a kernel panic. [3.] Keywords (i.e., modules, networking, kernel): kernel, panic, gcc, load, high, high load [4.] Kernel information [4.1.] Kernel version (from /proc/version): Linux version 2.6.24-rc8-wintix ([EMAIL PROTECTED]) (gcc version 4.2.3 20080114 (prerelease) (Debian 4.2.2-7)) #1 SMP Tue Jan 22 21:17:40 CET 2008 [4.2.] Kernel .config file: http://linuxdingsda.de/~wintix/temp/config.gz [5.] Most recent kernel version which did not have the bug: I tried 2.6.23, but it doesn't work there either. [6.] Output of Oops.. message (if applicable) with symbolic information resolved (see Documentation/oops-tracing.txt) # make-kpkg kernel_image --initrd [..] CC net/core/net-sysfs.o CC drivers/char/agp/amd64-agp.o CC lib/vsprintf.o drivers/acpi/pci_bind.c: In function 'acpi_pci_unbind'::0: internal compiler error: Segmentation fault Depending on which kernel I build, it fails on different things. [7.] A small shell script or example program which triggers the problem (if possible) See 6. [8.] Environment [8.1.]
Software (add the output of the ver_linux script here) Linux saugcenter 2.6.24-rc8-wintix #1 SMP Tue Jan 22 21:17:40 CET 2008 x86_64 GNU/Linux Gnu C 4.2.3 Gnu make 3.81 binutils 2.18.0.20080103 util-linux 2.13.1 mount 2.13 module-init-tools 3.3-pre11 e2fsprogs 1.40.3 reiserfsprogs 3.6.19 xfsprogs 2.9.5 Linux C Library2.7 Dynamic linker (ldd) 2.7 Procps 3.2.7 Net-tools 1.60 Console-tools 0.2.3 Sh-utils 5.97 udev 114 Modules Loaded nfsd lockd nfs_acl auth_rpcgss sunrpc exportfs ac battery acpi_cpufreq ipv6 fuse dm_crypt lm85 hwmon_vid budget_av saa7146_vv videobuf_dma_sg videobuf_core videodev v4l2_common v4l1_compat tua6100 budget_core saa7146 ttpci_eeprom dvb_pll tda10021 tda10023 tda1004x firmware_class stv0299 dvb_core serio_raw i2c_i801 evdev e1000e intel_agp button psmouse i2c_core ext3 jbd mbcache dm_mirror dm_snapshot dm_mod raid456 async_xor async_memcpy async_tx xor pata_marvell pata_acpi usbhid 3c59x mii generic ahci libata ide_core ehci_hcd uhci_hcd sg sr_mod cdrom thermal processor fan [8.2.] Processor information (from /proc/cpuinfo): processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 15 model name : Intel(R) Core(TM)2 Quad CPUQ6600 @ 2.40GHz stepping: 11 cpu MHz : 1596.000 cache size : 4096 KB physical id : 0 siblings: 4 core id : 0 cpu cores : 4 fpu : yes fpu_exception : yes cpuid level : 10 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc arch_perfmon pebs bts rep_good pni monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr lahf_lm bogomips: 4803.76 clflush size: 64 cache_alignment : 64 address sizes : 36 bits physical, 48 bits virtual power management: 4 times. only core-id and prozessor# differs. the frequency is due to clock-down on idle. [8.3.] 
Module information (from /proc/modules): ipt_REJECT 5632 1 - Live 0x8835f000 xt_state 3200 3 - Live 0x8835d000 xt_tcpudp 4160 9 - Live 0x8835a000 nf_conntrack_ftp 10856 0 - Live 0x88356000 nf_conntrack_ipv4 21136 3 - Live 0x8834f000 nf_conntrack 78384 3 xt_state,nf_conntrack_ftp,nf_conntrack_ipv4, Live 0x8833a000 iptable_filter 3776 1 - Live 0x880a5000 ip_tables 23304 1 iptable_filter, Live 0x88333000 x_tables 22600 4 ipt_REJECT,xt_state,xt_tcpudp,ip_tables, Live 0x8832c000 nfsd 279272 1 - Live 0x882e6000 lockd 75248 1 nfsd, Live 0x882d2000 nfs_acl 4416 1 nfsd, Live 0x882cf000 auth_rpcgss 52512 1 nfsd, Live 0x882c1000 sunrpc 212168 6 nfsd,lockd,nfs_acl,auth_rpcgss, Live 0x8828c000 exportfs 6144 1 nfsd, Live 0x88289000 ac 7368 0 - Live 0x88286000 battery 15880 0 - Live 0x88281000 acpi_cpufreq 10032 0 - Live 0x8827d000 ipv6 309640 20 - Live 0x8823 fuse 54960 1 - Live 0x88221000 dm_crypt 15752 0 - Live 0x8821c000 lm85 35620 0 - Live 0x88212000 hwmon_vid 4288 1 lm85, Live 0x8820f000 budget_av 21888 4 - Live 0x88208000 saa7146_vv 55296
[PATCH] agp/intel: add support for E7221 chipset
From: Carlos Martín <[EMAIL PROTECTED]> The E7221 chipset is a 915 rebadged for the Intel server line. Signed-off-by: Dave Airlie <[EMAIL PROTECTED]> --- drivers/char/agp/intel-agp.c | 11 +-- 1 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index d879619..03eac1e 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -10,6 +10,8 @@ #include #include "agp.h" +#define PCI_DEVICE_ID_INTEL_E7221_HB 0x2588 +#define PCI_DEVICE_ID_INTEL_E7221_IG 0x258a #define PCI_DEVICE_ID_INTEL_82946GZ_HB 0x2970 #define PCI_DEVICE_ID_INTEL_82946GZ_IG 0x2972 #define PCI_DEVICE_ID_INTEL_82965G_1_HB 0x2980 @@ -526,7 +528,8 @@ static void intel_i830_init_gtt_entries(void) break; case I915_GMCH_GMS_STOLEN_48M: /* Check it's really I915G */ - if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || + if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB || + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || @@ -538,7 +541,8 @@ static void intel_i830_init_gtt_entries(void) break; case I915_GMCH_GMS_STOLEN_64M: /* Check it's really I915G */ - if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || + if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB || + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB || agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || @@ -1854,6 +1858,8 @@ static const struct intel_driver_description { { PCI_DEVICE_ID_INTEL_82865_HB, PCI_DEVICE_ID_INTEL_82865_IG, 0, "865", _845_driver, _830_driver }, { PCI_DEVICE_ID_INTEL_82875_HB, 0, 0, "i875", _845_driver, NULL }, + { PCI_DEVICE_ID_INTEL_E7221_HB, 
PCI_DEVICE_ID_INTEL_E7221_IG, 0, "E7221 (i915)", + NULL, _915_driver }, { PCI_DEVICE_ID_INTEL_82915G_HB, PCI_DEVICE_ID_INTEL_82915G_IG, 0, "915G", NULL, _915_driver }, { PCI_DEVICE_ID_INTEL_82915GM_HB, PCI_DEVICE_ID_INTEL_82915GM_IG, 0, "915GM", @@ -2059,6 +2065,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_82875_HB), ID(PCI_DEVICE_ID_INTEL_7505_0), ID(PCI_DEVICE_ID_INTEL_7205_0), + ID(PCI_DEVICE_ID_INTEL_E7221_HB), ID(PCI_DEVICE_ID_INTEL_82915G_HB), ID(PCI_DEVICE_ID_INTEL_82915GM_HB), ID(PCI_DEVICE_ID_INTEL_82945G_HB), -- 1.5.3.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] drm/i915: add support for E7221 chipset
From: Carlos Martín <[EMAIL PROTECTED]> E7221 chipset is a server version of the i915. Signed-off-by: Dave Airlie <[EMAIL PROTECTED]> --- drivers/char/drm/drm_pciids.h |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/char/drm/drm_pciids.h b/drivers/char/drm/drm_pciids.h index f359397..43d3c42 100644 --- a/drivers/char/drm/drm_pciids.h +++ b/drivers/char/drm/drm_pciids.h @@ -297,6 +297,7 @@ {0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x258a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2592, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x27a2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ -- 1.5.3.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH UPDATE] x86: ignore spurious faults
When changing a kernel page from RO->RW, it's OK to leave stale TLB entries around, since doing a global flush is expensive and they pose no security problem. They can, however, generate a spurious fault, which we should catch and simply return from (which will have the side-effect of reloading the TLB to the current PTE). This can occur when running under Xen, because it frequently changes kernel pages from RW->RO->RW to implement Xen's pagetable semantics. It could also occur when using CONFIG_DEBUG_PAGEALLOC, since it avoids doing a global TLB flush after changing page permissions. Signed-off-by: Jeremy Fitzhardinge <[EMAIL PROTECTED]> Cc: Harvey Harrison <[EMAIL PROTECTED]> --- arch/x86/mm/fault_32.c | 50 arch/x86/mm/fault_64.c | 50 2 files changed, 100 insertions(+) === --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -324,6 +324,51 @@ static int is_f00f_bug(struct pt_regs *r } /* + * Handle a spurious fault caused by a stale TLB entry. This allows + * us to lazily refresh the TLB when increasing the permissions of a + * kernel page (RO -> RW or NX -> X). Doing it eagerly is very + * expensive since that implies doing a full cross-processor TLB + * flush, even if no stale TLB entries exist on other processors. + * There are no security implications to leaving a stale TLB when + * increasing the permissions on a page. + */ +static int spurious_fault(unsigned long address, + unsigned long error_code) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + /* Reserved-bit violation or user access to kernel space? 
*/ + if (error_code & (PF_USER | PF_RSVD)) + return 0; + + pgd = init_mm.pgd + pgd_index(address); + if (!pgd_present(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + return 0; + + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + + return 1; +} + +/* * Handle a fault on the vmalloc or module mapping area * * This assumes no large pages in there. @@ -446,6 +491,11 @@ void __kprobes do_page_fault(struct pt_r if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) return; + + /* Can handle a stale RO->RW TLB */ + if (spurious_fault(address, error_code)) + return; + /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. === --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -312,6 +312,51 @@ static noinline void pgtable_bad(unsigne } /* + * Handle a spurious fault caused by a stale TLB entry. This allows + * us to lazily refresh the TLB when increasing the permissions of a + * kernel page (RO -> RW or NX -> X). Doing it eagerly is very + * expensive since that implies doing a full cross-processor TLB + * flush, even if no stale TLB entries exist on other processors. + * There are no security implications to leaving a stale TLB when + * increasing the permissions on a page. + */ +static int spurious_fault(unsigned long address, + unsigned long error_code) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + /* Reserved-bit violation or user access to kernel space? 
*/ + if (error_code & (PF_USER | PF_RSVD)) + return 0; + + pgd = init_mm.pgd + pgd_index(address); + if (!pgd_present(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + return 0; + + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + + return 1; +} + +/* * Handle a fault on the vmalloc area * * This assumes no large pages in there. @@ -443,6 +488,11 @@ asmlinkage void __kprobes do_page_fault( if (vmalloc_fault(address) >= 0) return; } + + /* Can handle a stale RO->RW TLB */ + if
Re: [PATCH] x86: ignore spurious faults
Harvey Harrison wrote: On Wed, 2008-01-23 at 16:05 -0800, Jeremy Fitzhardinge wrote: === --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -290,6 +290,53 @@ static int is_errata93(struct pt_regs *r /* + * Handle a spurious fault caused by a stale TLB entry. This allows + * us to lazily refresh the TLB when increasing the permissions of a + * kernel page (RO -> RW or NX -> X). Doing it eagerly is very + * expensive since that implies doing a full cross-processor TLB + * flush, even if no stale TLB entries exist on other processors. + * There are no security implications to leaving a stale TLB when + * increasing the permissions on a page. + */ +static int spurious_fault(unsigned long address, + unsigned long error_code) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + /* Reserved-bit violation or user access to kernel space? */ + if (error_code & (PF_USER | PF_RSVD)) + return 0; + + pgd = init_mm.pgd + pgd_index(address); + if (!pgd_present(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + return 0; + if ((error_code & 0x02) && !pte_write(*pte)) + return 0; if ((error_code & PF_WRITE) && !pte_write(*pte)) return 0; Oops, thanks. Overlooked that one. How about dropping the #if and rely on the !pte_exec() test always being false when _PAGE_NX = 0? The compiler should just trim this all away. from pgtable.h: static inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_NX); } Thanks for reminding me; I thought of this the other day, but forgot to do it. Will post an updated patch shortly. J -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: factor out common s2ram wakeup code
On Tuesday, 22 of January 2008, Pavel Machek wrote: > > It seems to compile 32/64bit, and work 32bit... It also works on a 64-bit system. Thanks, Rafael > --- > > Factor out common real-mode part of wakeup_{32,64}.S. > > Signed-off-by: Pavel Machek <[EMAIL PROTECTED]> > > --- > commit 2fac1886274433c64e76611e6b1ba2318c5914a4 > tree 2d6bdc7c2eda9845b57e1d7128b187df1b1fd2c9 > parent 7a9546efcc4d90040a52c003eb9a303200c41f86 > author Pavel <[EMAIL PROTECTED]> Tue, 22 Jan 2008 20:34:03 +0100 > committer Pavel <[EMAIL PROTECTED]> Tue, 22 Jan 2008 20:34:03 +0100 > > arch/x86/kernel/acpi/wakeup.S| 111 ++ > arch/x86/kernel/acpi/wakeup_32.S | 118 +--- > arch/x86/kernel/acpi/wakeup_64.S | 125 > +- > 3 files changed, 116 insertions(+), 238 deletions(-) > > diff --git a/arch/x86/kernel/acpi/wakeup.S b/arch/x86/kernel/acpi/wakeup.S > new file mode 100644 > index 000..38205c8 > --- /dev/null > +++ b/arch/x86/kernel/acpi/wakeup.S > @@ -0,0 +1,111 @@ > +# Copyright 2003, 2008 Pavel Machek <[EMAIL PROTECTED]>, distribute under > GPLv2 > + > +#define BEEP \ > + inb $97, %al; \ > + outb%al, $0x80; \ > + movb$3, %al;\ > + outb%al, $97; \ > + outb%al, $0x80; \ > + movb$-74, %al; \ > + outb%al, $67; \ > + outb%al, $0x80; \ > + movb$-119, %al; \ > + outb%al, $66; \ > + outb%al, $0x80; \ > + movb$15, %al; \ > + outb%al, $66; > + > +ALIGN > + .align 4096 > +ENTRY(wakeup_start) > +wakeup_code: > + wakeup_code_start = . 
> + .code16 > + > + cli > + cld > + > + # setup data segment > + movw%cs, %ax > + movw%ax, %ds# Make ds:0 > point to wakeup_start > + movw%ax, %ss > + > + testl $4, realmode_flags - wakeup_code > + jz 1f > + BEEP > +1: > + mov $(wakeup_stack - wakeup_code), %sp # Private stack > is needed for ASUS board > + > + pushl $0 # Kill any > dangerous flags > + popfl > + > + > + movlreal_magic - wakeup_code, %eax > + cmpl$0x12345678, %eax > + jne bogus_real_magic > + > + testl $1, realmode_flags - wakeup_code > + jz 1f > + lcall $0xc000,$3 > + movw%cs, %ax > + movw%ax, %ds# Bios might > have played with that > + movw%ax, %ss > +1: > + > + testl $2, realmode_flags - wakeup_code > + jz mode_done > + mov video_mode - wakeup_code, %ax > + callmode_set > + jmp mode_done > + > +/* This code uses an extended set of video mode numbers. These include: > + * Aliases for standard modes > + * NORMAL_VGA (-1) > + * EXTENDED_VGA (-2) > + * ASK_VGA (-3) > + * Video modes numbered by menu position -- NOT RECOMMENDED because of lack > + * of compatibility when extending the table. These are between 0x00 and > 0xff. > + */ > +#define VIDEO_FIRST_MENU 0x > + > +/* Standard BIOS video modes (BIOS number + 0x0100) */ > +#define VIDEO_FIRST_BIOS 0x0100 > + > +/* VESA BIOS video modes (VESA number + 0x0200) */ > +#define VIDEO_FIRST_VESA 0x0200 > + > +/* Video7 special modes (BIOS number + 0x0900) */ > +#define VIDEO_FIRST_V7 0x0900 > + > +# Setting of user mode (AX=mode ID) => CF=success > + > +# For now, we only handle VESA modes (0x0200..0x03ff). To handle other > +# modes, we should probably compile in the video code from the boot > +# directory. 
> +.code16 > +mode_set: > + movw%ax, %bx > + subb$VIDEO_FIRST_VESA>>8, %bh > + cmpb$2, %bh > + jb check_vesa > + > +setbad: > + clc > + ret > + > +check_vesa: > + orw $0x4000, %bx# Use linear frame buffer > + movw$0x4f02, %ax# VESA BIOS mode set call > + int $0x10 > + cmpw$0x004f, %ax# AL=4f if implemented > + jnz setbad # AH=0 if OK > + > + stc > + ret > + > +bogus_real_magic: > + jmp bogus_real_magic > + > +real_magic: .quad 0 > +video_mode: .quad 0 > +realmode_flags: .quad 0 > diff --git a/arch/x86/kernel/acpi/wakeup_32.S > b/arch/x86/kernel/acpi/wakeup_32.S > index 1e931aa..9b26909 100644 > --- a/arch/x86/kernel/acpi/wakeup_32.S > +++ b/arch/x86/kernel/acpi/wakeup_32.S > @@ -3,73 +3,11 @@ #include > #include > #include > > -# > -# wakeup_code runs in real mode, and at unknown address (determined at > run-time). > -# Therefore it must only use relative jumps/calls. > -# > -# Do we need to deal with A20? It is okay: ACPI specs says A20 must be > enabled > -# > -# If physical address of wakeup_code is 0x12345, BIOS should call us with > -# cs = 0x1234, eip = 0x05 > -# > - >