Re: GFS

2005-08-09 Thread David Teigland
On Mon, Aug 08, 2005 at 05:14:45PM +0300, Pekka J Enberg wrote:

 if (!dumping)
down_read(&mm->mmap_sem);
> >+
> >+ for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
> >+ if (end <= vma->vm_start)
> >+ break;
> >+ if (vma->vm_file &&
> >+ vma->vm_file->f_dentry->d_inode->i_sb == sb) {
> >+ num_gh++;
> >+ }
> >+ }
> >+
> >+ ghs = kmalloc((num_gh + 1) * sizeof(struct gfs2_holder),
> >+   GFP_KERNEL);
> >+ if (!ghs) {
> >+ if (!dumping)
> >+ up_read(&mm->mmap_sem);
> >+ return -ENOMEM;
> >+ }
> >+
> >+ for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
> 
> Sorry if this is an obvious question but what prevents another thread from 
> doing mmap() before we do the second walk and messing up num_gh? 

mm->mmap_sem ?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: kernel workqueue -max name length

2005-08-09 Thread Frederic TEMPORELLI

Hi,


any explanation about the 10 chars limit for kernel workqueue name ?


another question: why is the name length test managed by BUG_ON ?
returning a NULL workqueue is done in the next test (failed kmalloc for wq)...

Anyway, this can explain some issues when loading some SCSI drivers modules.


hope that somebody knows...


Frederic TEMPORELLI wrote:

Hello,


When creating a workqueue, workqueue name is limited to 10 chars
(kernel/workqueue.c , function is __create_workqueue, test is done in a 
BUG_ON).


Why has this length been limited to 10 chars ?
Can I safely increase this max length (13 chars should be enough...) ?



Some comments about these questions:

In SCSI layer, HBA kernel ID is incremented after each modprobe/rmmod.

Then, when a scsi driver is managing a working queue and HBA kernel ID 
is greater than 99 (let's assume that you have modprobe/rmmod the scsi 
driver to get this ID to 99, or you may have played with 'scsi_debug' 
module), an oops is generated when loading again the driver (and the 
driver is frozen).


This is because working queue name format is "scsi_wq_%d" 
(drivers/scsi/hosts.c , function scsi_add_host, %d is the HBA ID), and 
so working queue name length is greater than 10 chars when HBA kernel ID 
is > 99...



Best regards





--
Frederic TEMPORELLI

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Fix function/macro name collision on i386 oprofile

2005-08-09 Thread David Gibson
Andrew, please apply:

The i386 OProfile code has a function named nmi_exit(), which collides
with the nmi_exit() macro in linux/hardirq.h.  At the moment, we get
away with it, because hardirq.h isn't included in the oprofile code.
I hit this as a bug when working with a patch which (indirectly) adds
a #include of hardirq.h to oprofile.

Regardless, the name collision is probably not a good idea, so this
patch fixes it, renaming the oprofile function to op_nmi_exit().  It
also renames the nmi_init() and nmi_timer_init() functions similarly,
for consistency.

Signed-off-by: David Gibson <[EMAIL PROTECTED]>

---
 arch/i386/oprofile/init.c  |   12 ++--
 arch/i386/oprofile/nmi_int.c   |4 ++--
 arch/i386/oprofile/nmi_timer_int.c |2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

Index: working-2.6/arch/i386/oprofile/nmi_int.c
===
--- working-2.6.orig/arch/i386/oprofile/nmi_int.c   2005-07-15 
15:27:53.0 +1000
+++ working-2.6/arch/i386/oprofile/nmi_int.c2005-08-10 14:28:28.0 
+1000
@@ -355,7 +355,7 @@
 /* in order to get driverfs right */
 static int using_nmi;
 
-int __init nmi_init(struct oprofile_operations *ops)
+int __init op_nmi_init(struct oprofile_operations *ops)
 {
__u8 vendor = boot_cpu_data.x86_vendor;
__u8 family = boot_cpu_data.x86;
@@ -420,7 +420,7 @@
 }
 
 
-void nmi_exit(void)
+void op_nmi_exit(void)
 {
if (using_nmi)
exit_driverfs();
Index: working-2.6/arch/i386/oprofile/nmi_timer_int.c
===
--- working-2.6.orig/arch/i386/oprofile/nmi_timer_int.c 2005-07-15 
15:27:53.0 +1000
+++ working-2.6/arch/i386/oprofile/nmi_timer_int.c  2005-08-10 
14:28:42.0 +1000
@@ -40,7 +40,7 @@
 }
 
 
-int __init nmi_timer_init(struct oprofile_operations * ops)
+int __init op_nmi_timer_init(struct oprofile_operations * ops)
 {
extern int nmi_active;
 
Index: working-2.6/arch/i386/oprofile/init.c
===
--- working-2.6.orig/arch/i386/oprofile/init.c  2005-07-15 15:27:53.0 
+1000
+++ working-2.6/arch/i386/oprofile/init.c   2005-08-10 14:28:09.0 
+1000
@@ -15,9 +15,9 @@
  * with the NMI mode driver.
  */
  
-extern int nmi_init(struct oprofile_operations * ops);
-extern int nmi_timer_init(struct oprofile_operations * ops);
-extern void nmi_exit(void);
+extern int op_nmi_init(struct oprofile_operations * ops);
+extern int op_nmi_timer_init(struct oprofile_operations * ops);
+extern void op_nmi_exit(void);
 extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
 
 
@@ -28,11 +28,11 @@
ret = -ENODEV;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-   ret = nmi_init(ops);
+   ret = op_nmi_init(ops);
 #endif
 #ifdef CONFIG_X86_IO_APIC
if (ret < 0)
-   ret = nmi_timer_init(ops);
+   ret = op_nmi_timer_init(ops);
 #endif
ops->backtrace = x86_backtrace;
 
@@ -43,6 +43,6 @@
 void oprofile_arch_exit(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
-   nmi_exit();
+   op_nmi_exit();
 #endif
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: GFS

2005-08-09 Thread Pekka J Enberg

Zach Brown writes:
But couldn't we use make_pages_present() to figure which locks we need, 
sort them, and then grab them?


Doh, obviously we can't as nopage() needs to bring the page in. Sorry about 
that. 

I also thought of another failure case for the vma walk. When a thread uses 
userspace memcpy() between two clusterfs mmap'd regions instead of write() 
or read(). 

 Pekka 
-

To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net/ipv4 debug cleanup, kernel 2.6.13-rc5

2005-08-09 Thread David S. Miller
From: Patrick McHardy <[EMAIL PROTECTED]>
Date: Wed, 10 Aug 2005 03:09:34 +0200

> These macros always looked a bit ugly to me, with your cleanup there
> isn't a single spot left where we require them to accept code as
> argument, so how about we change them to pure printk wrappers?

Applied to 2.6.14, with two changes:

1) the dccp cases fixed up
2) the args for the #if 0 part of sock.h fixed up
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC: 2.6 patch] the big Documentation/Changes change

2005-08-09 Thread Randy.Dunlap
On Wed, 10 Aug 2005 03:17:40 +0200 Adrian Bunk wrote:

> I edited Documentation/Changes:
> - remove obsolete information
> - point to feature-list-2.6.txt instead of providing similar information
> - removed the URLs of the software packages (people compiling their own
>   kernel usually know where to find the required software)

I always found those real handy.

Overall, I find this a good idea.

> The resulting file is pretty short.

>  Documentation/Changes |  376 +-
>  1 files changed, 15 insertions(+), 361 deletions(-)
> 
> --- linux-2.6.13-rc5-mm1-full/Documentation/Changes.old   2005-08-10 
> 03:01:11.0 +0200
> +++ linux-2.6.13-rc5-mm1-full/Documentation/Changes   2005-08-10 
> 03:12:12.0 +0200
> @@ -1,435 +1,89 @@

> -Kernel compilation
> -==
> +Notes
> +=
> +
> +Please read feature-list-2.6.txt for information about new features

We usually prefix Doc file names with Documentation/, even though
this is in the same directory.

but where is this file?  I can't find it.
Ah, it's in -mm only.

> +and changes compared to 2.4 kernels.

---
~Randy
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix PPC signal handling of NODEFER, should not affect sa_mask

2005-08-09 Thread Steven Rostedt
On Tue, 2005-08-09 at 23:10 -0400, Steven Rostedt wrote:
> memset(&act,0,sizeof(act));
> sigaddset(&act.sa_mask,SIGUSR1);
> ret = testsig(&act,SIGUSR1,SIGUSR1);
> if (ret == 1) {
> printf("sa_mask does not block sig\n");
> } else if (ret == 0) {
> printf("sa_mask blocks sig\n");
> } else {
> printf("Unknown return code!!\n");
> }
> 
> exit(0);
> exit(0);

Yuck! OK I was into the cut and paste here. This probably would look
much better as 

ret = testsig(&act,SIGUSR1,SIGUSR1);
switch (ret) {
case 0:
printf("...");
break;
case 1:
printf("...");
break;
default:
printf("Unknown...");
}

And what was I doing with the double exits??

OK, time for bed.

-- Steve

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>

/*
 * State shared between the signal handlers below.  Both flags are only
 * accessed from handler context, where the one object type the C standard
 * guarantees can be accessed safely is volatile sig_atomic_t.
 *
 * u1 - set by user1() for the duration of its sleep.
 * u2 - latched to 1 when a handler observes u1 set, i.e. when a signal
 *      was delivered while user1() was still running; intr() hands it to
 *      the parent as the child's exit status.
 */
static volatile sig_atomic_t u1;
static volatile sig_atomic_t u2;

static void user1(int x)
{
	(void)x;

	/*
	 * u1 can only be set on entry if this handler was invoked again
	 * while an earlier invocation is still inside the sleep below,
	 * i.e. the signal was not blocked against itself.
	 */
	if (u1 != 0)
		u2 = 1;

	/* Mark "handler in progress" for the whole sleep window. */
	u1 = 1;
	sleep(5);
	u1 = 0;
}

static void user2(int x)
{
	(void)x;

	/* Nothing to record unless user1() is currently in progress. */
	if (!u1)
		return;

	u2 = 1;
}

/*
 * SIGINT handler: end the child and report u2 as its exit status.
 *
 * _exit() is used instead of exit(): exit() is not async-signal-safe, and
 * it would also flush any stdio buffers this process inherited across
 * fork(), re-emitting output the parent has already queued.
 */
static void intr(int x)
{
	(void)x;
	_exit(u2);
}

/*
 * Install the handlers used by one test run and then wait for signals.
 *
 *   SIGINT  -> intr()   (default flags, empty mask)
 *   SIGUSR1 -> user1()  (flags and mask supplied by the caller in *act)
 *   SIGUSR2 -> user2()  (default flags, empty mask)
 *
 * act->sa_handler is overwritten with user1.  Exits with status -1 if any
 * sigaction() call fails.  Does not return: the process is expected to be
 * terminated from the SIGINT handler.
 */
static void start(struct sigaction *act)
{
	struct sigaction a;

	memset(&a, 0, sizeof(a));
	sigemptyset(&a.sa_mask);

	a.sa_handler = intr;
	if (sigaction(SIGINT, &a, NULL) < 0) {
		perror("sigaction");
		exit(-1);
	}

	/*
	 * This is the testing handler
	 */
	act->sa_handler = user1;
	if (sigaction(SIGUSR1, act, NULL) < 0) {
		perror("sigaction");
		exit(-1);
	}

	a.sa_handler = user2;
	if (sigaction(SIGUSR2, &a, NULL) < 0) {
		perror("sigaction");
		exit(-1);
	}

	/* Block until a signal arrives instead of spinning on the CPU. */
	for (;;)
		pause();
}
/*
 * Run one signal-blocking experiment in a forked child.
 *
 * The child installs *act as the SIGUSR1 disposition (see start()) and
 * waits.  The parent sends sig1, waits a second, sends sig2, waits another
 * second, then sends SIGINT to make the child exit.
 *
 * Returns the child's exit status, or -1 if fork()/waitpid() failed or
 * the child did not terminate through exit.
 */
int testsig(struct sigaction *act, int sig1, int sig2)
{
	pid_t pid;
	int status;

	/* Keep already-buffered output out of the child's copy of stdout. */
	fflush(stdout);

	pid = fork();
	if (pid < 0) {
		perror("fork");
		/*
		 * Must not continue: with pid == -1 the kill() calls below
		 * would signal every process we are allowed to signal.
		 */
		return -1;
	}
	if (pid == 0) {
		/*
		 * Test1 sa_mask includes SIGUSR2
		 */
		start(act);
		exit(0);
	}

	/*
	 * Send first signal to start the test.
	 */
	kill(pid, sig1);
	/*
	 * SIGUSR1 sleeps for 5, just sleep for one here
	 * to make sure the system got it.
	 */
	sleep(1);
	/*
	 * Send the second signal to the child, to see if
	 * this wakes it up.
	 */
	kill(pid, sig2);
	sleep(1);
	/*
	 * End the test.
	 */
	kill(pid, SIGINT);
	if (waitpid(pid, &status, 0) < 0) {
		perror("waitpid");
		return -1;
	}
	/* A child killed by a signal has no meaningful exit status. */
	if (!WIFEXITED(status))
		return -1;
	return WEXITSTATUS(status);
}

/* One experiment: how SIGUSR1 is configured and how to word the result. */
struct sig_case {
	int flags;		/* sa_flags for the SIGUSR1 handler */
	int masked;		/* signal added to sa_mask, 0 for none */
	int sig2;		/* signal sent while the handler sleeps */
	const char *on_zero;	/* printed when testsig() returns 0 */
	const char *on_one;	/* printed when testsig() returns 1 */
};

/* Build the sigaction described by *tc, run it, and print the verdict. */
static void run_case(const struct sig_case *tc)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_flags = tc->flags;
	if (tc->masked)
		sigaddset(&act.sa_mask, tc->masked);

	switch (testsig(&act, SIGUSR1, tc->sig2)) {
	case 0:
		printf("%s\n", tc->on_zero);
		break;
	case 1:
		printf("%s\n", tc->on_one);
		break;
	default:
		printf("Unknown return code!!\n");
	}
}

/*
 * Run every sa_mask / SA_NODEFER combination in turn and print one line
 * per experiment.  Always returns 0.
 */
int main(int argc, char **argv)
{
	static const struct sig_case cases[] = {
		{ 0, SIGUSR2, SIGUSR2,
		  "sa_mask blocks other signals",
		  "sa_mask does not block other signals" },
		{ SA_NODEFER, 0, SIGUSR2,
		  "SA_NODEFER blocks other signals",
		  "SA_NODEFER does not block other signals" },
		{ SA_NODEFER, SIGUSR2, SIGUSR2,
		  "SA_NODEFER does not affect sa_mask",
		  "SA_NODEFER affects sa_mask" },
		{ SA_NODEFER, SIGUSR1, SIGUSR1,
		  "SA_NODEFER and sa_mask blocks sig",
		  "SA_NODEFER and sa_mask does not block sig" },
		{ 0, 0, SIGUSR1,
		  "!SA_NODEFER blocks sig",
		  "!SA_NODEFER does not block sig" },
		{ SA_NODEFER, 0, SIGUSR1,
		  "SA_NODEFER blocks sig",
		  "SA_NODEFER does not block sig" },
		{ 0, SIGUSR1, SIGUSR1,
		  "sa_mask blocks sig",
		  "sa_mask does not block sig" },
	};
	size_t i;

	(void)argc;
	(void)argv;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		run_case(&cases[i]);

	return 0;
}


Re: [PATCH] Fix PPC signal handling of NODEFER, should not affect sa_mask

2005-08-09 Thread Steven Rostedt
On Tue, 2005-08-09 at 14:27 -0700, Linus Torvalds wrote:
> On the other hand, the standard seems to be a bit confused according to 
> google:
> 
>   "This mask is formed by taking the union of the current signal mask and
>the value of the sa_mask for the signal being delivered unless
>SA_NODEFER or SA_RESETHAND is set, and then including the signal being
>delivered. If and when the user's signal handler returns normally, the
>original signal mask is restored."
> 
> Quite frankly, the way I read it is actually the old Linux behaviour: the 
> "unless SA_NODEFER or SA_RESETHAND is set" seems to be talking about the 
> whole union of the sa_mask thing, _not_ just the "and the signal being 
> delivered" part. Exactly the way the kernel currently does (except we 
> should apparently _also_ do it for SA_RESETHAND).

Actually I take it the other way.  The wording is awful. But the "unless
SA_NODEFER or SA_RESETHAND is set, and then including the signal being
delivered".  This looks to me that it adds the signal being delivered to
the blocked mask unless the SA_NODEFER or SA_RESETHAND is set. I kind of
wonder if English is the native language of those that wrote this.  
> 
> So if we decide to change the kernel behaviour, I'd like this to be in -mm
> for a while before merging (or merge _very_ early after 2.6.13). I could
> imagine this confusing some existing binaries that had only been tested
> with the old Linux behaviour, regardless of what a standard says. 
> Especially since the standard itself is so confusing and badly worded.
> 
> Maybe somebody can tell what other systems do, since I assume the standard 
> is trying to describe behaviour that actually exists in the wild..

Well, I wrote this (attached) test program to see how signals are
affected by different settings.  It's best run under an idle system, so
if someone has another unix out there that can run this and return the
results, we can see how they work.  This test program has possible race
conditions but should work fine on an idle system. But that's just the
nature of testing signals and other asynchronous activities, as well as
just writing something up in a couple of minutes ;-)

A non-modified Linux returns this:

   sa_mask blocks other signals
   SA_NODEFER does not block other signals
   SA_NODEFER affects sa_mask
   SA_NODEFER and sa_mask does not block sig
   !SA_NODEFER blocks sig
   SA_NODEFER does not block sig
   sa_mask blocks sig

This shows the following:

1. That signals in sa_mask are blocked while the signal handler is
running. 
2. SA_NODEFER does not by itself block other signals (this should never
be anything else).
3. SA_NODEFER does affect the sa_mask (which is the topic of this
discussion). 
4. When SA_NODEFER is set and sa_mask has the signal itself set, then
the signal is not blocked (I believe that this is wrong too. If the signal
is itself in sa_mask then SA_NODEFER should still not let it run. It
will be interesting to see what other unices do).
5. When SA_NODEFER is not set, the signal is blocked (this is correct).
6. When SA_NODEFER is set (with nothing in sa_mask) the signal is not
blocked (also correct).
7. When the signal itself is set in sa_mask, then the signal is blocked
(correct).

With a patched kernel we get the following output:

   sa_mask blocks other signals
   SA_NODEFER does not block other signals
   SA_NODEFER does not affect sa_mask
   SA_NODEFER and sa_mask blocks sig
   !SA_NODEFER blocks sig
   SA_NODEFER does not block sig
   sa_mask blocks sig

Here the differences are that SA_NODEFER does _not_ affect the sa_mask,
and that when both the sig is in sa_mask and the SA_NODEFER is set, then
the signal is still blocked.  I can see this being useful if the sa_mask
is generated, and the NODEFER is expected to be the default. This allows
for overriding the default.

So, if someone can run this test on another unix system, then we can
have an idea of how others handle this.

-- Steve

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>

/*
 * State shared between the signal handlers below.  Both flags are only
 * accessed from handler context, where the one object type the C standard
 * guarantees can be accessed safely is volatile sig_atomic_t.
 *
 * u1 - set by user1() for the duration of its sleep.
 * u2 - latched to 1 when a handler observes u1 set, i.e. when a signal
 *      was delivered while user1() was still running; intr() hands it to
 *      the parent as the child's exit status.
 */
static volatile sig_atomic_t u1;
static volatile sig_atomic_t u2;

static void user1(int x)
{
	(void)x;

	/*
	 * u1 can only be set on entry if this handler was invoked again
	 * while an earlier invocation is still inside the sleep below,
	 * i.e. the signal was not blocked against itself.
	 */
	if (u1 != 0)
		u2 = 1;

	/* Mark "handler in progress" for the whole sleep window. */
	u1 = 1;
	sleep(5);
	u1 = 0;
}

static void user2(int x)
{
	(void)x;

	/* Nothing to record unless user1() is currently in progress. */
	if (!u1)
		return;

	u2 = 1;
}

/*
 * SIGINT handler: end the child and report u2 as its exit status.
 *
 * _exit() is used instead of exit(): exit() is not async-signal-safe, and
 * it would also flush any stdio buffers this process inherited across
 * fork(), re-emitting output the parent has already queued.
 */
static void intr(int x)
{
	(void)x;
	_exit(u2);
}

/*
 * Install the handlers used by one test run and then wait for signals.
 *
 *   SIGINT  -> intr()   (default flags, empty mask)
 *   SIGUSR1 -> user1()  (flags and mask supplied by the caller in *act)
 *   SIGUSR2 -> user2()  (default flags, empty mask)
 *
 * act->sa_handler is overwritten with user1.  Exits with status -1 if any
 * sigaction() call fails.  Does not return: the process is expected to be
 * terminated from the SIGINT handler.
 */
static void start(struct sigaction *act)
{
	struct sigaction a;

	memset(&a, 0, sizeof(a));
	sigemptyset(&a.sa_mask);

	a.sa_handler = intr;
	if (sigaction(SIGINT, &a, NULL) < 0) {
		perror("sigaction");
		exit(-1);
	}

	/*
	 * This is the testing handler
	 */
	act->sa_handler = user1;
	if (sigaction(SIGUSR1, act, NULL) < 0) {
		perror("sigaction");
		exit(-1);
	}

	a.sa_handler = user2;
	if (sigaction(SIGUSR2, &a, NULL) < 0) {
		perror("sigaction");
		exit(-1);
	}

	/* Block until a signal arrives instead of spinning on the CPU. */
	for (;;)
		pause();
}
int testsig(struct sigaction *act, int sig1, int sig2)
{
	int pid;
	int status;

	if ((pid = fork()) < 0) {
		perror("fork");
	} else if (!pid) {
		/*
		 * Test1 sa_mask includes SIGUSR2
		 */
		start(act);
		exit(0);
	}
	/*
	 * Send first signal to start 

BUG: reiserfs+acl+quota deadlock

2005-08-09 Thread Tarmo Tänav
Hi,

I've already reported a similar bug to the one I found now
and that was fixed by:
"[PATCH] reiserfs: fix deadlock in inode creation failure path w/
default ACL"

This bug is similar in effect but has some differences in how
to trigger it. The end effect will be just like with the other
bug that the affected directory will be unaccessible to any user
or process.

So here's the way to reproduce it, as minimal as I could get it:

You need reiserfs, quota and acl support in kernel.
you also need quota tools (edquota, quotaon, quotacheck), I used
linuxquota 3.12.

# cd /mnt
# dd if=/dev/zero of=test bs=1M count=50
50+0 records in
50+0 records out
# mkreiserfs -f test >/dev/null
mkreiserfs 3.6.19 (2003 www.namesys.com)

test is not a block special device
Continue (y/n):y
# mkdir mpoint
# mount test mpoint -o loop,acl,usrquota
# mkdir mpoint/user1
# useradd -d /mnt/mpoint/user1 user1 # may also use existing user
# chown user1 mpoint/user1
# quotacheck -v mpoint   # initializes quota file
# edquota user1
 set soft block limit to 1000, hard limit to 4000 
# edquota -t
 set the grace periods to something small: 1minutes ---
# quotaon mpoint
# ## at this point "repquota -a" should show the quota for user1
# su user1
# cd
# ## now we are in user1 home dir as user1
# cat /dev/zero > file1
loop2: warning, user block quota exceeded.
loop2: write failed, user block limit reached.
cat: write error: No space left on device
--- now we wait till the grace period expires (repquota -a) 
# cat "" > otherfile
loop2: write failed, user block quota exceeded too long.
 and it will hang forever 
# ## /mnt/mpoint can still be accessed, but /mnt/mpoint/user1 can't


I tested this on an -mm patchset kernel (2.6.13-rc5-mm1), but I
discovered the bug in my server which runs plain 2.6.12 with the
patch from Jeff Mahoney for the first reiserfs+acl bug.

The main difference between the two bugs is that the first one requires
the existence of a default acl, this one does not, but it does require
acl to be enabled.


PS. please CC, I'm not subscribed to the list

-- 
Tarmo Tänav <[EMAIL PROTECTED]>

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ide-disk oopses on boot

2005-08-09 Thread Christoph Lameter
On Wed, 10 Aug 2005, Petr Vandrovec wrote:

> > Yes that was discussed extensively by Andi and me and finally fixed by
> > Kiran's patch in 2.6.13-rc6.
> 
> By which patch?  I hit it with post-2.6.13-rc6, exactly 2.6.13-rc6 with
> checkin hash "commit 00dd1e433967872f3997a45d5adf35056fdf2f56".  So if it is
> supposed to be fixed in 2.6.13-rc6, it is not.

Yes you are right there is one additional place where pcibus_to_node is 
used with the hwif that we did not cover. This better go into 2.6.13.

---
Fix ide-disk.c oops caused by hwif == NULL

1. Move hwif_to_node to ide.h

2. Use hwif_to_node in ide-disk.c

Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

Index: linux-2.6/drivers/ide/ide-disk.c
===
--- linux-2.6.orig/drivers/ide/ide-disk.c   2005-07-27 18:29:17.0 
-0700
+++ linux-2.6/drivers/ide/ide-disk.c2005-08-09 19:55:03.0 -0700
@@ -1220,7 +1220,7 @@
goto failed;
 
g = alloc_disk_node(1 << PARTN_BITS,
-   pcibus_to_node(drive->hwif->pci_dev->bus));
+   hwif_to_node(drive->hwif));
if (!g)
goto out_free_idkp;
 
Index: linux-2.6/drivers/ide/ide-probe.c
===
--- linux-2.6.orig/drivers/ide/ide-probe.c  2005-08-04 15:47:15.0 
-0700
+++ linux-2.6/drivers/ide/ide-probe.c   2005-08-09 19:46:50.0 -0700
@@ -960,15 +960,6 @@
 }
 #endif /* MAX_HWIFS > 1 */
 
-static inline int hwif_to_node(ide_hwif_t *hwif)
-{
-   if (hwif->pci_dev)
-   return pcibus_to_node(hwif->pci_dev->bus);
-   else
-   /* Add ways to determine the node of other busses here */
-   return -1;
-}
-
 /*
  * init request queue
  */
Index: linux-2.6/include/linux/ide.h
===
--- linux-2.6.orig/include/linux/ide.h  2005-07-27 18:29:23.0 -0700
+++ linux-2.6/include/linux/ide.h   2005-08-09 19:47:14.0 -0700
@@ -1501,4 +1501,13 @@
 #define ide_id_has_flush_cache_ext(id) \
(((id)->cfs_enable_2 & 0x2400) == 0x2400)
 
+static inline int hwif_to_node(ide_hwif_t *hwif)
+{
+   if (hwif->pci_dev)
+   return pcibus_to_node(hwif->pci_dev->bus);
+   else
+   /* Add ways to determine the node of other busses here */
+   return -1;
+}
+
 #endif /* _IDE_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: smbus driver for ati xpress 200m

2005-08-09 Thread yhlu
yhlunb:/proc/acpi/battery/BAT1 # cat info
present: yes
design capacity: 4800 mAh
last full capacity:  4435 mAh
battery technology:  rechargeable
design voltage:  14800 mV
design capacity warning: 300 mAh
design capacity low: 132 mAh
capacity granularity 1:  32 mAh
capacity granularity 2:  32 mAh
model number:ZF02
serial number:   836
battery type:LION
OEM info:SIMPLO
yhlunb:/proc/acpi/battery/BAT1 # cat state
present: yes
ERROR: Unable to read battery status



On 8/9/05, Andi Kleen <[EMAIL PROTECTED]> wrote:
> On Tue, Aug 09, 2005 at 11:50:53AM -0700, yhlu wrote:
> > anyone is working on add driver for ati xpress 200m?
> >
> > without that My turion notebook, can not work read the battery status.
> 
> Normally this should be done in ACPI battery.c
> 
> -Andi
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-pm] PowerOP 0/3: System power operating point management API

2005-08-09 Thread Todd Poynor

Patrick Mochel wrote:

On Mon, 8 Aug 2005, Todd Poynor wrote:
(apologies for use of obsolete cpufreq mailing list address in my 
initial message.)

...

PowerOP is intended to leave all power
policy decisions to higher layers.


What do those higher layers look like? Do you have a userspace component
that uses this interface?


cpufreq is one example, it manages an abstraction of system 
power/performance levels based on cpu speed, which maps onto the 
PowerOP-level hardware capabilities in some fashion, and has both kernel 
and userspace components to manage the desired policy associated with 
this.  Regardless of whether this notion of configurable operating 
points would remain a separate layer from cpufreq or was more tightly 
integrated, the code to set these operating points can handle things 
such as setting validated voltage levels to match cpu speeds, etc.


For embedded systems, I am aware only of the Dynamic Power Management 
project, which you also mention and does indeed manage power policy 
based on the notions of power parameters and operating points.  The 
settings of these are configured entirely from userspace via sysfs, 
using shell scripts or convenience libraries that access the sysfs 
attributes.  A system designer chooses the operating points to be 
employed in the system based on the information from the processor or 
board vendor that describes validated, supported operating points and 
based on the characteristics of the system (how fast it needs to run 
while in use for different purposes and how much battery power can be 
spent for those purposes).


For example, a designer implementing a system based on an Intel XScale 
PXA27x processor can choose from among about 16 validated operating 
points listed in the most recent specification update.  Those operating 
points are comprised of register settings with inscrutable names such as 
CCCR[L], CCCR[2N], CLKCFG[T], CCCR[A], and two or three others.  A few 
of those operating points run the CPU at identical frequencies, but have 
other changes in memory clocking, system bus clocking, and the ability 
to quickly switch between certain cpu frequencies based on other 
properties of the platform (so-called "Turbo-mode" frequency scaling). 
A DPM- or PowerOP-based system can be configured with the subset of 
desired operating points and a particular operating point activated as 
needed.  The policy decision as to what operating point is appropriate 
to activate is a matter for custom code provided by the designer, 
tailored to their system.  It is also possible to write automated 
operating point selection algorithms based on such criteria as system 
busyness.



Who is using this code? Are there vendors that are already shipping
systems with this enabled?

Is this part of the DPM project? If so, what other components are left in
DPM?


The concepts and general Linux implementation of power parameters and 
operating points stems from the power-aware computing work done by 
Bishop Brock and Karthick Rajamani of IBM Research, and a somewhat 
different implementation is a part of the DPM project, which MontaVista 
(and reportedly others in the near future) does ship.  So far as I 
understand there are or soon will be mobile phones that use that code as 
the low- to mid-layers of the power management stack (the high-layer 
policy management is performed by a custom application of which I have 
no knowledge).


I mentioned in a previous email the next step of creating and activating 
operating points from userspace.  If that were in place, DPM would 
additionally consist primarily of:


1. Machine-specific backends to set operating points for the systems 
that DPM has been ported to.  If something like PowerOP is accepted into 
a broader community then that code would come along for the ride. 
XScale PXA27x and various ARM OMAPs are among the systems supported, as 
well as potentially others not yet making an appearance in open source.


2. DPM has further concepts of "operating state" (generally, whether the 
system is idle, processing interrupts, running a normal-power-usage 
task, running a background task without deadlines that can be assigned a 
low power/performance level, etc.) and the unfortunately-named "policy" 
that maps each operating state to an operating point, along with the 
code to switch in different operating points as the system switches 
operating states.  The "policy" is a bit of a misnomer; a system 
designer must create the desired operating points and decide upon the 
state -> point mappings appropriate, as well as make decisions on when 
to update the mappings based on external events, changing workloads, 
etc.  There are a few extra ramifications of modifying operating points 
in this fashion, including the need to handle such transitions while in 
interrupt context or in the idle loop, as well as a general concern for 
low overhead since switching may occur very frequently (such as at every 
entry and exit from 

RE: Please help with following NUMA-related questions

2005-08-09 Thread Xie, Bill
Hello

"numactl --show" will tell whether NUMA support is  enabled.

If there is no numactl in your system, you can check the dmesg. System will 
report 
following words if NUMA support is enabled.

"Scanning NUMA topology in Northbridge 24 "
"  <6>Number of nodes 2 (10010) "
"  <6>Node 0 MemBase  Limit 7fff "
"  <6>Node 1 MemBase 8000 Limit cbff "

The benefit of NUMA is to bring better memory bandwidth. You can use STREAM to 
test your system memory bandwidth. If bandwidth is better than 6000MBps for 
your 
2 node Opterons, NUMA works fine.

Correct BIOS setting for NUMA:

band interleave : AUTO
node interleave : DISABLE
SRAT : AUTO

Best Regards
Bill Xie

-Original Message-
From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] On Behalf Of Sheo Shanker 
Prasad
Sent: Monday, July 25, 2005 9:48 PM
To: linux-kernel@vger.kernel.org
Subject: Please help with following NUMA-related questions

I will greatly appreciate any help regarding the following matters:

(1) How to know whether my machine is NUMA-aware or not,

(2) Difference between memory bank interleaving and node interleaving

(3) When the BIOS asks me to set bank interleaving as AUTO, then it says that 
AUTO allows memory access to spread out over banks on the same node or across 
nodes decreasing memory access contentions. However, I have no idea when the 
memory access is spread over banks on the same node or across nodes. I also do 
not know how to tell the machine to access memory across the nodes or on the 
same node. I have no idea as to how the AUTO choice affects NUMA-awareness.

(4) The BIOS also tells me that I could choose bank interleaving as DISABLED. 
But I do not know what its implications are for NUMA awareness.

Here are other relevant details. I have a dual-Opteron 250 (2.4GHz) set in Tyan 
Thunder 2885 K8W with AMIBIOS version 2.05.

When I bought it last year, the machine was running under SuSE 9.1 Pro and the 
Linux kernel was 2.6.5-7.108-smp. At that time both the Hardware Info from YAST 
and /var/log/messages were explicitly mentioning things :

Scanning NUMA topology in Northbridge 24
  <6>Number of nodes 2 (10010)
  <6>Node 0 MemBase  Limit 7fff
  <6>Node 1 MemBase 8000 Limit cbff
  <6>Using node hash shift of 24

These messages indicated that NODE interleaving was off and the machine was 
NUMA-aware.

Then, after a few months, the motherboard failed and the machine was sent to 
the vendor for repair. It came back with SuSE 9.3 and the Linux kernel version 
2.6.11.4-21.7-smp ([EMAIL PROTECTED]) (gcc version 3.3.5 20050117
(prerelease) (SUSE Linux)) #1 SMP Thu Jun 2 14:23:14 UTC 2005.


Now  both the Hardware Info from YAST and /var/log/messages DO NOT mention NUMA 
anywhere, and I do not have anyway to check whether the NODE-Interleaving is 
OFF or ON. My difficulties are compounded because I do not know how to 
interpret the chipset related setting in the BIOS.

Currently, in the BIOS setting (Chipset->memory config -> Bank Interleaving), I 
am asked to choose between AUTO & DISABLED. No choice is offered for Node 
Interleaving.

The only guidance for the choice is that interleaving allows memory access to 
spread out over banks on the same node or across nodes decreasing memory access 
contentions. Nothing is mentioned about what happens when Interleaving is 
disabled. Furthermore, if I choose AUTO, then I do not know when the memory is 
spread out over banks on the same node or across nodes.

Any help will be greatly appreciated.

Thanking you in advance.

--
Best regards.

Sheo
(Sheo S. Prasad)
Creative Research Enterprises
6354 Camino del Lago
Pleasanton, CA 94566, USA
Voice Phone: (+1) 925 426-9341
Fax   Phone: (+1) 925 426-9417
e-mail: [EMAIL PROTECTED]

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the 
body of a message to [EMAIL PROTECTED] More majordomo info at  
http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: allow the load to grow upto its cpu_power (was Re: [Patch] don't kick ALB in the presence of pinned task)

2005-08-09 Thread Siddha, Suresh B
On Wed, Aug 10, 2005 at 10:27:44AM +1000, Nick Piggin wrote:
> Yeah this makes sense. Thanks.
> 
> I think we'll only need your first line change to fix this, though.
> 
> Your second change will break situations where a single group is very
> loaded, but it is in a domain with lots of cpu_power
> (total_load <= total_power).

In that case, we will move the excess load from that group to some
other group which is below its capacity. Instead of bringing everyone
to avg load, we make sure that everyone is at or below its cpu_power.
This will minimize the movements between the nodes.

For example, Let us assume sched groups node-0, node-1 each has 
4*SCHED_LOAD_SCALE as its cpu_power.

And with 6 tasks on node-0 and 0 on node-1, current load balance 
will move 3 tasks from node-0 to 1. But with my patch, it will move only 
2 tasks to node-1. Is this what you are referring to as breakage?

Even with just the first line change, we will still allow going into
a state of 4 on node-0 and 2 on node-1.

With the second hunk of the patch we are minimizing the movement between nodes
and at the same time making sure everyone is below its cpu_power, when
the system is lightly loaded.

If the group's resources are very critical, groups cpu_power should
represent that criticality.

thanks,
suresh
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: I2C block reads with i2c-viapro: testers wanted

2005-08-09 Thread Salah Coronya
I have a VT8235 chipset, I applied the patch to my kernel 
(2.6.12-gentoo-r6), compared the "before" and "after" eeproms in /sys 
with diff and they are the same.


So it seems to work with VT8235.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC/PATCH] Add pci_walk_bus function to PCI core

2005-08-09 Thread Paul Mackerras
Greg,

Any comments on this patch?  Would you be amenable to it going in post
2.6.13?

The PCI error recovery infrastructure needs to be able to contact all
the drivers affected by a PCI error event, which may mean traversing
all the devices under a given PCI-PCI bridge.  This patch adds a
function to the PCI core that traverses all the PCI devices on a PCI
bus and under any PCI-PCI bridges on that bus (recursively), calling a
given function for each device.  This provides a way for the error
recovery code to iterate through all devices that are affected by an
error event.  This function was originally written by Linas Vepstas
and moved to drivers/pci/bus.c (and slightly modified) by me.

Signed-off-by: Paul Mackerras <[EMAIL PROTECTED]>
---
diff -urN linux-2.6/drivers/pci/bus.c test-pseries/drivers/pci/bus.c
--- linux-2.6/drivers/pci/bus.c 2005-08-03 10:51:36.0 +1000
+++ test-pseries/drivers/pci/bus.c  2005-08-09 17:05:16.0 +1000
@@ -150,6 +150,36 @@
}
 }
 
+/** pci_walk_bus - walk devices on/under bus, calling callback.
+ *  @top  bus whose devices should be walked
+ *  @cb   callback to be called for each device found
+ *  @userdata arbitrary pointer to be passed to callback.
+ *
+ *  Walk the given bus, including any bridged devices
+ *  on buses under this bus.  Call the provided callback
+ *  on each device found.
+ */
+void pci_walk_bus(struct pci_bus *top, pci_buswalk_cb cb, void *userdata)
+{
+   struct pci_dev *dev, *tmp;
+
+   spin_lock(&pci_bus_lock);
+   list_for_each_entry_safe (dev, tmp, &top->devices, bus_list) {
+   pci_dev_get(dev);
+   spin_unlock(&pci_bus_lock);
+
+   /* Run device routines with the bus unlocked */
+   cb(dev, userdata);
+   if (dev->subordinate)
+   pci_walk_bus(dev->subordinate, cb, userdata);
+
+   spin_lock(&pci_bus_lock);
+   pci_dev_put(dev);
+   }
+   spin_unlock(&pci_bus_lock);
+}
+EXPORT_SYMBOL_GPL(pci_walk_bus);
+
 EXPORT_SYMBOL(pci_bus_alloc_resource);
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
 EXPORT_SYMBOL(pci_bus_add_devices);
diff -urN linux-2.6/include/linux/pci.h test-pseries/include/linux/pci.h
--- linux-2.6/include/linux/pci.h   2005-08-10 10:53:31.0 +1000
+++ test-pseries/include/linux/pci.h2005-08-10 11:25:40.0 +1000
@@ -864,6 +864,9 @@
 const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, 
struct pci_dev *dev);
 int pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int 
pass);
 
+typedef void (*pci_buswalk_cb)(struct pci_dev *, void *);
+void pci_walk_bus(struct pci_bus *top, pci_buswalk_cb cb, void *userdata);
+
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
 #include 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC: 2.6 patch] the big Documentation/Changes change

2005-08-09 Thread Adrian Bunk
I edited Documentation/Changes:
- remove obsolete information
- point to feature-list-2.6.txt instead of providing similar information
- removed the URLs of the software packages (people compiling their own
  kernel usually know where to find the required software)

The resulting file is pretty short.


Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---

 Documentation/Changes |  376 +-
 1 files changed, 15 insertions(+), 361 deletions(-)

--- linux-2.6.13-rc5-mm1-full/Documentation/Changes.old 2005-08-10 
03:01:11.0 +0200
+++ linux-2.6.13-rc5-mm1-full/Documentation/Changes 2005-08-10 
03:12:12.0 +0200
@@ -1,435 +1,89 @@
 Intro
 =
 
 This document is designed to provide a list of the minimum levels of
-software necessary to run the 2.6 kernels, as well as provide brief
-instructions regarding any other "Gotchas" users may encounter when
-trying life on the Bleeding Edge.  If upgrading from a pre-2.4.x
-kernel, please consult the Changes file included with 2.4.x kernels for
-additional information; most of that information will not be repeated
-here.  Basically, this document assumes that your system is already
-functional and running at least 2.4.x kernels.
+software necessary to run the 2.6 kernels.
 
 This document is originally based on my "Changes" file for 2.0.x kernels
 and therefore owes credit to the same people as that file (Jared Mauch,
 Axel Boldt, Alessandro Sigala, and countless other users all over the
-'net).
+'net). Chris Ricker was the former maintainer of this file.
 
-The latest revision of this document, in various formats, can always
-be found at .
-
-Feel free to translate this document.  If you do so, please send me a
+Feel free to translate this document.  If you do so, please send a
 URL to your translation for inclusion in future revisions of this
 document.
 
-Smotrite file , yavlyaushisya
-russkim perevodom dannogo documenta.
-
-Visite  para obtener la traducción
-al español de este documento en varios formatos.
-
 Eine deutsche Version dieser Datei finden Sie unter
-.
-
-Last updated: October 29th, 2002
-
-Chris Ricker ([EMAIL PROTECTED] or [EMAIL PROTECTED]).
+.
 
 Current Minimal Requirements
 
 
 Upgrade to at *least* these software revisions before thinking you've
 encountered a bug!  If you're unsure what version you're currently
 running, the suggested command should tell you.
 
-Again, keep in mind that this list assumes you are already
-functionally running a Linux 2.4 kernel.  Also, not all tools are
-necessary on all systems; obviously, if you don't have any ISDN
-hardware, for example, you probably needn't concern yourself with
-isdn4k-utils.
+Not all tools are necessary on all systems; obviously, if you don't
+have any ISDN hardware, for example, you probably needn't concern yourself
+with isdn4k-utils.
 
 o  Gnu C  2.95.3  # gcc --version
 o  Gnu make   3.79.1  # make --version
 o  binutils   2.12# ld -v
 o  util-linux 2.10o   # fdformat --version
 o  module-init-tools  0.9.10  # depmod -V
 o  e2fsprogs  1.29# tune2fs
 o  jfsutils   1.1.3   # fsck.jfs -V
 o  reiserfsprogs  3.6.3   # reiserfsck -V 2>&1|grep 
reiserfsprogs
 o  reiser4progs   1.0.0   # fsck.reiser4 -V
 o  xfsprogs   2.6.0   # xfs_db -V
 o  pcmciautils004
-o  pcmcia-cs  3.1.21  # cardmgr -V
 o  quota-tools3.09# quota -V
 o  PPP2.4.0   # pppd --version
 o  isdn4k-utils   3.1pre1 # isdnctrl 2>&1|grep version
 o  nfs-utils  1.0.5   # showmount --version
 o  procps 3.2.0   # ps --version
 o  oprofile   0.9 # oprofiled --version
 o  udev   058 # udevinfo -V
 
-Kernel compilation
-==
+Notes
+=
+
+Please read feature-list-2.6.txt for information about new features
+and changes compared to 2.4 kernels.
 
 GCC
 ---
 
 The gcc version requirements may vary depending on the type of CPU in your
-computer. The next paragraph applies to users of x86 CPUs, but not
-necessarily to users of other CPUs. Users of other CPUs should obtain
-information about their gcc version requirements from another source.
-
-The recommended compiler for the kernel is gcc 2.95.x (x >= 3), and it
-should be used when you need absolute stability. You may use 

Re: [PATCH] net/ipv4 debug cleanup, kernel 2.6.13-rc5

2005-08-09 Thread Patrick McHardy
Heikki Orsila wrote:
> Here's a small patch to cleanup NETDEBUG() use in net/ipv4/ for Linux 
> kernel 2.6.13-rc5. Also weird use of indentation is changed in some
> places.
> 
> ---
> diff -urp linux-2.6.13-rc5-org/net/ipv4/icmp.c 
> linux-2.6.13-rc5/net/ipv4/icmp.c
> --- linux-2.6.13-rc5-org/net/ipv4/icmp.c  2005-08-02 07:45:48.0 
> +0300
> +++ linux-2.6.13-rc5/net/ipv4/icmp.c  2005-08-07 15:10:42.0 +0300
> @@ -936,8 +936,7 @@ int icmp_rcv(struct sk_buff *skb)
>   case CHECKSUM_HW:
>   if (!(u16)csum_fold(skb->csum))
>   break;
> - NETDEBUG(if (net_ratelimit())
> - printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
> + LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
>   case CHECKSUM_NONE:
>   if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
>   goto error;

These macros always looked a bit ugly to me, with your cleanup there
isn't a single spot left where we require them to accept code as
argument, so how about we change them to pure printk wrappers?

[NET]: Make NETDEBUG pure printk wrappers

Signed-off-by: Patrick McHardy <[EMAIL PROTECTED]>

---
commit a2db7bcdba3678fe8f67cd7d631c01a888031753
tree 7201cec98ca35b5854daebc14e4650ff95eb8571
parent db29e85a7ece62de1899917c1ec0ffe55cf1d3a0
author Patrick McHardy <[EMAIL PROTECTED]> Wed, 10 Aug 2005 03:08:01 +0200
committer Patrick McHardy <[EMAIL PROTECTED]> Wed, 10 Aug 2005 03:08:01 +0200

 include/net/sock.h |4 ++--
 net/ipv4/esp4.c|   12 ++--
 net/ipv4/icmp.c|   12 +---
 net/ipv4/igmp.c|2 +-
 net/ipv4/ip_fragment.c |6 +++---
 net/ipv4/ip_output.c   |2 +-
 net/ipv4/ipcomp.c  |4 ++--
 net/ipv4/tcp_ipv4.c|   11 +--
 net/ipv4/udp.c |   32 
 net/ipv6/ah6.c |   13 ++---
 net/ipv6/datagram.c|4 ++--
 net/ipv6/esp6.c|3 +--
 net/ipv6/exthdrs.c |8 
 net/ipv6/icmp.c|   20 +++-
 net/ipv6/ip6_output.c  |5 ++---
 net/ipv6/raw.c |3 +--
 net/ipv6/tcp_ipv6.c|2 +-
 net/ipv6/udp.c |7 +++
 18 files changed, 68 insertions(+), 82 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1260,8 +1260,8 @@ extern int sock_get_timestamp(struct soc
 #define NETDEBUG(x)do { } while (0)
 #define LIMIT_NETDEBUG(x) do {} while(0)
 #else
-#define NETDEBUG(x)do { x; } while (0)
-#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0)
+#define NETDEBUG(fmt, args...) printk(fmt,##args)
+#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) 
printk(fmt,##args); } while(0)
 #endif
 
 /*
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -331,8 +331,8 @@ static void esp4_err(struct sk_buff *skb
x = xfrm_state_lookup((xfrm_address_t *)>daddr, esph->spi, 
IPPROTO_ESP, AF_INET);
if (!x)
return;
-   NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
-   ntohl(esph->spi), ntohl(iph->daddr)));
+   NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
+ntohl(esph->spi), ntohl(iph->daddr));
xfrm_state_put(x);
 }
 
@@ -395,10 +395,10 @@ static int esp_init_state(struct xfrm_st
 
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_tfm_alg_digestsize(esp->auth.tfm)) {
-   NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != 
%hu\n",
-  x->aalg->alg_name,
-  crypto_tfm_alg_digestsize(esp->auth.tfm),
-  aalg_desc->uinfo.auth.icv_fullbits/8));
+   NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+x->aalg->alg_name,
+crypto_tfm_alg_digestsize(esp->auth.tfm),
+aalg_desc->uinfo.auth.icv_fullbits/8);
goto error;
}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -627,11 +627,10 @@ static void icmp_unreach(struct sk_buff 
break;
case ICMP_FRAG_NEEDED:
if (ipv4_config.no_pmtu_disc) {
-   LIMIT_NETDEBUG(
-   printk(KERN_INFO "ICMP: %u.%u.%u.%u: "
+   LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
 "fragmentation needed "
 "and DF set.\n",
-  NIPQUAD(iph->daddr)));
+  NIPQUAD(iph->daddr));
   

Re: capabilities patch (v 0.1)

2005-08-09 Thread David Wagner
David Madore  wrote:
>I intend to add a couple of capabilities which are normally available
>to all user processes, including capability to exec(), [...]

Once you have a mechanism that lets you prevent the untrusted program
from exec-ing a setuid/setgid program (such as your bounding set idea),
I don't see any added value in preventing the program from calling exec().

"Don't forbid what you can't prevent".  The program can always emulate
the effect of exec() in userspace (for non-setuid/setgid programs) --
doing so is tedious, but nothing prevents a malicious userspace program
from implementing such a thing, I think.

This is only a comment on forbidding exec(), not on anything else in
your proposal.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[ANNOUNCE] Interbench v0.29 - Interactivity benchmark

2005-08-09 Thread Con Kolivas
Interbench is designed to benchmark interactivity in Linux.

Direct download:
http://ck.kolivas.org/apps/interbench/interbench-0.29.tar.bz2

Web:
http://interbench.kolivas.org


Changes (PW: thanks to Peter Williams):
Altered the calibration loop.
Added the option to select loads to perform or not perform - PW
Added optional comment to logfile - PW
Numerous bugfixes - PW


Cheers,
Con
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] pci_find_device and pci_find_slot mark as deprecated

2005-08-09 Thread Jiri Slaby

Jiri Slaby napsal(a):


*It removes most occurrences of pci_find_device in the kernel tree.
*pci_(get|find)_device(x, ANY_ID, ANY_ID, x) changes to 
for_each_pci_dev(x).


Generated in 2.6.13-rc5-mm1 kernel version. 


[...]


 drivers/scsi/qlogicisp.c |3 --


This may not be needed; Adrian Bunk has begun the process of removing that driver.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux-2.6.13-rc6: aic7xxx testers please..

2005-08-09 Thread James Bottomley
On Tue, 2005-08-09 at 16:12 -0400, John Stoffel wrote:
> Thank you for looking into this with me, I really appreciate it.  I'm
> kinda stumped why this suddenly started happening, but it could be
> hardware related of course...

Well ... there's something going on that your posted dmesg's don't seem
to cover.  This:

> Vendor: SUN   Model: DLT7000   Rev: 1E48
> Type:   Sequential-Access  ANSI SCSI revision: 02
>target1:0:6: asynchronous.
>target1:0:6: Beginning Domain Validation
>target1:0:6: wide asynchronous.
>target1:0:6: Domain Validation skipping write tests
>target1:0:6: FAST-10 WIDE SCSI 20.0 MB/s ST (100 ns, offset 8)
>target1:0:6: Ending Domain Validation

Say everything went OK with DV and the drive attaches wide and at 10MHz.

But in your previous posting, the aic proc routines said this:


> Target 6 Negotiation Settings
> User: 40.000MB/s transfers (20.000MHz, offset 127, 16bit)
> Goal: 20.000MB/s transfers (10.000MHz, offset 8, 16bit)
> Curr: 3.300MB/s transfers
> Channel A Target 6 Lun 0 Settings
> Commands Queued 1065
> Commands Active 0
> Command Openings 1
> Max Tagged Openings 0
> Device Queue Frozen Count 0

Which is the AIC driver's way of saying narrow async.

So something must have happened during the 1065 I/Os to cause this.
Hopefully that something left a trace in the logs.

James


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sched_domains SD_BALANCE_FORK and sched_balance_self

2005-08-09 Thread Nick Piggin

Siddha, Suresh B wrote:


On Tue, Aug 09, 2005 at 03:19:58PM -0700, Martin J. Bligh wrote:


--On Tuesday, August 09, 2005 15:03:32 -0700 "Siddha, Suresh B" <[EMAIL 
PROTECTED]> wrote:


Balance on clone makes some sort of sense, since you know they're not
going to exec afterwards. We've thrashed through this many times before
and decided that unless there was an explicit hint from userspace,
balance on fork was not a good thing to do in the general case. Not only
based on a large range of testing, but also previous experience from other
Unix's. What new data came forth to change this?



I agree with you. I will let Nick(the author) have a take at this.




Sorry I've taken a while with this. Darren, I'll reply to you soon.


Send instant messages to your online friends http://au.messenger.yahoo.com 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] pci_find_device and pci_find_slot mark as deprecated

2005-08-09 Thread Jiri Slaby

Greg KH napsal(a):

On Tue, Aug 09, 2005 at 11:58:19AM +0200, Jiri Slaby wrote:


On 8/9/05, Greg KH <[EMAIL PROTECTED]> wrote:


On Tue, Aug 09, 2005 at 01:54:01AM +0200, Jiri Slaby wrote:


This marks these functions as deprecated so that they are not used in new
drivers (they don't use reference counts, and the device they return can
disappear at any time).


Did you forget to send this to the PCI maintainer for some reason?


No, my badness, sorry.



Anyway, no, I don't want these functions marked this way, it's only
going to cause build noise.  I'd much rather you, or others, send me
patches that remove the usage of these functions so I can just delete
them entirely.


When the patch was here
(http://www.fi.muni.cz/~xslaby/lnx/lnx-pci_find-2.6.13-r3g4_3.patch --
it'll be certainly sliced into many pieces; of course I didn't cc you
:(



Yes, I can't take anything so big.  Just break it up into pieces please.


*It removes most occurrences of pci_find_device in the kernel tree.
*pci_(get|find)_device(x, ANY_ID, ANY_ID, x) changes to for_each_pci_dev(x).

Generated in 2.6.13-rc5-mm1 kernel version.

 arch/alpha/kernel/sys_alcor.c|3 +-
 arch/alpha/kernel/sys_sio.c  |8 ++---
 arch/frv/mb93090-mb00/pci-frv.c  |8 +
 arch/frv/mb93090-mb00/pci-irq.c  |4 --
 arch/i386/kernel/cpu/cpufreq/gx-suspmod.c|6 +---
 arch/i386/pci/acpi.c |2 -
 arch/i386/pci/irq.c  |6 ++--
 arch/m68k/atari/hades-pci.c  |4 --
 arch/ppc/kernel/pci.c|   21 ---
 arch/ppc/platforms/85xx/mpc85xx_cds_common.c |9 --
 arch/ppc64/kernel/eeh.c  |2 -
 drivers/char/ip2main.c   |9 +++---
 drivers/char/istallion.c |9 +++---
 drivers/char/mxser.c |5 ++-
 drivers/char/rocket.c|2 -
 drivers/char/specialix.c |   13 ++---
 drivers/char/stallion.c  |6 ++--
 drivers/char/sx.c|2 -
 drivers/char/watchdog/alim1535_wdt.c |   15 --
 drivers/char/watchdog/alim7101_wdt.c |7 +++--
 drivers/char/watchdog/i8xx_tco.c |5 ++-
 drivers/ide/pci/alim15x3.c   |   17 ++--
 drivers/ide/pci/cs5530.c |7 -
 drivers/ide/pci/hpt366.c |   17 
 drivers/ide/pci/pdc202xx_new.c   |   13 +
 drivers/ide/pci/piix.c   |3 --
 drivers/ide/pci/serverworks.c|   17 ++--
 drivers/ide/pci/sis5513.c|3 +-
 drivers/ide/pci/via82cxxx.c  |   14 +-
 drivers/ide/setup-pci.c  |3 --
 drivers/isdn/hisax/avm_pci.c |8 -
 drivers/isdn/hisax/bkm_a4t.c |2 -
 drivers/isdn/hisax/bkm_a8.c  |2 -
 drivers/isdn/hisax/diva.c|8 ++---
 drivers/isdn/hisax/elsa.c|4 +-
 drivers/isdn/hisax/enternow_pci.c|8 ++---
 drivers/isdn/hisax/gazel.c   |2 -
 drivers/isdn/hisax/hfc_pci.c |   12 ++--
 drivers/isdn/hisax/niccy.c   |2 -
 drivers/isdn/hisax/nj_s.c|2 -
 drivers/isdn/hisax/nj_u.c|2 -
 drivers/isdn/hisax/sedlbauer.c   |2 -
 drivers/isdn/hisax/telespci.c|2 -
 drivers/isdn/hisax/w6692.c   |2 -
 drivers/isdn/hysdn/hysdn_init.c  |5 ++-
 drivers/macintosh/via-pmu.c  |   20 +-
 drivers/macintosh/via-pmu68k.c   |6 ++--
 drivers/media/radio/radio-maestro.c  |6 ++--
 drivers/media/video/bttv-cards.c |2 -
 drivers/media/video/stradis.c|3 +-
 drivers/media/video/zoran_card.c |2 -
 drivers/media/video/zr36120.c|9 --
 drivers/mtd/devices/pmc551.c |   15 +-
 drivers/mtd/maps/amd76xrom.c |3 +-
 drivers/mtd/maps/ichxrom.c   |3 +-
 drivers/mtd/maps/l440gx.c|   14 --
 drivers/mtd/maps/scx200_docflash.c   |   23 +---
 drivers/net/gt96100eth.c |   15 +++---
 drivers/net/sunhme.c |5 ++-
 drivers/net/wan/sdladrv.c|   11 +---
 drivers/pci/hotplug/fakephp.c|2 -
 drivers/pci/pci.c|4 +-
 drivers/pci/proc.c   |5 ++-
 drivers/pci/setup-irq.c  |4 +-
 drivers/scsi/BusLogic.c  |6 ++--
 drivers/scsi/advansys.c  |6 +++-
 

Re: sched_domains SD_BALANCE_FORK and sched_balance_self

2005-08-09 Thread Siddha, Suresh B
On Tue, Aug 09, 2005 at 03:19:58PM -0700, Martin J. Bligh wrote:
> --On Tuesday, August 09, 2005 15:03:32 -0700 "Siddha, Suresh B" <[EMAIL 
> PROTECTED]> wrote:
> 
> > On Fri, Aug 05, 2005 at 04:29:45PM -0700, Darren Hart wrote:
> >> I have some concerns as to the intent vs.  actual implementation of 
> >> SD_BALANCE_FORK and the sched_balance_fork() routine.
> > 
> > Intent and implementation match. Problem is with the intent ;-)
> > 
> > This has the intent info.
> > 
> > http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=147cbb4bbe991452698f0772d8292f22825710ba
> > 
> > To solve these issues, we need to make the sched domain and its parameters
> > CMP aware. And dynamically we need to adjust these parameters based
> > on the system properties.
> 
> Can you explain the purpose of doing balance on both fork and exec?
> The reason we did it at exec time is that it's much cheaper to do 
> than at fork - you have very, very little state to deal with. The vast 
> majority of things that fork will exec immediately thereafter.
> 
> Balance on clone makes some sort of sense, since you know they're not
> going to exec afterwards. We've thrashed through this many times before
> and decided that unless there was an explicit hint from userspace,
> balance on fork was not a good thing to do in the general case. Not only
> based on a large range of testing, but also previous experience from other
> Unix's. What new data came forth to change this?

I agree with you. I will let Nick(the author) have a take at this.

> > We can choose the least loaded CPU in the home node and we can let the
> > load balancer move it to other nodes if there is an imbalance.
> 
> Is that what it's actually doing now? That's not what Nick told me at
> Kernel Summit, but is the correct thing to do for clone, I think.

We don't do it today. But I would like to see that.

thanks,
suresh
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6 patch] remove the obsolete SCSI qlogicisp driver

2005-08-09 Thread Adrian Bunk
The SCSI qlogicisp driver is both marked BROKEN and superseded by the 
qla1280 driver.

Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

---

This patch was already sent on:
- 31 Jul 2005
- 22 Jul 2005

Due to its size, the patch is attached compressed.

 Documentation/scsi/00-INDEX  |2 
 Documentation/scsi/qlogicfas.txt |3 
 Documentation/scsi/qlogicisp.txt |   30 
 drivers/scsi/Kconfig |   29 
 drivers/scsi/Makefile|1 
 drivers/scsi/qlogicisp.c | 1934 -
 drivers/scsi/qlogicisp_asm.c | 2034 ---
 7 files changed, 1 insertion(+), 4032 deletions(-)



patch-remove-qlogicisp.gz
Description: Binary data


Re: [linux-pm] PowerOP 1/3: PowerOP core

2005-08-09 Thread Todd Poynor

Geoff Levand wrote:

I'm wondering if anything could be gained by having the whole 
struct powerop_point defined in asm/powerop.h, and treat it as an 
opaque structure at this level.  That way, things other than just 
ints could be passed between the policy manager and the backend, 
although I guess that breaks the beauty of the simplicity and would 
complicate the sys-fs interface, etc.  I'm interested to hear your 
comments.


Making the "operating point" data structure entirely platform-specific 
should be OK.  There's a little value to having generic pieces handle 
some common chores (such as the sysfs interfaces), but even for integers 
decimal vs. hex formatting is nicer depending on the type of value. 
Since most values that have been managed using similar interfaces thus 
far have been flags, register values, voltages, etc. using integers has 
worked well and nicely simplified the platform backend, but if there's a 
need for other data types then should be doable.


Another point is that a policy manager would need to poll the system 
and/or get events and then act.  Your powerop work here only provides 
a (one way) piece of the final action.  Any comments regarding a more 
general interface?


What's discussed here is probably the bottommost layer of a power 
management software stack: to read and write the platform-specific 
system power parameters, optionally arranged into a mutually-consistent 
set called an "operating point".  Power policy management is a large, 
thorny topic that I wasn't trying to tackle now.


So far as kernel-to-userspace event notification goes (assuming the 
power policy manager is in userspace, which is certainly where I'd 
recommend), ACPI has a procfs-based communication channel but the 
kobject_uevent stuff looks like the way I'd go, and it's somewhere on my 
list to come up with a patch that does that as well.


If these general ideas of arbitrary platform power parameters and 
operating points are deemed worthy of continued consideration, I'll 
propose what I view is the next step: interfaces to create and activate 
operating points from userspace.


At that point it should be possible to write power policy management 
applications for systems that can benefit from this generalized notion 
of operating points: create the operating points that match the system 
usage models (in the case of many embedded systems, the system is some 
mode with different power/performance characteristics such as audio 
playback vs. mobile phone call in progress) and power needs (e.g., low 
battery strength vs. high strength) and activate operating points based 
on events received (new app running, low battery warning, etc.).


Any opinions on all that?  Thanks,

--
Todd
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: allow the load to grow upto its cpu_power (was Re: [Patch] don't kick ALB in the presence of pinned task)

2005-08-09 Thread Nick Piggin

Siddha, Suresh B wrote:


For example, let's take two nodes each having two physical packages. And
assume that there are two tasks and both of them are on (may or may not be
pinned) two packages in node-0

Today's load balance will detect that there is an imbalance between the
two nodes and will try to distribute the load between the nodes.

In general, we should allow the load of a group to grow up to its cpu_power
and stop preventing these costly movements.

Appended patch will fix this. I have done limited testing of this patch.
Guys with big NUMA boxes, please give this patch a try. 


--

When the system is lightly loaded, don't bother about the average load.
In this case, allow the load of a sched group to grow up to its cpu_power.




Yeah this makes sense. Thanks.

I think we'll only need your first line change to fix this, though.

Your second change will break situations where a single group is very
loaded, but it is in a domain with lots of cpu_power
(total_load <= total_power).

Nick


Send instant messages to your online friends http://au.messenger.yahoo.com 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: BUG: Real-Time Preemption 2.6.13-rc5-RT-V0.7.52-16

2005-08-09 Thread Daniel Walker

This may fix the warning, but I doubt it does anything for any hangs..

--- linux-2.6.12.orig/drivers/usb/core/hcd.c2005-08-09 22:41:18.0 
+
+++ linux-2.6.12/drivers/usb/core/hcd.c 2005-08-10 00:23:16.0 +
@@ -540,8 +540,7 @@ void usb_hcd_poll_rh_status(struct usb_h
if (length > 0) {

/* try to complete the status urb */
-   local_irq_save (flags);
-   spin_lock(&hcd_root_hub_lock);
+   spin_lock_irqsave(&hcd_root_hub_lock, flags);
urb = hcd->status_urb;
if (urb) {
spin_lock(&urb->lock);






-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[2.6 patch] arm26: one -g is enough for everyone ;-)

2005-08-09 Thread Adrian Bunk
The main Makefile is already adding -g to the CFLAGS if 
CONFIG_DEBUG_INFO=y.

Not that two -g would do harm, but one works as well.


Signed-off-by: Adrian Bunk <[EMAIL PROTECTED]>

--- linux-2.6.13-rc5-mm1/arch/arm26/Makefile.old2005-08-10 
02:18:56.0 +0200
+++ linux-2.6.13-rc5-mm1/arch/arm26/Makefile2005-08-10 02:19:28.0 
+0200
@@ -17,10 +17,6 @@
 CFLAGS +=-fno-omit-frame-pointer -mno-sched-prolog
 endif
 
-ifeq ($(CONFIG_DEBUG_INFO),y)
-CFLAGS +=-g
-endif
-
 CFLAGS_BOOT:=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm
 CFLAGS +=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm
 AFLAGS +=-mapcs-26 -mcpu=arm3 -msoft-float
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] pci_find_device patches

2005-08-09 Thread Jiri Slaby
>Um, one patch per email please.
>
>Doesn't anyone read Documentation/SubmittingPatches anymore...
I did. But I only thought... Let it be. Sorry.

Here they are: the two which you haven't accepted yet (the one which marks
deprecation isn't included because you refused it, and docpci has already
been added to your tree).

regards,
jiri
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] use kthread infrastructure in md

2005-08-09 Thread Neil Brown
On Tuesday August 9, [EMAIL PROTECTED] wrote:
> Switch MD to use the kthread infrastructure, to simplify the code and
> get rid of tasklist_lock abuse in md_unregister_thread.  Long-term I
> wonder whether workqueues wouldn't be a better choice than the
> MD-specific thread wrappers for the lowlevel drivers.
> 

Thanks.  This is definitely a step in the right direction.   However
I think it still needs a bit of work.
The old md_unregister_thread sent a signal to the thread so that if it
was in 'wait_event_interruptible_timeout', that call would complete.
However I cannot see how the new md_unregister_thread will interrupt
the wait_event_interruptible_timeout.
I'll look into it..


Thanks,
NeilBrown
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] removes pci_find_device from parport_pc.c

2005-08-09 Thread Jiri Slaby
[Andrew has already added this to his tree.]

This patch changes pci_find_device to pci_get_device (encapsulated in
for_each_pci_dev).

Generated in 2.6.13-rc5-mm1 kernel version.

Signed-off-by: Jiri Slaby <[EMAIL PROTECTED]>

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -3007,7 +3007,7 @@ static int __init parport_pc_init_superi
struct pci_dev *pdev = NULL;
int ret = 0;
 
-   while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) {
+   for_each_pci_dev(pdev) {
id = pci_match_id(parport_pc_pci_tbl, pdev);
if (id == NULL || id->driver_data >= last_sio)
continue;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -mm 1/2] removes pci_find_device from i6300esb.c

2005-08-09 Thread Jiri Slaby
This patch changes pci_find_device to pci_get_device (encapsulated in
for_each_pci_dev) in i6300esb watchdog card with appropriate adding pci_dev_put.

Generated in 2.6.13-rc5-mm1 kernel version.

Signed-off-by: Jiri Slaby <[EMAIL PROTECTED]>

diff --git a/drivers/char/watchdog/i6300esb.c b/drivers/char/watchdog/i6300esb.c
--- a/drivers/char/watchdog/i6300esb.c
+++ b/drivers/char/watchdog/i6300esb.c
@@ -368,12 +368,11 @@ static unsigned char __init esb_getdevic
  *  Find the PCI device
  */
 
-while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+for_each_pci_dev(dev)
 if (pci_match_id(esb_pci_tbl, dev)) {
 esb_pci = dev;
 break;
 }
-}
 
 if (esb_pci) {
if (pci_enable_device(esb_pci)) {
@@ -430,6 +429,7 @@ err_release:
pci_release_region(esb_pci, 0);
 err_disable:
pci_disable_device(esb_pci);
+   pci_dev_put(esb_pci);
}
 out:
return 0;
@@ -481,6 +481,7 @@ err_unmap:
pci_release_region(esb_pci, 0);
 /* err_disable: */
pci_disable_device(esb_pci);
+   pci_dev_put(esb_pci);
 /* out: */
 return ret;
 }
@@ -497,6 +498,7 @@ static void __exit watchdog_cleanup (voi
iounmap(BASEADDR);
pci_release_region(esb_pci, 0);
pci_disable_device(esb_pci);
+   pci_dev_put(esb_pci);
 }
 
 module_init(watchdog_init);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 1/3] non-resident page tracking

2005-08-09 Thread Rik van Riel
On Tue, 9 Aug 2005, Marcelo Tosatti wrote:

> Two hopefully useful comments:
> 
> i) ARC and its variants require additional information about page
> replacement (namely whether the page has been reclaimed from the L1 or
> L2 lists).
> 
> How costly would it be to add this information to the hash table?

Not at all.  Simply reduce the hash to 31 bits and use the remaining
bit to store that value.

> ii) From my reading of the patch, the provided "distance" information is
> relative to each hash bucket. I'm unable to understand the distance metric
> being useful if measured per-hash-bucket instead of globally?

The idea is that the hash function spreads things around evenly
enough for the different buckets to rotate at roughly the same
speed.

-- 
All Rights Reversed
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -mm] removes pci_find_device from i6300esb.c

2005-08-09 Thread Greg KH
On Tue, Aug 09, 2005 at 06:06:19PM +0200, Jiri Slaby wrote:
> On 8/9/05, Greg KH <[EMAIL PROTECTED]> wrote:
> > So, care to resend all of your pci changes, including the documentation
> > ones, to me?
> Sure:

Um, one patch per email please.

Doesn't anyone read Documentation/SubmittingPatches anymore...

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] PNPACPI: fix IRQ and 64-bit address decoding

2005-08-09 Thread Bjorn Helgaas
On Thursday 04 August 2005 5:26 pm, Bjorn Helgaas wrote:
> Maybe the third time's the charm :-)  Added a bugfix
> (pcibios_penalize_isa_irq()) and a workaround for HP
> HPET firmware description since last time.  The workaround
> accepts stuff that is illegal according to the spec,
> so speak up if you think this is a problem.  It seems
> fairly safe to me.

This patch is in 2.6.13-rc5-mm1 as
pnpacpi-fix-irq-and-64-bit-address-decoding.patch
and it works fine for me.

But plain 2.6.13-rc5, with CONFIG_PNPACPI turned on, hangs
at boot on HP ia64 boxes.  This is because 8250_pnp now
knows about MMIO UARTs, so it tries to poke one using a
64-bit address corrupted by PNPACPI.

CONFIG_PNPACPI is still marked experimental, but we may
want to consider putting the PNPACPI patch in 2.6.14
to fix the hang.

The patch in -mm doesn't apply cleanly, so I rediffed
it against 2.6.13-rc5 and attached it.

PNPACPI: fix IRQ and 64-bit address decoding

Use types that match the ACPI resource structures.  Previously
the u64 value from an RSTYPE_ADDRESS64 was passed as an int,
which corrupts the value.

Move pcibios_penalize_isa_irq() to pnpacpi_parse_allocated_irqresource().
Previously we passed the GSI, not the IRQ, and we did it even if parsing
the IRQ resource failed.

Parse IRQ descriptors that contain multiple interrupts.  This violates
the spec (in _CRS, only one interrupt per descriptor is allowed), but
some firmware does this.  HP rx7620 and rx8620 HPETs have this bug.

Signed-off-by: Bjorn Helgaas <[EMAIL PROTECTED]>

Index: work-vga/drivers/pnp/pnpacpi/rsparser.c
===
--- work-vga.orig/drivers/pnp/pnpacpi/rsparser.c	2005-08-09 16:54:57.0 -0600
+++ work-vga/drivers/pnp/pnpacpi/rsparser.c	2005-08-09 16:55:50.0 -0600
@@ -73,25 +73,35 @@
 }
 
 static void
-pnpacpi_parse_allocated_irqresource(struct pnp_resource_table * res, int irq)
+pnpacpi_parse_allocated_irqresource(struct pnp_resource_table * res, u32 gsi,
+	int edge_level, int active_high_low)
 {
 	int i = 0;
+	int irq;
+
+	if (!valid_IRQ(gsi))
+		return;
+
 	while (!(res->irq_resource[i].flags & IORESOURCE_UNSET) &&
 			i < PNP_MAX_IRQ)
 		i++;
-	if (i < PNP_MAX_IRQ) {
-		res->irq_resource[i].flags = IORESOURCE_IRQ;  //Also clears _UNSET flag
-		if (irq == -1) {
-			res->irq_resource[i].flags |= IORESOURCE_DISABLED;
-			return;
-		}
-		res->irq_resource[i].start =(unsigned long) irq;
-		res->irq_resource[i].end = (unsigned long) irq;
+	if (i >= PNP_MAX_IRQ)
+		return;
+
+	res->irq_resource[i].flags = IORESOURCE_IRQ;  // Also clears _UNSET flag
+	irq = acpi_register_gsi(gsi, edge_level, active_high_low);
+	if (irq < 0) {
+		res->irq_resource[i].flags |= IORESOURCE_DISABLED;
+		return;
 	}
+
+	res->irq_resource[i].start = irq;
+	res->irq_resource[i].end = irq;
+	pcibios_penalize_isa_irq(irq, 1);
 }
 
 static void
-pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table * res, int dma)
+pnpacpi_parse_allocated_dmaresource(struct pnp_resource_table * res, u32 dma)
 {
 	int i = 0;
 	while (i < PNP_MAX_DMA &&
@@ -103,14 +113,14 @@
 			res->dma_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
-		res->dma_resource[i].start =(unsigned long) dma;
-		res->dma_resource[i].end = (unsigned long) dma;
+		res->dma_resource[i].start = dma;
+		res->dma_resource[i].end = dma;
 	}
 }
 
 static void
 pnpacpi_parse_allocated_ioresource(struct pnp_resource_table * res,
-	int io, int len)
+	u32 io, u32 len)
 {
 	int i = 0;
 	while (!(res->port_resource[i].flags & IORESOURCE_UNSET) &&
@@ -122,14 +132,14 @@
 			res->port_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
-		res->port_resource[i].start = (unsigned long) io;
-		res->port_resource[i].end = (unsigned long)(io + len - 1);
+		res->port_resource[i].start = io;
+		res->port_resource[i].end = io + len - 1;
 	}
 }
 
 static void
 pnpacpi_parse_allocated_memresource(struct pnp_resource_table * res,
-	int mem, int len)
+	u64 mem, u64 len)
 {
 	int i = 0;
 	while (!(res->mem_resource[i].flags & IORESOURCE_UNSET) &&
@@ -141,8 +151,8 @@
 			res->mem_resource[i].flags |= IORESOURCE_DISABLED;
 			return;
 		}
-		res->mem_resource[i].start = (unsigned long) mem;
-		res->mem_resource[i].end = (unsigned long)(mem + len - 1);
+		res->mem_resource[i].start = mem;
+		res->mem_resource[i].end = mem + len - 1;
 	}
 }
 
@@ -151,27 +161,28 @@
 	void *data)
 {
 	struct pnp_resource_table * res_table = (struct pnp_resource_table *)data;
+	int i;
 
 	switch (res->id) {
 	case ACPI_RSTYPE_IRQ:
-		if ((res->data.irq.number_of_interrupts > 0) &&
-			valid_IRQ(res->data.irq.interrupts[0])) {
-			pnpacpi_parse_allocated_irqresource(res_table, 
-acpi_register_gsi(res->data.irq.interrupts[0],
-	res->data.irq.edge_level,
-	res->data.irq.active_high_low));
-			pcibios_penalize_isa_irq(res->data.irq.interrupts[0], 1);
+		/*
+		 * Per spec, only one interrupt per descriptor is allowed in
+		 * _CRS, but some firmware violates this, so parse 

Trouble shooting a ten minute boot delay (SiI3112)

2005-08-09 Thread Shaun Jackman
I added a PCI SATA controller to my computer. Immediately after grub
loads the kernel there is a consistent ten minute delay before the
kernel displays its first message. I tested Linux 2.6.8 and 2.6.11
both from Debian, and 2.6.11 from Knoppix, all of which experience the
same delay.

The SATA controller is connected to two 200 GB Seagate SATA
ST3200826AS drives. I managed to install Debian on the system, though
the install was perilous, and once booted the system runs wonderfully!
Any suggestions on how I can trouble shoot the ten minute boot delay?
I don't reboot frequently, but it is irksome.

What's the appropriate mailing list for SATA questions, perhaps
linux-ide or linux-scsi?

Please cc me in your reply. Thanks!
Shaun

$ uname -a
Linux quince 2.6.11-1-k7 #1 Mon Jun 20 21:26:23 MDT 2005 i686 GNU/Linux
# lspci
0000:00:00.0 Host bridge: nVidia Corporation nForce CPU bridge (rev b2)
0000:00:00.1 RAM memory: nVidia Corporation nForce 220/420 Memory
Controller (rev b2)
0000:00:00.2 RAM memory: nVidia Corporation nForce 220/420 Memory
Controller (rev b2)
0000:00:00.3 RAM memory: nVidia Corporation: Unknown device 01aa (rev b2)
0000:00:01.0 ISA bridge: nVidia Corporation nForce ISA Bridge (rev c3)
0000:00:01.1 SMBus: nVidia Corporation nForce PCI System Management (rev c1)
0000:00:02.0 USB Controller: nVidia Corporation nForce USB Controller (rev c3)
0000:00:03.0 USB Controller: nVidia Corporation nForce USB Controller (rev c3)
0000:00:04.0 Ethernet controller: nVidia Corporation nForce Ethernet
Controller(rev c2)
0000:00:05.0 Multimedia audio controller: nVidia Corporation: Unknown
device 01b0 (rev c2)
0000:00:06.0 Multimedia audio controller: nVidia Corporation nForce
Audio (rev c2)
0000:00:08.0 PCI bridge: nVidia Corporation nForce PCI-to-PCI bridge (rev c2)
0000:00:09.0 IDE interface: nVidia Corporation nForce IDE (rev c3)
0000:00:1e.0 PCI bridge: nVidia Corporation nForce AGP to PCI Bridge (rev b2)
0000:01:06.0 Ethernet controller: Accton Technology Corporation
SMC2-1211TX (rev 10)
0000:01:07.0 Multimedia video controller: Internext Compression Inc
iTVC16 (CX23416) MPEG-2 Encoder (rev 01)
0000:01:08.0 Unknown mass storage controller: Silicon Image, Inc.
(formerly CMDTechnology Inc) SiI 3112 [SATALink/SATARaid] Serial ATA
Controller (rev 02)
0000:02:00.0 VGA compatible controller: Matrox Graphics, Inc. MGA G550
AGP (rev01)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Custom IORESOURCE Class

2005-08-09 Thread Matthew Gilbert
On Mon, 2005-08-08 at 23:23 -0500, Kumar Gala wrote:
> On Aug 8, 2005, at 6:17 PM, Adam Belay wrote:
> 
> > On Mon, Aug 08, 2005 at 09:00:21AM -0700, Greg KH wrote:
> >
> >> On Mon, Aug 08, 2005 at 11:11:45AM -0700, Matthew Gilbert wrote:
> >>
> >>> Below is a patch that adds an additional resource class to the
> >>>
> > platform
> >
> >>> resource types. This is to support additional resources that need to
> >>>
> > be passed
> >
> >>> to drivers without overloading the existing specific types. In my
> >>>
> > case, I need
> >
> >>> to send clock information to the driver to enable power management.
> >>>
> >>> Signed-off-by: Matthew Gilbert <[EMAIL PROTECTED]>
> >>>
> >>
> >> Hm, you do realize that Pat's no longer the driver core maintainer?
> >>
> > :)
> >
> >>
> >> Anyway, Russell and Adam, any objections to this patch?
> >>
> >
> > I'm not sure if I agree with this patch.  "struct resource" is used
> > primarily for
> > I/O resource assignment.  Although I agree we may need to add new
> > IORESOURCE types,
> > I'm not sure if clock data belongs here.  I don't think "start" and
> > "end" would be
> > useful for most platform data.  Could you provide more information  
> > about
> > this
> > specific issue and resource type?  Maybe we could create a new sysfs
> > attribute?
> 
> I would also like to understand more about what the need is here.  We  
> have clock data and such but use platform_data for it.

I am using IORESOURCE_MEM to pass in the base addresses of the necessary
clock registers. I also need to pass a fractional divider clk id. The
resource table seemed appropriate because the base addresses and the
divider id are closely related. It's also a great framework for enabling
varying resource lists. Currently I don't use this, but in the future I
may. It's possible that in a future board revision there may not be a
fractional divider available. The resource framework makes querying for
the clk id very straightforward, as opposed to magic values in a struct
I pass through platform_data.

It can easily be moved to platform_data (or split between the two) if
that is more appropriate. Thanks for the feedback. _matt

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bugme-new] [Bug 5003] New: Problem with symbios driver on recent -mm trees

2005-08-09 Thread Martin J. Bligh
--On Tuesday, August 09, 2005 11:55:36 -0500 James Bottomley <[EMAIL PROTECTED]> wrote:

> On Tue, 2005-08-09 at 07:59 -0700, Martin J. Bligh wrote:
>> Dear novice test examiner,
>> 
>> It's in http://test.kernel.org with everything else ;-)
>> 2.6.13-rc4-mm1+jejb_fix ... drills down to:
>> 
>> http://test.kernel.org/10080/debug/console.log
> 
> Well, OK, apparently some novice coder made an error converting from a
> stack allocated buffer to a kmalloc'd one in the sense handling
> routines.
> 
> I think this patch should fix it (or at least restore it to the level of
> bugginess it had before).


Wheee! that fixed it. Thanks very much. Log is here if you want to
peek at it:


http://test.kernel.org/10431/debug/console.log

Triples all round!

M.
 
> James
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -342,12 +342,12 @@ int scsi_execute_req(struct scsi_device 
>   sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL);
>   if (!sense)
>   return DRIVER_ERROR << 24;
> - memset(sense, 0, sizeof(*sense));
> + memset(sense, 0, SCSI_SENSE_BUFFERSIZE);
>   }
>   result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
> sense, timeout, retries, 0);
>   if (sshdr)
> - scsi_normalize_sense(sense, sizeof(*sense), sshdr);
> + scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
>  
>   kfree(sense);
>   return result;
> 
> 
> 
> 


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC][PATCH] Rename PageChecked as PageMiscFS

2005-08-09 Thread Daniel Phillips
On Tuesday 09 August 2005 07:54, Andrew Morton wrote:
> Daniel Phillips <[EMAIL PROTECTED]> wrote:
> > > Suggestion for your next act:
> >
> > ...kill PG_checked please :)  Or at least keep it from spreading.
>
> It already spread - ext3 is using it and I think reiser4.  I thought I had
> a patch to rename it to PG_misc1 or somesuch, but no.

How about this one?

This filesystem-specific flag needs to be prevented from escaping into other
subsystems that might interact, such as VM.  The current usage is exclusively
for directories, except for Reiser4, which uses it for journalling.

Signed-off-by: Daniel Phillips <[EMAIL PROTECTED]>

diff -up --recursive 2.6.13-rc5-mm1.clean/fs/afs/dir.c 
2.6.13-rc5-mm1/fs/afs/dir.c
--- 2.6.13-rc5-mm1.clean/fs/afs/dir.c   2005-06-17 15:48:29.0 -0400
+++ 2.6.13-rc5-mm1/fs/afs/dir.c 2005-08-09 18:59:49.0 -0400
@@ -155,11 +155,11 @@ static inline void afs_dir_check_page(st
}
}
 
-   SetPageChecked(page);
+   SetPageMiscFS(page);
return;
 
  error:
-   SetPageChecked(page);
+   SetPageMiscFS(page);
SetPageError(page);
 
 } /* end afs_dir_check_page() */
@@ -193,7 +193,7 @@ static struct page *afs_dir_get_page(str
kmap(page);
if (!PageUptodate(page))
goto fail;
-   if (!PageChecked(page))
+   if (!PageMiscFS(page))
afs_dir_check_page(dir, page);
if (PageError(page))
goto fail;
diff -up --recursive 2.6.13-rc5-mm1.clean/fs/ext2/dir.c 
2.6.13-rc5-mm1/fs/ext2/dir.c
--- 2.6.13-rc5-mm1.clean/fs/ext2/dir.c  2005-06-17 15:48:29.0 -0400
+++ 2.6.13-rc5-mm1/fs/ext2/dir.c2005-08-09 18:59:51.0 -0400
@@ -112,7 +112,7 @@ static void ext2_check_page(struct page 
if (offs != limit)
goto Eend;
 out:
-   SetPageChecked(page);
+   SetPageMiscFS(page);
return;
 
/* Too bad, we had an error */
@@ -152,7 +152,7 @@ Eend:
dir->i_ino, (page->indexmapping->host);
 
-   WARN_ON(PageChecked(page));
+   WARN_ON(PageMiscFS(page));
if (!page_has_buffers(page))
return 0;
return journal_try_to_free_buffers(journal, page, wait);
@@ -1535,7 +1535,7 @@ out:
  */
 static int ext3_journalled_set_page_dirty(struct page *page)
 {
-   SetPageChecked(page);
+   SetPageMiscFS(page);
return __set_page_dirty_nobuffers(page);
 }
 
diff -up --recursive 2.6.13-rc5-mm1.clean/fs/freevxfs/vxfs_subr.c 
2.6.13-rc5-mm1/fs/freevxfs/vxfs_subr.c
--- 2.6.13-rc5-mm1.clean/fs/freevxfs/vxfs_subr.c2005-08-09 
18:23:11.0 -0400
+++ 2.6.13-rc5-mm1/fs/freevxfs/vxfs_subr.c  2005-08-09 18:59:54.0 
-0400
@@ -79,7 +79,7 @@ vxfs_get_page(struct address_space *mapp
kmap(pp);
if (!PageUptodate(pp))
goto fail;
-   /** if (!PageChecked(pp)) **/
+   /** if (!PageMiscFS(pp)) **/
/** vxfs_check_page(pp); **/
if (PageError(pp))
goto fail;
diff -up 

Re: Regression: radeonfb: No synchronisation on CRT with linux-2.6.13-rc5

2005-08-09 Thread Bodo Eggert
On Tue, 9 Aug 2005, Bodo Eggert wrote:
> On Mon, 8 Aug 2005, Benjamin Herrenschmidt wrote:
> > On Mon, 2005-08-08 at 02:06 +0200, Bodo Eggert wrote:

> > > The wrong values are constant across reboots (see my first mail), and I 
> > > have a CRT.
> > > 
> > > Can you tell me where the timing values are read?
> > 
> > radeon_write_mode() programs the mode. The monitor timing infos are read
> > by the various bits of code in radeon_monitor.c
> > 
> > I'd be curious if you could identify what bit of code is misbehaving
> 
> I added preempt_*able around radeon_probe_i2c_connector, and now I get the 
> output from below and still no sync. Obviously you shouldn't msleep in 
> preempt-disabled code. I'll try voluntary preemption, but that will at 
> best hide the error.

Update: voluntary preemption does not cause bad readings.
-- 
Who is General Failure and why is he reading my disk? 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


allow the load to grow upto its cpu_power (was Re: [Patch] don't kick ALB in the presence of pinned task)

2005-08-09 Thread Siddha, Suresh B
On Tue, Aug 02, 2005 at 11:27:17AM +0200, Ingo Molnar wrote:
> 
> * Siddha, Suresh B <[EMAIL PROTECTED]> wrote:
> 
> > Jack Steiner brought this issue at my OLS talk.
> > 
> > Take a scenario where two tasks are pinned to two HT threads in a physical
> > package. Idle packages in the system will keep kicking migration_thread
> > on the busy package without any success.
> > 
> > We will run into similar scenarios in the presence of CMP/NUMA.
> > 
> > Patch appended.
> > 
> > Signed-off-by: Suresh Siddha <[EMAIL PROTECTED]>
> 
> nice catch!
> 
> fine for -mm, but i dont think we need this fix in 2.6.13, as the effect 
> of the bug is an extra context-switch per 'CPU goes idle' event, in this 
> very specific (and arguably broken) task binding scenario.

No. This is not a broken scenario. It's possible in the NUMA case as well.

For example, let's take two nodes, each having two physical packages. And
assume that there are two tasks and both of them are on (may or may not be
pinned) two packages in node-0.

Today's load balance will detect that there is an imbalance between the
two nodes and will try to distribute the load between the nodes.

In general, we should allow the load of a group to grow up to its cpu_power
and prevent these costly movements.

Appended patch will fix this. I have done limited testing of this patch.
Guys with big NUMA boxes, please give this patch a try. 

--

When the system is lightly loaded, don't bother about the average load.
In this case, allow the load of a sched group to grow up to its cpu_power.

Signed-off-by: Suresh Siddha <[EMAIL PROTECTED]>

--- linux-2.6.13-rc5/kernel/sched.c~2005-08-09 13:30:19.067072328 -0700
+++ linux-2.6.13-rc5/kernel/sched.c 2005-08-09 14:39:08.363323880 -0700
@@ -1932,9 +1932,23 @@
group = group->next;
} while (group != sd->groups);
 
-   if (!busiest || this_load >= max_load)
+   if (!busiest || this_load >= max_load || max_load <= SCHED_LOAD_SCALE)
goto out_balanced;
 
+   /* When the system is lightly loaded, don't bother about
+* the average load. Just make sure all the sched groups
+* are with in their capacities (i.e., load <= group's cpu_power)
+*/
+   if (total_load <= total_pwr) {
+   if (this_load >= SCHED_LOAD_SCALE)
+   goto out_balanced;
+
+   *imbalance = min((max_load - SCHED_LOAD_SCALE) * 
busiest->cpu_power,
+(SCHED_LOAD_SCALE - this_load) * 
this->cpu_power) / SCHED_LOAD_SCALE;
+
+   goto fix_imbalance;
+   }
+
avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr;
 
if (this_load >= avg_load ||
@@ -1957,6 +1971,7 @@
(avg_load - this_load) * this->cpu_power)
/ SCHED_LOAD_SCALE;
 
+fix_imbalance:
if (*imbalance < SCHED_LOAD_SCALE) {
unsigned long pwr_now = 0, pwr_move = 0;
unsigned long tmp;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Dual 2.8ghz xeon, software raid, lvm, jfs

2005-08-09 Thread Sonny Rao
On Tue, Aug 09, 2005 at 09:44:56AM -0500, Phil Dier wrote:
> Hi,
> 
> I have 2 identical dual 2.8ghz xeon machines with 4gb ram, using
> software raid 10 with lvm layered on top, formatted with JFS (though
> at this point any filesystem with online resizing support will do). I
> have the boxes stable using 2.6.10, and they pass my stress test. I was
> trying to update to 2.6.12 so I can use the new ionice utility, but I'm
> experiencing oopses again. Can someone take a look at my info and give
> me an idea of what is causing my problems? I'm willing to test patches
> if anyone cares to work with me on a fix.  All the details can be found
> here:
> 
> http://www.icglink.com/cluster-debug-2.6.12.3.html
> 
> Please CC me on replies, as I am not subscribed to l-k.
> 
> Thanks for your help.

Generally on lkml, you want to post at least the output of an oops or
panic into your post.

Now, try running 2.6.13-rc6 and see if it fixes your problem, IIRC
there have been a number of changes to the MPT driver between those two
kernel versions.

Sonny
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: smbus driver for ati xpress 200m

2005-08-09 Thread Andi Kleen
On Tue, Aug 09, 2005 at 11:50:53AM -0700, yhlu wrote:
> Is anyone working on adding an SMBus driver for the ATI Xpress 200M?
> 
> Without it, my Turion notebook cannot read the battery status.

Normally this should be done in ACPI battery.c

-Andi
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread Bodo Eggert
On Tue, 9 Aug 2005, Chris Wright wrote:
> * Bodo Eggert ([EMAIL PROTECTED]) wrote:

> > 1) I wouldn't want an exploited service to gain any privileges, even by
> >chaining userspace exploits (e.g. exec sendmail < exploitstring).  For
> >most services, I'd like CAP_EXEC being unset (but it doesn't exist).
> 
> Don't let it exec things it shouldn't.  This can be done with namespaces
> or for finer-grained, that is what smth like SELinux is made for.

Namespaces may be OK for bind, but things like samba can't really use them 
and SELinux sounds more heavyweight (for brain and CPU).

> > 2) There are environments (linux-vserver.org) which limit root to a subset
> >of capabilities. I think they might use that feature, too. Off cause a
> >simple "suid bit" == "all capabilities" scheme won't work there.
> 
> IIRC, they effectively use the bounded set as per-context.  So it'd not
> make any difference there.

It could possibly be combined into one mechanism (less intrusive patch).

-- 
Funny quotes:
14. Eagles may soar, but weasels don't get sucked into jet engines.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ppc32: Added support for the Book-E style Watchdog Timer

2005-08-09 Thread Kumar Gala


On Aug 9, 2005, at 5:01 PM, Andrew Morton wrote:


Kumar Gala <[EMAIL PROTECTED]> wrote:



PowerPC 40x and Book-E processors support a watchdog timer at the


processor


core level.  The timer has implementation dependent timeout


frequencies


that can be configured by software.

On the first Watchdog timeout we get a critical exception.  It is


left


to board specific code to determine what should happen at this point.


If


nothing is done and another timeout period expires the processor may
attempt to reset the machine.

Command line parameters:
  wdt=0 : disable watchdog (default)
  wdt=1 : enable watchdog

  wdt_period=N : N sets the value of the Watchdog Timer Period.

  The Watchdog Timer Period meaning is implementation specific. Check
  User Manual for the processor for more details.

This patch is based off of work done by Takeharu Kato.

...

+#ifdef CONFIG_BOOKE_WDT
+/* Checks wdt=x and wdt_period=xx command-line option */
+int __init early_parse_wdt(char *p)
+{
+extern u32 wdt_enable;
+
+if (p && strncmp(p, "0", 1) != 0)
+   wdt_enable = 1;
+
+return 0;
+}
+early_param("wdt", early_parse_wdt);
+
+int __init early_parse_wdt_period (char *p)
+{
+extern u32 wdt_period;
+
+if (p)
+wdt_period = simple_strtoul(p, NULL, 0);
+
+return 0;
+}




Would prefer to see the declaration of wdt_period in a header file,
please.

But beware that wdt_enable() is already a static symbol in a couple of
watchdog drivers.  It might be best to rename the ppc global to
something
less generic-sounding while you're there.


Ok, will make these changes and send an updated patch.

- kumar

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread Chris Wright
* Bodo Eggert ([EMAIL PROTECTED]) wrote:
> 1) I wouldn't want an exploited service to gain any privileges, even by
>chaining userspace exploits (e.g. exec sendmail < exploitstring).  For
>most services, I'd like CAP_EXEC being unset (but it doesn't exist).

Don't let it exec things it shouldn't.  This can be done with namespaces
or for finer-grained, that is what smth like SELinux is made for.

> 2) There are environments (linux-vserver.org) which limit root to a subset
>of capabilities. I think they might use that feature, too. Of course a
>simple "suid bit" == "all capabilities" scheme won't work there.

IIRC, they effectively use the bounded set as per-context.  So it'd not
make any difference there.

thanks,
-chris
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sched_domains SD_BALANCE_FORK and sched_balance_self

2005-08-09 Thread Martin J. Bligh


--On Tuesday, August 09, 2005 15:03:32 -0700 "Siddha, Suresh B" <[EMAIL PROTECTED]> wrote:

> On Fri, Aug 05, 2005 at 04:29:45PM -0700, Darren Hart wrote:
>> I have some concerns as to the intent vs.  actual implementation of 
>> SD_BALANCE_FORK and the sched_balance_fork() routine.
> 
> Intent and implementation match. Problem is with the intent ;-)
> 
> This has the intent info.
> 
> http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=147cbb4bbe991452698f0772d8292f22825710ba
> 
> To solve these issues, we need to make the sched domain and its parameters
> CMP aware. And dynamically we need to adjust these parameters based
> on the system properties.

Can you explain the purpose of doing balance on both fork and exec?
The reason we did it at exec time is that it's much cheaper to do 
than at fork - you have very, very little state to deal with. The vast 
majority of things that fork will exec immediately thereafter.

Balance on clone makes some sort of sense, since you know they're not
going to exec afterwards. We've thrashed through this many times before
and decided that unless there was an explicit hint from userspace,
balance on fork was not a good thing to do in the general case. Not only
based on a large range of testing, but also previous experience from other
Unixes. What new data came forth to change this?

>> It seems to me that the best CPU for a forked process would be an idle 
>> CPU on the same  node as the parent in order to stay close to its memory.  
>> Failing this, we may need to move to other nodes if they are idle enough 
>> to warrant the move across node boundaries.  Thoughts?
> 
> We can choose the least loaded CPU in the home node and we can let the
> load balancer move it to other nodes if there is an imbalance.

Is that what it's actually doing now? That's not what Nick told me at
Kernel Summit, but is the correct thing to do for clone, I think.
 
> For exec, we can have the SD_BALANCE_EXEC for all the sched domains, which
> is the case today.

Yup.
 
M.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: irqpoll causing some breakage?

2005-08-09 Thread Daniel Drake

Alan Cox wrote:

What do the other reports look like ?



Here's one:

http://forums.gentoo.org/viewtopic-t-361718-highlight-irqpoll.html

This possibly suggests that the irqpoll patch actually caused a "nobody cared" 
which wasn't there previously. (Now that I have looked closer at the patch, I 
realise how unlikely this is, but this was my reaction at the time!)


I had another report like that by email (another network adapter, "nobody 
cared" message appeared which wasn't there before). The revision difference 
was even smaller and again irqpoll was my suspect. But he never responded to 
my request to test reverting the irqpoll patch and file a bug. I'll dig up the 
email and send a reminder.


Given that I haven't been able to pinpoint irqpoll as the cause of these, I 
don't think you should worry about them at this stage. The only interesting 
one at the moment is the keyboard/mouse thing...


Daniel
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Daniel Phillips
Hi Nick,

Did you know that your patches do not actually specify which kernel tree you 
diffed against?

Regards,

Daniel
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sched_domains SD_BALANCE_FORK and sched_balance_self

2005-08-09 Thread Siddha, Suresh B
On Fri, Aug 05, 2005 at 04:29:45PM -0700, Darren Hart wrote:
> I have some concerns as to the intent vs.  actual implementation of 
> SD_BALANCE_FORK and the sched_balance_fork() routine.

Intent and implementation match. Problem is with the intent ;-)

This has the intent info.

http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=147cbb4bbe991452698f0772d8292f22825710ba

To solve these issues, we need to make the sched domain and its parameters
CMP aware. And dynamically we need to adjust these parameters based
on the system properties.

> SD_NODE_INIT for $ARCHS contains SD_BALANCE_FORK, and no other SD_*_INIT 
> routines do.  This seems strange to me as it would seem more appropriate 
> to balance within a node on fork as to not have to access the duplicated 
> mm across nodes.  If we are going to use SD_BALANCE_FORK, wouldn't it 
> make sense to push it down the sched_domain hierarchy to the SD_CPU_INIT 
> level?

Ideally SD_BALANCE_FORK needs to be set for the domains starting from the
lowest domain to the SMP domain.

> It seems to me that the best CPU for a forked process would be an idle 
> CPU on the same  node as the parent in order to stay close to it's memory.  
> Failing this, we may need to move to other nodes if they are idle enough 
> to warrant the move across node boundaries.  Thoughts?

We can choose the least loaded CPU in the home node and we can let the
load balance to move it to other nodes if there is an imbalance.

For exec, we can have the SD_BALANCE_EXEC for all the sched domains, which
is the case today.

>  while (sd) {
>  cpumask_t span;
>  struct sched_group *group;
>  int new_cpu;
>  int weight;
> 
>  span = sd->span;
>  group = find_idlest_group(sd, t, cpu);
>  if (!group)
>  goto nextlevel;
> 
>  new_cpu = find_idlest_cpu(group, cpu);
>  if (new_cpu == -1 || new_cpu == cpu)
>  goto nextlevel;
> 
>  /* Now try balancing at a lower domain level */
>  cpu = new_cpu;
> nextlevel:
>  sd = NULL;
>  weight = cpus_weight(span);
>  for_each_domain(cpu, tmp) {
>  if (weight <= cpus_weight(tmp->span))
>  break;
>  if (tmp->flags & flag)
>  sd = tmp;
>  }
> 
> If I am reading it right, this for_each_domain will exit immediately if 
> jumped to via nextlevel and will only do any work if a new cpu is found 
> to run on (which is fair sense there is no need to keep looking if the 
> whole system doesn't have a better place for us to go).  If a new cpu 
> _is_ assigned though, for_each_domain will start with the lowest level 
> domain - which always has the smallest cpus_weight doesn't it?  If so, 
> won't the (weight <= cpu...) condition always equate to true, ending the 

No. The last loop will take you to the domain which has the flag and is
immediately below the parent domain from where we started.

thanks,
suresh
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -mm] removes pci_find_device from i6300esb.c

2005-08-09 Thread Greg KH
On Tue, Aug 09, 2005 at 11:28:16AM +0200, Jiri Slaby wrote:
> Andrew Morton napsal(a):
> 
> >Jiri Slaby <[EMAIL PROTECTED]> wrote:
> > 
> >
> >>--- a/drivers/char/watchdog/i6300esb.c
> >>+++ b/drivers/char/watchdog/i6300esb.c
> >>@@ -368,12 +368,11 @@ static unsigned char __init esb_getdevic
> >>  *  Find the PCI device
> >>  */
> >> 
> >>-while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != 
> >>NULL) {
> >>+for_each_pci_dev(dev)
> >> if (pci_match_id(esb_pci_tbl, dev)) {
> >> esb_pci = dev;
> >> break;
> >> }
> >>-}
> >> 
> >> if (esb_pci) {
> >>if (pci_enable_device(esb_pci)) {
> >>@@ -430,6 +429,7 @@ err_release:
> >>pci_release_region(esb_pci, 0);
> >> err_disable:
> >>pci_disable_device(esb_pci);
> >>+   pci_dev_put(esb_pci);
> >>   
> >>
> >
> >That doesn't look right.  Each iteration of for_each_pci_dev() needs a
> >pci_dev_put(), not just the final one.

Not true, see the documentation for pci_get_device(), it's only required
if you break out of the loop.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ppc32: Added support for the Book-E style Watchdog Timer

2005-08-09 Thread Andrew Morton
Kumar Gala <[EMAIL PROTECTED]> wrote:
>
> PowerPC 40x and Book-E processors support a watchdog timer at the processor
> core level.  The timer has implementation dependent timeout frequencies
> that can be configured by software. 
> 
> One the first Watchdog timeout we get a critical exception.  It is left
> to board specific code to determine what should happen at this point.  If
> nothing is done and another timeout period expires the processor may
> attempt to reset the machine.
> 
> Command line parameters:
>   wdt=0 : disable watchdog (default)
>   wdt=1 : enable watchdog
> 
>   wdt_period=N : N sets the value of the Watchdog Timer Period.
> 
>   The Watchdog Timer Period meaning is implementation specific. Check
>   User Manual for the processor for more details.
> 
> This patch is based off of work done by Takeharu Kato.
> 
> ...
> 
> +#ifdef CONFIG_BOOKE_WDT
> +/* Checks wdt=x and wdt_period=xx command-line option */
> +int __init early_parse_wdt(char *p)
> +{
> + extern u32 wdt_enable;
> +
> + if (p && strncmp(p, "0", 1) != 0)
> +wdt_enable = 1;
> +
> + return 0;
> +}
> +early_param("wdt", early_parse_wdt);
> +
> +int __init early_parse_wdt_period (char *p)
> +{
> + extern u32 wdt_period;
> +
> + if (p)
> + wdt_period = simple_strtoul(p, NULL, 0);
> +
> + return 0;
> +}


Would prefer to see the declaration of wdt_period in a header file, please.

But beware that wdt_enable() is already a static symbol in a couple of
watchdog drivers.  It might be best to rename the ppc global to something
less generic-sounding while you're there.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] pci_find_device and pci_find_slot mark as deprecated

2005-08-09 Thread Greg KH
On Tue, Aug 09, 2005 at 11:58:19AM +0200, Jiri Slaby wrote:
> On 8/9/05, Greg KH <[EMAIL PROTECTED]> wrote:
> > On Tue, Aug 09, 2005 at 01:54:01AM +0200, Jiri Slaby wrote:
> > > This marks these functions as deprecated not to use in latest drivers (it
> > > doesn't use reference counts and the device returned by it can disappear 
> > > in
> > > any time).
> > 
> > Did you forget to send this to the PCI maintainer for some reason?
> No, my badness, sorry.
> 
> > Anyway, no, I don't want these functions marked this way, it's only
> > going to cause build noise.  I'd much rather you, or others, send me
> > patches that remove the usage of these functions so I can just delete
> > them entirely.
> When the patch was here
> (http://www.fi.muni.cz/~xslaby/lnx/lnx-pci_find-2.6.13-r3g4_3.patch --
> it'll be certainly sliced into many pieces; of course I didn't cc you
> :(

Yes, I can't take anything so big.  Just break it up into pieces please.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


BUG: Real-Time Preemption 2.6.13-rc5-RT-V0.7.52-16

2005-08-09 Thread Andrew Burgess
This particular module (uhci-hcd) caused hangs with many recent up rt kernels
when loaded by rc.sysinit so I put it in the hotplug blacklist and loaded it
manually later. This time I got a BUG.

On a probably separate issue: I've tried this smp kernel a few times and half
the time I have no keyboard and all the times I tried I have no mouse. Makes
things difficult :-) I'm going to try to dig up a usb mouse to see if that works
around it. Anyone else see this?

The up version of this kernel is much more useable for me. This is a P4HT cpu.

messages:

Aug  9 13:51:39 cichlid kernel: Linux version 2.6.13-rc5-RT-V0.7.52-16-smp-2 
([EMAIL PROTECTED]) (gcc version 3.4.4 20050721 (Red Hat 3.4.4-2)) #2 SMP Mon 
Aug 8 17:28:41 PDT 2005
...
Aug  9 13:51:40 cichlid kernel: Kernel command line: ro root=LABEL=/ rhgb 
vga=791 pci=noacpi noapic acpi=ht single
...
Aug  9 13:52:35 cichlid kernel: USB Universal Host Controller Interface driver 
v2.3
Aug  9 13:52:35 cichlid kernel: PCI: Found IRQ 11 for device :00:1d.0
Aug  9 13:52:35 cichlid kernel: PCI: Sharing IRQ 11 with :00:1d.3
Aug  9 13:52:35 cichlid kernel: uhci_hcd :00:1d.0: Intel Corporation 
82801EB/ER (ICH5/ICH5R) USB UHCI Controller #1
Aug  9 13:52:35 cichlid kernel: uhci_hcd :00:1d.0: new USB bus registered, 
assigned bus number 2
Aug  9 13:52:35 cichlid kernel: uhci_hcd :00:1d.0: irq 11, io base 
0xbc00
Aug  9 13:52:35 cichlid kernel: hub 2-0:1.0: USB hub found
Aug  9 13:52:35 cichlid kernel: hub 2-0:1.0: 2 ports detected
Aug  9 13:52:35 cichlid x10: insmod 
/lib/modules/2.6.13-rc5-RT-V0.7.52-16-smp-2/kernel/drivers/usb/host/uhci-hcd.ko 
Aug  9 13:52:35 cichlid kernel: BUG: scheduling with irqs disabled: 
modprobe/0x2000/10765
Aug  9 13:52:35 cichlid kernel: caller is __down_mutex+0x484/0x62a
Aug  9 13:52:35 cichlid kernel:  [dump_stack+30/32] dump_stack+0x1e/0x20 (20)
Aug  9 13:52:35 cichlid kernel:  [] dump_stack+0x1e/0x20 (20)
Aug  9 13:52:36 cichlid kernel:  [schedule+161/274] schedule+0xa1/0x112 (28)
Aug  9 13:52:36 cichlid kernel:  [] schedule+0xa1/0x112 (28)
Aug  9 13:52:36 cichlid kernel:  [__down_mutex+1156/1578] 
__down_mutex+0x484/0x62a (124)
Aug  9 13:52:36 cichlid kernel:  [] __down_mutex+0x484/0x62a (124)
Aug  9 13:52:36 cichlid kernel:  [_spin_lock_irqsave+31/73] 
_spin_lock_irqsave+0x1f/0x49 (28)
Aug  9 13:52:36 cichlid kernel:  [] _spin_lock_irqsave+0x1f/0x49 (28)
Aug  9 13:52:36 cichlid kernel:  [urb_unlink+28/113] urb_unlink+0x1c/0x71 (28)
Aug  9 13:52:36 cichlid kernel:  [] urb_unlink+0x1c/0x71 (28)
Aug  9 13:52:36 cichlid kernel:  [usb_hcd_giveback_urb+22/116] 
usb_hcd_giveback_urb+0x16/0x74 (28)
Aug  9 13:52:36 cichlid kernel:  [] usb_hcd_giveback_urb+0x16/0x74 
(28)
Aug  9 13:52:36 cichlid kernel:  [usb_hcd_poll_rh_status+196/360] 
usb_hcd_poll_rh_status+0xc4/0x168 (48)
Aug  9 13:52:36 cichlid kernel:  [] usb_hcd_poll_rh_status+0xc4/0x168 
(48)
Aug  9 13:52:36 cichlid kernel:  [usb_add_hcd+795/922] usb_add_hcd+0x31b/0x39a 
(56)
Aug  9 13:52:36 cichlid kernel:  [] usb_add_hcd+0x31b/0x39a (56)
Aug  9 13:52:36 cichlid kernel:  [usb_hcd_pci_probe+608/869] 
usb_hcd_pci_probe+0x260/0x365 (60)
Aug  9 13:52:36 cichlid kernel:  [] usb_hcd_pci_probe+0x260/0x365 (60)
Aug  9 13:52:36 cichlid kernel:  [__pci_device_probe+73/87] 
__pci_device_probe+0x49/0x57 (28)
Aug  9 13:52:36 cichlid kernel:  [] __pci_device_probe+0x49/0x57 (28)
Aug  9 13:52:36 cichlid kernel:  [pci_device_probe+43/75] 
pci_device_probe+0x2b/0x4b (24)
Aug  9 13:52:36 cichlid kernel:  [] pci_device_probe+0x2b/0x4b (24)
Aug  9 13:52:36 cichlid kernel:  [driver_probe_device+51/173] 
driver_probe_device+0x33/0xad (36)
Aug  9 13:52:36 cichlid kernel:  [] driver_probe_device+0x33/0xad (36)
Aug  9 13:52:36 cichlid kernel:  [__driver_attach+65/81] 
__driver_attach+0x41/0x51 (24)
Aug  9 13:52:36 cichlid kernel:  [] __driver_attach+0x41/0x51 (24)
Aug  9 13:52:37 cichlid kernel:  [bus_for_each_dev+87/119] 
bus_for_each_dev+0x57/0x77 (48)
Aug  9 13:52:37 cichlid kernel:  [] bus_for_each_dev+0x57/0x77 (48)
Aug  9 13:52:37 cichlid kernel:  [driver_attach+40/42] driver_attach+0x28/0x2a 
(24)
Aug  9 13:52:37 cichlid kernel:  [] driver_attach+0x28/0x2a (24)
Aug  9 13:52:37 cichlid kernel:  [bus_add_driver+122/219] 
bus_add_driver+0x7a/0xdb (36)
Aug  9 13:52:37 cichlid kernel:  [] bus_add_driver+0x7a/0xdb (36)
Aug  9 13:52:37 cichlid kernel:  [driver_register+84/91] 
driver_register+0x54/0x5b (32)
Aug  9 13:52:37 cichlid kernel:  [] driver_register+0x54/0x5b (32)
Aug  9 13:52:37 cichlid kernel:  [pci_register_driver+149/173] 
pci_register_driver+0x95/0xad (28)
Aug  9 13:52:37 cichlid kernel:  [] pci_register_driver+0x95/0xad (28)
Aug  9 13:52:37 cichlid kernel:  [pg0+951435395/1069052928] 
uhci_hcd_init+0x83/0xf6 [uhci_hcd] (36)
Aug  9 13:52:37 cichlid kernel:  [] uhci_hcd_init+0x83/0xf6 
[uhci_hcd] (36)
Aug  9 13:52:37 cichlid kernel:  [sys_init_module+370/573] 
sys_init_module+0x172/0x23d (32)
Aug  9 13:52:37 cichlid kernel:  [] sys_init_module+0x172/0x23d (32)
Aug  9 

Re: PROBLEM: "drive appears confused" and "irq 18: nobody cared!"

2005-08-09 Thread Alan Cox
On Maw, 2005-08-09 at 19:28 +0200, Alexander Fieroch wrote:
> Andrew Morton wrote:
> > Please check 2.6.13-rc6 when it's out - this might fix the IRQ problem.
> 
> The errors "irq XXX: nobody cared" and "hdb: cdrom_pc_intr: The drive
> appears confused (ireason = 0x01)"  still occur in kernel 2.6.13rc6-git1.

Please stop cc'ing me about this. I have nothing to do with the ACPI IRQ
routing code or your BIOS, and these are not IDE bugs.

Thanks
Alan

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Martin J. Bligh
>> On Tue, Aug 09, 2005 at 07:38:52AM -0700, Martin J. Bligh wrote:
>>> pfn_valid() doesn't tell you it's RAM or not - it tells you whether you
>>> have a backing struct page for that address. Could be an IO mapped device,
>>> a small memory hole, whatever.
>> 
>> The only things which have a struct page is RAM.  Nothing else does.
> 
> That's not true at all. Every physical address covered by the machine
> that we may need to access, plus every small hole we didn't use 
> discontigmem to exclude has a backing struct page. See e820 maps.

OK, on second thoughts, that's not quite true. Not every phys address
will (e.g. PCI window etc.), but it's certainly not just RAM pages.

M.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread David Madore
On Tue, Aug 09, 2005 at 11:36:00PM +0200, Bodo Eggert wrote:
> 1) I wouldn't want an exploited service to gain any privileges, even by
>chaining userspace exploits (e.g. exec sendmail < exploitstring).  For
>most services, I'd like CAP_EXEC being unset (but it doesn't exist).

I intend to add a couple of capabilities which are normally available
to all user processes, including capability to exec(), capability to
fork() and a couple of others (maybe a capability to perform any kind
of write operation, but that seems a bit more difficult to implement).
So keep an eye open[#] for future versions of my patch.

-- 
 David A. Madore
([EMAIL PROTECTED],
 http://www.madore.org/~david/ )

[#] On the other hand, I have a strong tendency not to finish anything
I start :-( so maybe this is all just vaporware.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: irqpoll causing some breakage?

2005-08-09 Thread Alan Cox
On Maw, 2005-08-09 at 18:12 +0100, Daniel Drake wrote:
> Alan Cox wrote:
> > Without the parameters it has exactly zero effect on the operation of
> > the kernel, the algorithms and the behaviour. So something odd is afoot
> > if its causing gentoo breakages.
> 
> Thats what I thought, yet it seems to be the difference between mouse and no 
> mouse in this case.
> 
> Strange. We'll try a different compiler.

What do the other reports look like ?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Standardize shutdown of the system from enviroment control

2005-08-09 Thread Aaron Young
> 
> 
> On Tue, 9 Aug 2005, Christoph Hellwig wrote:
> 
> > Currently snsc_event for Altix systems sends SIGPWR to init (and abuses
> > tasklist_lock..) while the sbus drivers call execve for /sbin/shutdown
> > (which is also ugly, it should at least use call_usermodehelper)
> > With normal sysvinit both will end up the same, but I suspect the
> > shutdown variant, maybe with a sysctl to chose the exact path to call
> > would be cleaner.  What do you guys think about adding a common function
> > to do this.
> 
> Sounds reasonable to me.  I'll copy Aaron Young, who I think
> actually wrote the code to send the SIGPWR, in case he had a Good
> Reason for doing it this way.  (Aaron, if I'm remembering wrong
> and you're not the guy who wrote this, let me know...)

  Yep, that was me. I couldn't really find a better way to do it at
  the time. An 'execve shutdown' probably would have been better in retrospect
  because I think sending SIGPWR to init doesn't always shutdown the machine.
  It depends on how some config files are setup (inittab, powerfail).
  I'd rather not depend on any config files and just force a shutdown/poweroff.

> 
> > Could you test such a patch for me?
> 
> Sure.  I'll need to get hold of some hardware/firmware that will
> reproduce a critical environmental situation...  Might take a
> litte while...

 Testing should be easy - on a Deskside Prism system, just hit the
 power button while up at Linux.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread Bodo Eggert
On Tue, 9 Aug 2005, Chris Wright wrote:
> * Bodo Eggert ([EMAIL PROTECTED]) wrote:
> > Chris Wright <[EMAIL PROTECTED]> wrote:
> > > * David Madore ([EMAIL PROTECTED]) wrote:

> > >> * Second, a much more extensive change, the patch introduces a third
> > >> set of capabilities for every process, the "bounding" set.  Normally
> > > 
> > > this is not a good idea.  don't add more sets. if you really want to
> > > work on this i'll give you all the patches that have been done thus far,
> > > plus a set of tests that look at all the execve, ptrace, setuid type of
> > > corner cases.
> > 
> > How are you going to tell processes that may exec suid (or set-capability-)
> > programs from those that aren't supposed to gain certain capabilities?
> 
> typically you'd expect exec suid will reset to full caps.

ACK, but

1) I wouldn't want an exploited service to gain any privileges, even by
   chaining userspace exploits (e.g. exec sendmail < exploitstring).  For
   most services, I'd like CAP_EXEC being unset (but it doesn't exist).

2) There are environments (linux-vserver.org) which limit root to a subset
   of capabilities. I think they might use that feature, too. Of course a
   simple "suid bit" == "all capabilities" scheme won't work there.

-- 
"Just because you are paranoid, do'nt mean they're not after you."
-- K.Cobain
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Reiserfs 3.6 + quota enabled, crash on delete (or maybe truncate)

2005-08-09 Thread Guillaume Pelat

Hi,

Would you, please, try to reproduce the problem having reiserfs check 
mode on.
(it is File systems->Reiserfs support->Enable reiserfs debug mode in 
kernel configuration)

and with attached patch.


Here is the error log with reiserfs check mode on + patch applied :

ReiserFS: sda3: found reiserfs format "3.6" with standard journal
ReiserFS: sda3: warning: CONFIG_REISERFS_CHECK is set ON
ReiserFS: sda3: warning: - it is slow mode for debugging.
ReiserFS: sda3: using ordered data mode
ReiserFS: sda3: journal params: device sda3, size 8192, journal first 
block 18, max trans len 1024, max batch 900, max commit age 30, max 
trans age 30

ReiserFS: sda3: checking transaction log (sda3)
ReiserFS: sda3: journal-1153: found in header: first_unflushed_offset 
4607, last_flushed_trans_id 401988
ReiserFS: sda3: journal-1206: Starting replay from offset 
1726529608356351, trans_id 0

ReiserFS: sda3: journal-1299: Setting newest_mount_id to 23
ReiserFS: sda3: Using r5 hash to sort names
ReiserFS: sda3: warning: vs-8301: reiserfs_kmalloc: allocated memory 202992
[..a few days later..]
REISERFS: panic (device Null superblock): vs-8025: set_entry_sizes: 
(mode==c, insert_size==-4958), invalid length of directory item
Kernel panic - not syncing: REISERFS: panic (device Null superblock): 
vs-8025: set_entry_sizes: (mode==c, insert_size==-4958

), invalid length of directory item

The partition had just been checked with reiserfsck (2 days before) and 
it was ok. I didn't reboot between the reiserfsck and the crash.


Here was the result of reiserfsck before the crash:
Checking internal tree..finished
Comparing bitmaps..finished
Checking Semantic tree:
finished
No corruptions found
There are on the filesystem:
Leaves 423085
Internal nodes 2932
Directories 1046685
Other files 8739829
Data block pointers 75038187 (0 of them are zero)
Safe links 0
###

Btw, i forgot to mention the mount options:
noatime,notail,usrquota


I just applied the patch submitted by Jan Kara:
http://bugzilla.kernel.org/show_bug.cgi?id=4771#c3
I don't know yet if it solves the problem :)

Best Regards,

Guillaume Pelat
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: FYI: device_suspend(...) in kernel_power_off().

2005-08-09 Thread Nigel Cunningham
Hi.

On Wed, 2005-08-10 at 03:25, Eric W. Biederman wrote:
> Pavel Machek <[EMAIL PROTECTED]> writes:
> 
> > Hi!
> >
> >> >> There as been a fair amount of consensus that calling
> >> >> device_suspend(...) in the reboot path was inappropriate now, because
> >> >> the device suspend code was too immature.   With this latest
> >> >> piece of evidence it seems to me that introducing device_suspend(...)
> >> >> in kernel_power_off, kernel_halt, kernel_reboot, or kernel_kexec
> >> >> can never be appropriate.
> >> >
> >> > Code is not ready now => it can never be fixed? Thats quite a strange
> >> > conclusion to make.
> >> 
> >> It seems there is an fundamental incompatibility with ACPI power off.
> >> As best as I can tell the normal case of device_suspend(PMSG_SUSPEND)
> >> works reasonably well in 2.6.x.
> >
> > Powerdown is going to have the same problems as the powerdown at the
> > end of suspend-to-disk. Can you ask people reporting broken shutdown
> > to try suspend-to-disk?
> 
> Everyone I know of who is affected has been copied on this thread.
> However your request is just nonsense.  There is a device_resume in
> the code before we get to the device_shutdown so there should be no
> effect at all.  Are we looking at the same kernel?

My poweroff after suspend-to-disk was broken during 2.6.13-rcs, and came
right in rc6.

> >> >From what I can tell there are some fairly fundamental semantic
> >> differences, on that code path.  The most peculiar problem I tracked
> >> is someone had a machine that would go into power off state and then
> >> wake right back up because of the device_suspend(PMSG_SUSPEND)
> >> change.
> >
> > So something is wrong with ACPI wakeup GPEs. It would hurt in
> > suspend-to-disk case, too.
> 
> Something was wrong.  I can't possibly see how the suspend-to-disk
> case would be affected.
> 
> >> I won't call it impossible to resolve the problems, but the people
> >
> > Good.
> 
> Nope.  Now that I have read the code I would just call it nonsense.
> 
> >> So yes without a darn good argument as to why it should work.  I will
> >> go with the experimental evidence that it fails miserably and
> >> trivially because of semantic incompatibility and can therefore
> >> never be fixed.
> >
> > I do not think any "semantic" issues exist. We need to pass detailed
> > info down to the drivers that care, and we need to fix all the bugs in
> > the drivers. That should be pretty much it.
> 
> Given that acpi and other platform firmware is involved there are
> pieces we cannot fix.  We either match the spec or we are incorrect.
> 
> I haven't a clue how suspend/resume is expected to interact with
> things in suspend to disk scenario.  Reading through the code
> the power message is PMSG_FREEZE not PMSG_SUSPEND (as you
> implemented).  All of the hardware is actually resumed before
> we device_shutdown() is called.
> 
> I want to see the correlation between device_suspend(PMSG_FREEZE) and
> the code in device_shutdown(), but I don't see it.
> device_suspend(...) is all about allowing the state of a device to be
> preserved.  device_shutdown() is really about stopping it.  These are
> really quite different operations. 

Agreed here.

> With the pm_suspend_disk calling kernel_power_off it appears that we
> currently have complete code reuse of the relevant code on that path.
> 
> Currently I see no true redundancy between the two cases at all.
> The methods do different things for different purposes.  Which is
> about the largest semantic difference I can think of.  The fact
> that the methods at first glance look like they do the same
> thing is probably the real surprise.

If the suspend to disk code called kernel_power_off, it should be
exactly what it sounds like. We've already written the image and we now
want to simply power down the machine. Just as with a 'normal'
powerdown, we should do everything necessary to ensure all data
submitted to hard drives is really flushed and that emergency head
parking isn't done, and then power down (or reboot). This should, so far
as I can see, be exactly the same in both cases.

Regards,

Nigel

> Calling device_suspend(...) from kernel_power_off, kernel_halt,
> kernel_kexec, or kernel_restart seems pointless, useless and silly.
> 
> Eric
-- 
Evolution.
Enumerate the requirements.
Consider the interdependencies.
Calculate the probabilities.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Daniel Phillips
On Wednesday 10 August 2005 01:36, Hugh Dickins wrote:
> On Tue, 9 Aug 2005, Benjamin Herrenschmidt wrote:
> >  - We already have a refcount
> >  - We have a field where putting a flag isn't that much of a problem
> >  - It can be difficult to get page refcounting right when dealing with
> >such things, really.
>
> Probably easier to get the page refcounting right with these than with
> most.  Getting refcounting wrong is always bad.

He seems to be arguing for a new debug option.

> > In that case, we basically have an _easy_ way to trigger a useful BUG()
> > in the page free path when it's a page that should never be returned to
> > the pool.
>
> As bad_page already does on various other flags (though it clears those,
> whereas this one you'd prefer not to clear).   Hmm, okay, though I'm not
> sure it's worth its own page flag if they're in short supply.

Nineteen out of 32 officially spoken for so far, with some out of tree patches 
regarding the remainder with desirous eyes no doubt.  I think that qualifies 
as short supply.  But it is not just that, it is the extra cost of 
understanding and auditing the features implied by the flags, particularly 
bogus features.

Regards,

Daniel
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[ANNOUNCE] yaird 0.0.11, a mkinitrd based on hotplug concepts

2005-08-09 Thread Erik van Konijnenburg
Version 0.0.11 of yaird is now available at:
http://www.xs4all.nl/~ekonijn/yaird/yaird-0.0.11.tar.gz

Yaird is a proof of concept perl rewrite of mkinitrd.  It aims to
reliably identify the necessary modules by using the same algorithms
as hotplug, and comes with a template system to tune the tool for
different distributions and experiment with different image layouts.
It requires a 2.6 kernel with hotplug.  There is a paper discussing it at:

http://www.xs4all.nl/~ekonijn/yaird/yaird.html

Summary of user visible changes:
 * Support configuration file that determines what the generated
   image should do.  It replaces command line options for root
   file system selection and for NFS support.
 * The file /etc/hotplug/blacklist does not have to exist:
   this can be a machine without hotplug, or with a future
   hotplug version, where blacklisting is delegated to module-init-tools.
   Based on patch by Marian Andre <[EMAIL PROTECTED]>
 * Bugfix: expect characters A-Z, in kernel config entries.
 * Handle kernels that do not have ide-generic.

Changes in version 0.0.10 and 0.0.9 were too small to
merit an announcement:
 * Support legacy keyboard compiled as module.
 * Place the docs under GPL instead of GFDL for the benefit of Debian.

Regards,
Erik

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix PPC signal handling of NODEFER, should not affect sa_mask

2005-08-09 Thread Linus Torvalds


On Tue, 9 Aug 2005, Steven Rostedt wrote:
>
> If this is indeed the way things should work. I'll go ahead and fix all
> the other architectures.

It does appear that this is what the standards describe in the section 
quoted by Chris.

On the other hand, the standard seems to be a bit confused according to 
google:

  "This mask is formed by taking the union of the current signal mask and
   the value of the sa_mask for the signal being delivered unless
   SA_NODEFER or SA_RESETHAND is set, and then including the signal being
   delivered. If and when the user's signal handler returns normally, the
   original signal mask is restored."

Quite frankly, the way I read it is actually the old Linux behaviour: the 
"unless SA_NODEFER or SA_RESETHAND is set" seems to be talking about the 
whole union of the sa_mask thing, _not_ just the "and the signal being 
delivered" part. Exactly the way the kernel currently does (except we 
should apparently _also_ do it for SA_RESETHAND).

So if we decide to change the kernel behaviour, I'd like this to be in -mm
for a while before merging (or merge _very_ early after 2.6.13). I could
imagine this confusing some existing binaries that had only been tested
with the old Linux behaviour, regardless of what a standard says. 
Especially since the standard itself is so confusing and badly worded.

Maybe somebody can tell what other systems do, since I assume the standard 
is trying to describe behaviour that actually exists in the wild..

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Standardize shutdown of the system from enviroment control modules

2005-08-09 Thread Greg Howard

On Tue, 9 Aug 2005, Christoph Hellwig wrote:

> Currently snsc_event for Altix systems sends SIGPWR to init (and abuses
> tasklist_lock..) while the sbus drivers call execve for /sbin/shutdown
> (which is also ugly, it should at least use call_usermodehelper)
> With normal sysvinit both will end up the same, but I suspect the
> shutdown variant, maybe with a sysctl to choose the exact path to call
> would be cleaner.  What do you guys think about adding a common function
> to do this.

Sounds reasonable to me.  I'll copy Aaron Young, who I think
actually wrote the code to send the SIGPWR, in case he had a Good
Reason for doing it this way.  (Aaron, if I'm remembering wrong
and you're not the guy who wrote this, let me know...)

> Could you test such a patch for me?

Sure.  I'll need to get hold of some hardware/firmware that will
reproduce a critical environmental situation...  Might take a
little while...

Thanks

--
Greg Howard, MTS - Core Platform SW MS 10-1-061
SGI - Silicon Graphics Inc. 2750 Blue Water Road
[EMAIL PROTECTED] Eagan, MN  55121

++
  "This assignment has two parts: a hard part, and an easy part.  Do
   the easy part first; you might learn something that will help you
   on the hard part.  Or, maybe you'll go outside for a walk before
   you start the hard part, and get hit by a truck!"
- Dr. Jeffrey W. Smith
++
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 1/3] non-resident page tracking

2005-08-09 Thread Marcelo Tosatti
On Tue, Aug 09, 2005 at 09:15:26PM +0200, Peter Zijlstra wrote:
> On Tue, 2005-08-09 at 15:25 -0300, Marcelo Tosatti wrote:
> > Hi Rik,
> > 
> > Two hopefully useful comments:
> > 
> > i) ARC and its variants requires additional information about page
> > replacement (namely whether the page has been reclaimed from the L1 or
> > L2 lists).
> > 
> > How costly would it be to add this information to the hash table?
> > 
> I've been thinking on reserving another word in the cache-line and use
> that as a bit-array to keep that information; the only problems with
> that would be atomicity of the {bucket,bit} tuple and very large
> cachelines where NUM_NR > 32. 

The chance for a lookup hit to happen on a hash value which is in a
modified-state in a different CPU's cacheline should be pretty small
(depends on the architecture also, but shouldn't be much of an issue I
guess).

Hoping on that, guaranteed validity of data is not necessary, it is OK
to be incorrect occasionally.

> > ii) From my reading of the patch, the provided "distance" information is
> > relative to each hash bucket. I'm unable to understand the distance metric
> > being useful if measured per-hash-bucket instead of globally?
> 
> The assumption is that IFF the hash function has good distribution
> properties the per bucket distance is a good approximation of
> (distance >> nonres_shift).

Well, not really "good approximation" it sounds to me, the sensibility
goes down to L1_CACHE_LINE/sizeof(u32), which is:

- 8 on 32-byte cacheline
- 16 on 64-byte cacheline 
- 32 on 128-byte cacheline

Right?

So the (nice!) refault histogram gets limited to those values?

> > PS: Since remember_page() is always called with the zone->lru_lock held,
> > the preempt_disable/enable pair is unnecessary at the moment... still,
> > might be better to leave it there for safety reasons.
> > 
> 
> There being multiple zones; owning zone->lru_lock does not guarantee
> uniqueness on the remember_page() path as it's a global structure.

True, but it guarantees disabled preemption. No big deal...
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] atomic open(..., O_CREAT | ...)

2005-08-09 Thread Miklos Szeredi
> Really?
> 
> static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
> {
>   .
>   if (path_walk(name, nd) == 0) {
>   if (nd->dentry->d_inode) {
>   dput(old_dentry);
>   mntput(old_mnt);
>   return 1;
>   }
>   path_release(nd);
>   }
>   nd->dentry = old_dentry;
>   nd->mnt = old_mnt;
>   nd->last = last;
>   nd->last_type = last_type;
>   }
>   return 1;
> }

I see what you are getting at.  But notice, that every (relevant)
field of nameidata is reinitialized, except nd->flags, which is
_obviously_ not changed by either path_walk() or path_release().

So your argument doesn't hold.

You basically argue, that intent.open.file must be zeroed, because
someone might call path_release_open_intent() twice, which very
obviously does not make any sense, unless it does some magic like the
above (which it should not), in which case it might as well be aware,
that it has to save/restore the intent.open.file field as well.

> Currently, yes. The only caller of open_namei() is filp_open(). That was
> not always the case previously.
> 
> If we think it will never be the case in the future, then there is an
> argument for merging the two and/or making open_namei() an inlined
> function.

Yes, that would make sense.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Martin J. Bligh


--On Tuesday, August 09, 2005 20:41:00 +0100 Russell King <[EMAIL PROTECTED]> 
wrote:

> On Tue, Aug 09, 2005 at 07:38:52AM -0700, Martin J. Bligh wrote:
>> pfn_valid() doesn't tell you it's RAM or not - it tells you whether you
>> have a backing struct page for that address. Could be an IO mapped device,
>> a small memory hole, whatever.
> 
> The only things which have a struct page is RAM.  Nothing else does.

That's not true at all. Every physical address covered by the machine
that we may need to access, plus every small hole we didn't use 
discontigmem to exclude has a backing struct page. See e820 maps.

Unless you're speaking only with respect to ARM, in which case, I'll
bow to your knowledge, but it's certainly not true in general ...

M.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


I2C block reads with i2c-viapro: testers wanted

2005-08-09 Thread Jean Delvare
Hi all,

I am implementing I2C block reads in the i2c-viapro driver, and am
looking for testers. I was able to test on my own VT8237R chip, it works
OK, now I'd need to know how it works on older VIA south bridges, namely
the VT8235 and the VT82C686B. South bridges before that (VT82C686A,
VT8233A and older) are supposed not to work according to the datasheets,
but a confirmation would be welcome, who knows, it might simply not be
documented.

My experimental patch follows. I have enabled the I2C block read
function for all VIA south bridges, so that it can be tested on all
chips. I'll restrict that after the test phase, of course.

The easiest way to test the patch is to use i2c-viapro in conjunction
with the eeprom driver. This supposes that you do actually have a VIA
south bridge with EEPROMs (typically SPD) on the SMBus. If not, you
won't be able to test, sorry.

In order to verify whether I2C block reads work for you, just compare
the contents of this file:
  /sys/bus/i2c/devices/0-0050/eeprom
before and after applying the patch (and cycling i2c-viapro, obviously).
If it works, the contents should be identical. Note that the bus number
(0 above) and exact address (0050 above) may change depending on the
hardware setup.

You can also use lm_sensors' utilities to test the I2C block read
function: i2cdump has an I2C block mode ("i"), and even "sensors" will
display the SPD information. If it's correct after applying the patch,
it means that the I2C block read function is working OK for you.

On my system, the dump is down from over 2 seconds without the patch to
below 0.2 second with the patch, which proves how efficient I2C block
reads are and explains why I want to implement this function.

Thanks.

 drivers/i2c/busses/i2c-viapro.c |   40 ++--
 1 files changed, 38 insertions(+), 2 deletions(-)

--- linux-2.6.13-rc6.orig/drivers/i2c/busses/i2c-viapro.c   2005-08-08 
18:55:48.0 +0200
+++ linux-2.6.13-rc6/drivers/i2c/busses/i2c-viapro.c2005-08-09 
22:52:56.0 +0200
@@ -88,6 +88,7 @@
 #define VT596_BYTE_DATA  0x08
 #define VT596_WORD_DATA  0x0C
 #define VT596_BLOCK_DATA 0x14
+#define VT596_I2C_BLOCK_DATA   0x34
 
 
 /* If force is set to anything different from 0, we forcibly enable the
@@ -107,6 +108,9 @@
 
 static struct i2c_adapter vt596_adapter;
 
+#define FEATURE_I2CBLOCK   (1<<0)
+static int vt596_features;
+
 /* Another internally used function */
 static int vt596_transaction(void)
 {
@@ -242,9 +246,21 @@
}
size = VT596_BLOCK_DATA;
break;
+   case I2C_SMBUS_I2C_BLOCK_DATA:
+   outb_p(((addr & 0x7f) << 1) | (read_write & 0x01),
+  SMBHSTADD);
+   outb_p(command, SMBHSTCMD);
+   if (read_write == I2C_SMBUS_WRITE) {
+   dev_warn(_adapter.dev,
+"I2C block write not supported!\n");
+   return -1;
+   }
+   outb_p(I2C_SMBUS_BLOCK_MAX, SMBHSTDAT0);
+   size = VT596_I2C_BLOCK_DATA;
+   break;
}
 
-   outb_p((size & 0x1C) + (ENABLE_INT9 & 1), SMBHSTCNT);
+   outb_p((size & 0x3C) + (ENABLE_INT9 & 1), SMBHSTCNT);
 
if (vt596_transaction()) /* Error in transaction */
return -1;
@@ -267,6 +283,7 @@
data->word = inb_p(SMBHSTDAT0) + (inb_p(SMBHSTDAT1) << 8);
break;
case VT596_BLOCK_DATA:
+   case VT596_I2C_BLOCK_DATA:
data->block[0] = inb_p(SMBHSTDAT0);
if (data->block[0] > I2C_SMBUS_BLOCK_MAX)
data->block[0] = I2C_SMBUS_BLOCK_MAX;
@@ -280,9 +297,15 @@
 
 static u32 vt596_func(struct i2c_adapter *adapter)
 {
-   return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+   u32 func = I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
I2C_FUNC_SMBUS_BLOCK_DATA;
+
+#if 0
+   if (vt596_features & FEATURE_I2CBLOCK)
+#endif
+   func |= I2C_FUNC_SMBUS_READ_I2C_BLOCK;
+   return func;
 }
 
 static struct i2c_algorithm smbus_algorithm = {
@@ -391,6 +414,19 @@
vt596_pdev = NULL;
}
 
+   if (pdev->device == PCI_DEVICE_ID_VIA_8235
+|| pdev->device == PCI_DEVICE_ID_VIA_8237) {
+   vt596_features |= FEATURE_I2CBLOCK;
+   } else if (pdev->device == PCI_DEVICE_ID_VIA_82C686_4) {
+   u8 rev;
+
+   /* VT82C686B (rev 0x40) does support I2C block mode, but
+  VT82C686A (rev 0x30) doesn't. */
+   if (!pci_read_config_byte(pdev, PCI_REVISION_ID, )
+&& rev >= 0x40)
+   vt596_features |= FEATURE_I2CBLOCK;
+   }
+
/* Always return failure here.  This is to allow other drivers to bind
 * to this pci device.  We don't really want to have control over the
 * 

Re: [RFC] atomic open(..., O_CREAT | ...)

2005-08-09 Thread Trond Myklebust
ty den 09.08.2005 Klokka 22:42 (+0200) skreiv Miklos Szeredi:

> Trond, wake up!  __emul_lookup_dentry() does nothing of the sort.
> Neither does anything else.  In theory it could, but that's not a
> reason to do a confusing thing like that.

Really?

static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
{
.
if (path_walk(name, nd) == 0) {
if (nd->dentry->d_inode) {
dput(old_dentry);
mntput(old_mnt);
return 1;
}
path_release(nd);
}
nd->dentry = old_dentry;
nd->mnt = old_mnt;
nd->last = last;
nd->last_type = last_type;
}
return 1;
}

Which is called by path_lookup(), which again returns success, and
expects the user to call path_release() later.

> > Firstly, the open_namei() flags field is not a "permissions" field. It
> > contains open mode information. The calculation of the open permissions
> > flags is done by open_namei() itself.
> 
> Based on flags.  It's just a FMODE_* -> MAY_* transformation
> 
> > Secondly, what advantage is there in allowing callers of open_namei() to
> > be able to override the MAY_WRITE check when doing open(O_TRUNC)? This
> > is a calculation that should be done _once_ in order to always get it
> > right, and it should therefore be done in open_namei() together with the
> > rest of the permissions calculation.
> 
> I think the _only_ caller of open_namei() is filp_open(), so this is
> not much of an issue, but yeah, you could do it that way too.

Currently, yes. The only caller of open_namei() is filp_open(). That was
not always the case previously.

If we think it will never be the case in the future, then there is an
argument for merging the two and/or making open_namei() an inlined
function.

Cheers,
  Trond

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Standardize shutdown of the system from enviroment control modules

2005-08-09 Thread Christoph Hellwig
Currently snsc_event for Altix systems sends SIGPWR to init (and abuses
tasklist_lock..) while the sbus drivers call execve for /sbin/shutdown
(which is also ugly, it should at least use call_usermodehelper)
With normal sysvinit both will end up the same, but I suspect the
shutdown variant, maybe with a sysctl to choose the exact path to call
would be cleaner.  What do you guys think about adding a common function
to do this.  Could you test such a patch for me?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix i386 signal handling of NODEFER, should not affect sa_mask (was: Re: Signal handling possibly wrong)

2005-08-09 Thread Chris Wright
* Steven Rostedt ([EMAIL PROTECTED]) wrote:
> Hmm, I think you want this patch. You still need to check the return of
> setting up the frames.

Indeed, I noticed just after I sent, and sent an updated patch.
Thanks Steve!
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix PPC signal handling of NODEFER, should not affect sa_mask

2005-08-09 Thread Steven Rostedt
If this is indeed the way things should work. I'll go ahead and fix all
the other architectures.

-- Steve

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>

--- linux-2.6.13-rc6-git1/arch/ppc/kernel/signal.c.orig 2005-08-09 
17:00:43.0 -0400
+++ linux-2.6.13-rc6-git1/arch/ppc/kernel/signal.c  2005-08-09 
17:01:37.0 -0400
@@ -759,13 +759,12 @@ int do_signal(sigset_t *oldset, struct p
else
handle_signal(signr, , , oldset, regs, newsp);
 
-   if (!(ka.sa.sa_flags & SA_NODEFER)) {
-   spin_lock_irq(>sighand->siglock);
-   sigorsets(>blocked,>blocked,_mask);
+   spin_lock_irq(>sighand->siglock);
+   sigorsets(>blocked,>blocked,_mask);
+   if (!(ka.sa.sa_flags & SA_NODEFER))
sigaddset(>blocked, signr);
-   recalc_sigpending();
-   spin_unlock_irq(>sighand->siglock);
-   }
+   recalc_sigpending();
+   spin_unlock_irq(>sighand->siglock);
 
return 1;
 }


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] use kthread infrastructure in md

2005-08-09 Thread Christoph Hellwig
Switch MD to use the kthread infrastructure, to simplify the code and
get rid of tasklist_lock abuse in md_unregister_thread.  Long-term I
wonder whether workqueues wouldn't be a better choice than the
MD-specific thread wrappers for the lowlevel drivers.


Signed-off-by: Christoph Hellwig <[EMAIL PROTECTED]>

Index: linux-2.6/drivers/md/md.c
===
--- linux-2.6.orig/drivers/md/md.c  2005-08-09 19:28:16.0 +0200
+++ linux-2.6/drivers/md/md.c   2005-08-09 20:17:21.0 +0200
@@ -34,6 +34,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2947,18 +2948,6 @@
 {
mdk_thread_t *thread = arg;
 
-   lock_kernel();
-
-   /*
-* Detach thread
-*/
-
-   daemonize(thread->name, mdname(thread->mddev));
-
-   current->exit_signal = SIGCHLD;
-   allow_signal(SIGKILL);
-   thread->tsk = current;
-
/*
 * md_thread is a 'system-thread', it's priority should be very
 * high. We avoid resource deadlocks individually in each
@@ -2970,10 +2959,9 @@
 * bdflush, otherwise bdflush will deadlock if there are too
 * many dirty RAID5 blocks.
 */
-   unlock_kernel();
 
complete(thread->event);
-   while (thread->run) {
+   while (!kthread_should_stop()) {
void (*run)(mddev_t *);
 
wait_event_interruptible_timeout(thread->wqueue,
@@ -2986,11 +2974,8 @@
run = thread->run;
if (run)
run(thread->mddev);
-
-   if (signal_pending(current))
-   flush_signals(current);
}
-   complete(thread->event);
+
return 0;
 }
 
@@ -3007,11 +2992,9 @@
 const char *name)
 {
mdk_thread_t *thread;
-   int ret;
struct completion event;
 
-   thread = (mdk_thread_t *) kmalloc
-   (sizeof(mdk_thread_t), GFP_KERNEL);
+   thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL);
if (!thread)
return NULL;
 
@@ -3024,8 +3007,8 @@
thread->mddev = mddev;
thread->name = name;
thread->timeout = MAX_SCHEDULE_TIMEOUT;
-   ret = kernel_thread(md_thread, thread, 0);
-   if (ret < 0) {
+   thread->tsk = kthread_run(md_thread, thread, mdname(thread->mddev));
+   if (IS_ERR(thread->tsk)) {
kfree(thread);
return NULL;
}
@@ -3035,21 +3018,9 @@
 
 void md_unregister_thread(mdk_thread_t *thread)
 {
-   struct completion event;
-
-   init_completion();
-
-   thread->event = 
-
-   /* As soon as ->run is set to NULL, the task could disappear,
-* so we need to hold tasklist_lock until we have sent the signal
-*/
dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
-   read_lock(_lock);
-   thread->run = NULL;
-   send_sig(SIGKILL, thread->tsk, 1);
-   read_unlock(_lock);
-   wait_for_completion();
+
+   kthread_stop(thread->tsk);
kfree(thread);
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Signal handling possibly wrong

2005-08-09 Thread Chris Wright
* Chris Wright ([EMAIL PROTECTED]) wrote:

Actually that one broke a fix that I think Brodo discovered in the first
place with bogus stack frames.

Should be this one.

thanks,
-chris
---


Subject: [PATCH] fix SA_NODEFER signals to honor sa_mask

When receiving SA_NODEFER signal, kernel was inappropriately not applying
the sa_mask.  As pointed out by Brodo Stroesser.

Signed-off-by: Chris Wright <[EMAIL PROTECTED]>

diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo
else
ret = setup_frame(sig, ka, oldset, regs);
 
-   if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+   if (ret) {
spin_lock_irq(>sighand->siglock);
sigorsets(>blocked,>blocked,>sa.sa_mask);
-   sigaddset(>blocked,sig);
+   if (!(ka->sa.sa_flags & SA_NODEFER))
+   sigaddset(>blocked,sig);
recalc_sigpending();
spin_unlock_irq(>sighand->siglock);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread David Madore
On Tue, Aug 09, 2005 at 01:52:06PM -0700, Chris Wright wrote:
> * Bodo Eggert ([EMAIL PROTECTED]) wrote:
> > How are you going to tell processes that may exec suid (or set-capability-)
> > programs from those that aren't supposed to gain certain capabilities?
> 
> typically you'd expect exec suid will reset to full caps.

suid exec _must_ reset to full caps or we have the sendmail disaster
again.  However, that is _if_ execve() succeeds.  It is quite possible
that execve() should fail, and that is precisely what my patch does:
if a process has bounded capabilities, it _may not_ exec suid.

-- 
 David A. Madore
([EMAIL PROTECTED],
 http://www.madore.org/~david/ )
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Fix i386 signal handling of NODEFER, should not affect sa_mask (was: Re: Signal handling possibly wrong)

2005-08-09 Thread Steven Rostedt
On Tue, 2005-08-09 at 13:49 -0700, Chris Wright wrote:

> 
> SA_NODEFER
> [XSI] If set and sig is caught, sig shall not be added to the thread's
> signal mask on entry to the signal handler unless it is included in
> sa_mask. Otherwise, sig shall always be added to the thread's signal
> mask on entry to the signal handler.
> 
> Brodo, is this what you mean?
> 
> thanks,
> -chris
> --
> 
> Subject: [PATCH] fix SA_NODEFER signals to honor sa_mask
> 
> When receiving SA_NODEFER signal, kernel was inappropriately not applying
> the sa_mask.  As pointed out by Brodo Stroesser.
> 
> Signed-off-by: Chris Wright <[EMAIL PROTECTED]>
> ---
> 
> diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
> --- a/arch/i386/kernel/signal.c
> +++ b/arch/i386/kernel/signal.c
> @@ -577,13 +577,12 @@ handle_signal(unsigned long sig, siginfo
>   else
>   ret = setup_frame(sig, ka, oldset, regs);
>  
> - if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
> - spin_lock_irq(>sighand->siglock);
> - sigorsets(>blocked,>blocked,>sa.sa_mask);
> + spin_lock_irq(>sighand->siglock);
> + sigorsets(>blocked,>blocked,>sa.sa_mask);
> + if (ret && !(ka->sa.sa_flags & SA_NODEFER))
>   sigaddset(>blocked,sig);
> - recalc_sigpending();
> - spin_unlock_irq(>sighand->siglock);
> - }
> + recalc_sigpending();
> + spin_unlock_irq(>sighand->siglock);
>  
>   return ret;
>  }


Hmm, I think you want this patch. You still need to check the return of
setting up the frames.

-- Steve

Signed-off-by: Steven Rostedt <[EMAIL PROTECTED]>

--- linux-2.6.13-rc6-git1/arch/i386/kernel/signal.c.orig2005-08-09 
16:54:36.0 -0400
+++ linux-2.6.13-rc6-git1/arch/i386/kernel/signal.c 2005-08-09 
16:55:24.0 -0400
@@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo
else
ret = setup_frame(sig, ka, oldset, regs);
 
-   if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+   if (ret) {
spin_lock_irq(>sighand->siglock);
sigorsets(>blocked,>blocked,>sa.sa_mask);
-   sigaddset(>blocked,sig);
+   if (!(ka->sa.sa_flags & SA_NODEFER))
+   sigaddset(>blocked,sig);
recalc_sigpending();
spin_unlock_irq(>sighand->siglock);
}


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC] slight rework of [PATCH 2/5] Add support for AIO completion notification

2005-08-09 Thread Benjamin LaHaise
Hello Sebastien et al,

The patch below is a slight rework of Sebastien's POSIX AIO completion 
signals patch.  Most of the changes are cosmetic for splitting up the 
code into smaller functions, but one significant change is that the 
uid/euid is checked against the target process when the signal is about 
to be delivered, whereas before Sebastien's patch only checked at the 
time the iocb was submitted.  Extra eyes peering over the code in 
__aio_send_signal would be appreciated.  This patch applies towards 
the end of the whole series I've got pending which will be posted in 
a few hours.  Sebastien, can you verify this still works with your 
signal tests?  Thanks,

-ben

 fs/aio.c|  227 +++-
 include/linux/aio.h |   12 ++
 include/linux/aio_abi.h |3 
 3 files changed, 199 insertions(+), 43 deletions(-)

[AIO] add support for POSIX AIO completion signals

This patch adds POSIX AIO completion notification event by adding an 
aio_sigevent field to the aiocb.

The sigevent structure is filled in by the user application as part of the
AIO request preparation. Upon request completion, the kernel notifies the
application using those sigevent parameters.

The original patch was by Sébastien Dugué with heavy modifications by 
Benjamin LaHaise.

Signed-off-by: Sébastien Dugué <[EMAIL PROTECTED]>
Signed-off-by: Benjamin LaHaise <[EMAIL PROTECTED]>
diff -purN --exclude=description 82_aio_threads/fs/aio.c 83_sigevent/fs/aio.c
--- 82_aio_threads/fs/aio.c 2005-08-08 23:28:52.0 -0400
+++ 83_sigevent/fs/aio.c2005-08-09 16:34:02.0 -0400
@@ -404,6 +404,7 @@ static struct kiocb fastcall *__aio_get_
req->ki_cancel = NULL;
req->ki_retry = NULL;
req->ki_dtor = NULL;
+   req->ki_signo = 0;
req->private = NULL;
INIT_LIST_HEAD(>ki_run_list);
 
@@ -932,6 +933,97 @@ void fastcall kick_iocb(struct kiocb *io
 }
 EXPORT_SYMBOL(kick_iocb);
 
+static void __aio_send_signal(struct kiocb *iocb)
+{
+   struct siginfo info;
+   struct task_struct *p;
+   unsigned long flags;
+   int ret = -1;
+
+   memset(, 0, sizeof(struct siginfo));
+
+   info.si_signo = iocb->ki_signo;
+   info.si_errno = 0;
+   info.si_code = SI_ASYNCIO;
+   info.si_pid = 0;
+   info.si_uid = 0;
+   info.si_value = iocb->ki_sigev_value;
+
+   read_lock(_lock);
+   p = find_task_by_pid(iocb->ki_pid);
+   if (!p || !p->sighand)
+   goto out_unlock;
+
+   /* Do we have permission to signal this task? */
+   if ((iocb->ki_euid ^ p->suid) && (iocb->ki_euid ^ p->uid)
+&& (iocb->ki_uid ^ p->suid) && (iocb->ki_uid ^ p->uid))
+   goto out_unlock;/* No. */
+
+   spin_lock_irqsave(>sighand->siglock, flags);
+
+   switch(iocb->ki_notify) {
+   case IO_NOTIFY_SIGNAL:
+   ret = __group_send_sig_info(iocb->ki_signo, , p);
+   break;
+   case IO_NOTIFY_THREAD_ID:
+   //ret = specific_send_sig_info(iocb->ki_signo, , p);
+   ret = __group_send_sig_info(iocb->ki_signo, , p);
+   break;
+   }
+
+   spin_unlock_irqrestore(>sighand->siglock, flags);
+
+   if (ret)
+   printk(KERN_DEBUG "__aio_send_signal: failed to send signal %d 
to %d\n",
+  iocb->ki_signo, iocb->ki_pid);
+
+out_unlock:
+   read_unlock(_lock);
+}
+
+static void __aio_write_evt(struct kioctx *ctx, struct io_event *event)
+{
+   struct aio_ring_info*info;
+   struct aio_ring *ring;
+   struct io_event *ring_event;
+   unsigned long   tail;
+
+   info = >ring_info;
+
+   /* add a completion event to the ring buffer.
+* must be done holding ctx->ctx_lock to prevent
+* other code from messing with the tail
+* pointer since we might be called from irq
+* context.
+*/
+
+   ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+
+   tail = info->tail;
+   ring_event = aio_ring_event(info, tail, KM_IRQ0);
+   if (++tail >= info->nr)
+   tail = 0;
+
+   *ring_event = *event;
+
+   dprintk("aio_write_evt: %p[%lu]: %Lx %Lx %Lx %Lx\n",
+   ctx, tail, event->obj, event->data, event->res, event->res2);
+
+   /* after flagging the request as done, we
+* must never even look at it again
+*/
+
+   smp_wmb();  /* make event visible before updating tail */
+
+   info->tail = tail;
+   ring->tail = tail;
+
+   put_aio_ring_event(ring_event, KM_IRQ0);
+   kunmap_atomic(ring, KM_IRQ1);
+
+   pr_debug("added to ring at [%lu]\n", tail);
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  * Returns true if this is the last user of the request.  The 
@@ -940,11 +1032,8 @@ EXPORT_SYMBOL(kick_iocb);
 int fastcall aio_complete(struct kiocb *iocb, long res, long 

Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Linus Torvalds


On Tue, 9 Aug 2005, Russell King wrote:

> On Tue, Aug 09, 2005 at 07:38:52AM -0700, Martin J. Bligh wrote:
> > pfn_valid() doesn't tell you it's RAM or not - it tells you whether you
> > have a backing struct page for that address. Could be an IO mapped device,
> > a small memory hole, whatever.
> 
> The only things which have a struct page is RAM.  Nothing else does.

That's not true.

We have "struct page" show up for the ISA legacy MMIO region too, for 
example.

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread Chris Wright
* Bodo Eggert ([EMAIL PROTECTED]) wrote:
> Chris Wright <[EMAIL PROTECTED]> wrote:
> > * David Madore ([EMAIL PROTECTED]) wrote:
> 
> >> * Second, a much more extensive change, the patch introduces a third
> >> set of capabilities for every process, the "bounding" set.  Normally
> > 
> > this is not a good idea.  don't add more sets. if you really want to
> > work on this i'll give you all the patches that have been done thus far,
> > plus a set of tests that look at all the execve, ptrace, setuid type of
> > corner cases.
> 
> How are you going to tell processes that may exec suid (or set-capability-)
> programs from those that aren't supposed to gain certain capabilities?

typically you'd expect exec suid will reset to full caps.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Daniel Phillips
On Wednesday 10 August 2005 06:17, Hugh Dickins wrote:
> There might be a case for packaging repeated arguments into structures
> (though several of these levels are inlined anyway), but that's some
> other exercise entirely, shouldn't get in the way of removing Reserved.

Agreed, an entirely separate question that I'd like to return to in time.  The 
existing herd of page table walkers is unnecessarily repetitious.

Regards,

Daniel
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Signal handling possibly wrong

2005-08-09 Thread Chris Wright
* Steven Rostedt ([EMAIL PROTECTED]) wrote:
> Where, sa_mask is _ignored_ if NODEFER is set. (I now have woken up!).
> The attached program shows that the sa_mask is indeed ignored when
> SA_NODEFER is set.
> 
> Now the real question is... Is this a bug?

That's not correct w.r.t. SUSv3.  sa_mask should be always used and
SA_NODEFER is just whether or not to add that signal in.

SA_NODEFER
[XSI] If set and sig is caught, sig shall not be added to the thread's
signal mask on entry to the signal handler unless it is included in
sa_mask. Otherwise, sig shall always be added to the thread's signal
mask on entry to the signal handler.

Brodo, is this what you mean?

thanks,
-chris
--

Subject: [PATCH] fix SA_NODEFER signals to honor sa_mask

When receiving SA_NODEFER signal, kernel was inappropriately not applying
the sa_mask.  As pointed out by Brodo Stroesser.

Signed-off-by: Chris Wright <[EMAIL PROTECTED]>
---

diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -577,13 +577,12 @@ handle_signal(unsigned long sig, siginfo
else
ret = setup_frame(sig, ka, oldset, regs);
 
-   if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
-   spin_lock_irq(&current->sighand->siglock);
-   sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+   spin_lock_irq(&current->sighand->siglock);
+   sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+   if (ret && !(ka->sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked,sig);
-   recalc_sigpending();
-   spin_unlock_irq(&current->sighand->siglock);
-   }
+   recalc_sigpending();
+   spin_unlock_irq(&current->sighand->siglock);
 
return ret;
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread David Madore
On Tue, Aug 09, 2005 at 04:28:31PM -0400, [EMAIL PROTECTED] wrote:
> On Tue, 09 Aug 2005 07:26:21 +0200, David Madore said:
> > * Second, a much more extensive change, the patch introduces a third
> > set of capabilities for every process, the "bounding" set.  Normally
> > the bounding set has every capability in it
> 
> How is this different in semantics from the existing 'permitted' capset?

The permitted set is a set of capabilities really available to the
process (though they may be temporarily dropped by removing them from
the effective set, they are still available to take back).  In
contrast, the bounding set capabilities are not readily available to
the process; it just means that the capabilities in question *might*
be acquired by running a suid program (or setcap program if filesystem
support for capabilities ever comes to Linux).

Currently this is more or less an all-or-nothing process: since
capabilities can only be acquired by running a suid program, removing
any capability from the bounding set means the program will never be
permitted to execute a suid program any more (execve() will fail with
EPERM).  But maybe I'll reinstate the CAP_SETPCAP thing in some future
version of the patch (I'm still waiting for someone to tell me what
was wrong with CAP_SETPCAP and why it was removed), and then the
bounding set should also prohibit capabilities being given through
that interface.

The bottom line is: if you have some untrusted process, it might be
wise to empty its bounding set, making it incapable of
executing a suid root program and thus acquiring new capabilities.  (I
also plan to add some normally-available-to-all capabilities such as
"permission to fork()", "permission to exec()" and so on, and then it
will also be useful to remove these from a process's permitted set.)

> include/linux/capabilities.h:
> 
> typedef struct __user_cap_data_struct {
> __u32 effective;
> __u32 permitted;
> __u32 inheritable;
> } __user *cap_user_data_t;
> 

And my patch adds a __u32 bounding to that structure.

-- 
 David A. Madore
([EMAIL PROTECTED],
 http://www.madore.org/~david/ )
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] atomic open(..., O_CREAT | ...)

2005-08-09 Thread Miklos Szeredi
> > > There is quite a bit of code out there that assumes it is free to stuff
> > > things into nd->mnt and nd->dentry. Some of it is Al Viro's code, some
> > > of it is from other people.
> > > For instance, the ESTALE handling will just save nd->mnt/nd->dentry
> > > before calling __link_path_walk(), then restore + retry if the ESTALE
> > > error comes up.
> > 
> > Yeah, but how is that relevant to the fact, that after
> > path_release_*() _nothing_ will be valid in the nameidata, not
> > nd->mnt, not nd->dentry, and not nd->intent.open.file.  So what's the
> > point in setting it to NULL if it must never be used anyway?
> 
> path_release() does _not_ invalidate the nameidata. Look for instance at
> __emul_lookup_dentry(), which clearly makes use of that fact.

Trond, wake up!  __emul_lookup_dentry() does nothing of the sort.
Neither does anything else.  In theory it could, but that's not a
reason to do a confusing thing like that.

> Firstly, the open_namei() flags field is not a "permissions" field. It
> contains open mode information. The calculation of the open permissions
> flags is done by open_namei() itself.

Based on flags.  It's just a FMODE_* -> MAY_* transformation

> Secondly, what advantage is there in allowing callers of open_namei() to
> be able to override the MAY_WRITE check when doing open(O_TRUNC)? This
> is a calculation that should be done _once_ in order to always get it
> right, and it should therefore be done in open_namei() together with the
> rest of the permissions calculation.

I think the _only_ caller of open_namei() is filp_open(), so this is
not much of an issue, but yeah, you could do it that way too.

Or you could initialize nameidata from filp_open().

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread David Madore
On Tue, Aug 09, 2005 at 05:37:56AM +, Chris Wright wrote:
> * David Madore ([EMAIL PROTECTED]) wrote:
> > * Second, a much more extensive change, the patch introduces a third
> > set of capabilities for every process, the "bounding" set.  Normally
> 
> this is not a good idea.  don't add more sets.

Could you elaborate?  Why is adding sets bad?  From what I read of the
June 2000 discussions on the linux-privs-discuss mailing-list (http://sourceforge.net/mailarchive/forum.php?forum_id=25120_rows=25=flat=26
 >), a rather large consensus had formed around the idea that some
kind of bounding set was a useful idea (as a matter of fact, the
sendmail problem came essentially from the fact that some people
wanted an inheritable set and other people wanted a bounding set, and
the code was some mixture of the two); and it had been argued
convincingly that it could be made POSIX compliant if that is the
issue.  Plus, Solaris privileges also come in four sets.

If it's compatibility you're worried about, it seems to me that the
user interface can be made so that it will still work with the old
libcap and merely ignore the bounding set.  So full binary
compatibility will be achieved, at least on the user level.

Finally, if it's a matter of kernel policy, I seem to understand that
my patch has a snowball's chance in hell of ever being accepted in the
mainstream kernel (I mean, it's not as though this were new: patches
to make capabilities work have been available ever since the sendmail
exploit, and in five years they haven't ever been accepted, so I
suppose there's a reason to this), so adding a fourth set of
capabilities of my own initiative isn't going to change a thing there.

So what's the problem?

>if you really want to
> work on this i'll give you all the patches that have been done thus far,
> plus a set of tests that look at all the execve, ptrace, setuid type of
> corner cases.

Yes, I'm very interested in the test suite.

-- 
 David A. Madore
([EMAIL PROTECTED],
 http://www.madore.org/~david/ )
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ppc32: Added support for the Book-E style Watchdog Timer

2005-08-09 Thread Kumar Gala
PowerPC 40x and Book-E processors support a watchdog timer at the processor
core level.  The timer has implementation dependent timeout frequencies
that can be configured by software. 

On the first Watchdog timeout we get a critical exception.  It is left
to board specific code to determine what should happen at this point.  If
nothing is done and another timeout period expires the processor may
attempt to reset the machine.

Command line parameters:
  wdt=0 : disable watchdog (default)
  wdt=1 : enable watchdog

  wdt_period=N : N sets the value of the Watchdog Timer Period.

  The Watchdog Timer Period meaning is implementation specific. Check
  User Manual for the processor for more details.

This patch is based off of work done by Takeharu Kato.

Signed-off-by: Matt McClintock <[EMAIL PROTECTED]>
Signed-off-by: Kumar Gala <[EMAIL PROTECTED]>

---
commit 1780d9e65903f3132133f754cf290a5db7916965
tree 88daf35ce065f24a86ad0ad4f7a57c91aa15527d
parent a60c894ec93e545340a495fc15d34ca69e7accbe
author Kumar K. Gala <[EMAIL PROTECTED]> Tue, 09 Aug 2005 15:27:28 -0500
committer Kumar K. Gala <[EMAIL PROTECTED]> Tue, 09 Aug 2005 15:27:28 -0500

 Documentation/watchdog/watchdog-api.txt |   20 +++
 arch/ppc/kernel/head_44x.S  |4 +
 arch/ppc/kernel/head_4xx.S  |4 -
 arch/ppc/kernel/head_fsl_booke.S|5 +
 arch/ppc/kernel/setup.c |   24 
 arch/ppc/kernel/traps.c |   19 +++
 arch/ppc/syslib/ppc4xx_setup.c  |   25 
 drivers/char/watchdog/Kconfig   |4 +
 drivers/char/watchdog/Makefile  |1 
 drivers/char/watchdog/booke_wdt.c   |  191 +++
 10 files changed, 270 insertions(+), 27 deletions(-)

diff --git a/Documentation/watchdog/watchdog-api.txt 
b/Documentation/watchdog/watchdog-api.txt
--- a/Documentation/watchdog/watchdog-api.txt
+++ b/Documentation/watchdog/watchdog-api.txt
@@ -228,6 +228,26 @@ advantechwdt.c -- Advantech Single Board
The GETSTATUS call returns if the device is open or not.
[FIXME -- silliness again?]

+booke_wdt.c -- PowerPC BookE Watchdog Timer
+
+   Timeout default varies according to frequency, supports
+   SETTIMEOUT
+
+   Watchdog can not be turned off, CONFIG_WATCHDOG_NOWAYOUT
+   does not make sense
+
+   GETSUPPORT returns the watchdog_info struct, and
+   GETSTATUS returns the supported options. GETBOOTSTATUS
+   returns a 1 if the last reset was caused by the
+   watchdog and a 0 otherwise. This watchdog can not be
+   disabled once it has been started. The wdt_period kernel
+   parameter selects which bit of the time base changing
+   from 0->1 will trigger the watchdog exception. Changing
+   the timeout from the ioctl calls will change the
+   wdt_period as defined above. Finally if you would like to
+   replace the default Watchdog Handler you can implement the
+   WatchdogHandler() function in your own code.
+
 eurotechwdt.c -- Eurotech CPU-1220/1410
 
The timeout can be set using the SETTIMEOUT ioctl and defaults
diff --git a/arch/ppc/kernel/head_44x.S b/arch/ppc/kernel/head_44x.S
--- a/arch/ppc/kernel/head_44x.S
+++ b/arch/ppc/kernel/head_44x.S
@@ -462,7 +462,11 @@ interrupt_base:
 
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
+#ifdef CONFIG_BOOKE_WDT
+   CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+#else
CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException)
+#endif
 
/* Data TLB Error Interrupt */
START_EXCEPTION(DataTLBError)
diff --git a/arch/ppc/kernel/head_4xx.S b/arch/ppc/kernel/head_4xx.S
--- a/arch/ppc/kernel/head_4xx.S
+++ b/arch/ppc/kernel/head_4xx.S
@@ -448,7 +448,9 @@ label:
 
 /* 0x1020 - Watchdog Timer (WDT) Exception
 */
-
+#ifdef CONFIG_BOOKE_WDT
+   CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException)
+#else
CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException)
 #endif
 
diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S
--- a/arch/ppc/kernel/head_fsl_booke.S
+++ b/arch/ppc/kernel/head_fsl_booke.S
@@ -564,8 +564,11 @@ interrupt_base:
EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE)
 
/* Watchdog Timer Interrupt */
-   /* TODO: Add watchdog support */
+#ifdef CONFIG_BOOKE_WDT
+   CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+#else
CRITICAL_EXCEPTION(0x3200, WatchdogTimer, UnknownException)
+#endif
 
/* Data TLB Error Interrupt */
START_EXCEPTION(DataTLBError)
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -615,6 +615,30 @@ machine_init(unsigned long r3, unsigned 
if (ppc_md.progress)
ppc_md.progress("id mach(): done", 0x200);
 }
+#ifdef CONFIG_BOOKE_WDT
+/* Checks wdt=x and wdt_period=xx command-line option */

Re: capabilities patch (v 0.1)

2005-08-09 Thread Valdis . Kletnieks
On Tue, 09 Aug 2005 07:26:21 +0200, David Madore said:

> * Second, a much more extensive change, the patch introduces a third
> set of capabilities for every process, the "bounding" set.  Normally
> the bounding set has every capability in it

How is this different in semantics from the existing 'permitted' capset?

include/linux/capabilities.h:

typedef struct __user_cap_data_struct {
__u32 effective;
__u32 permitted;
__u32 inheritable;
} __user *cap_user_data_t;



pgpIgPN4MdLwj.pgp
Description: PGP signature


Re: understanding Linux capabilities brokenness

2005-08-09 Thread Kyle Moffett

On Aug 9, 2005, at 11:16:33, Christopher Warner wrote:

In my observer pragmatic view; yes. On many occasion, i've come to CAP
calls only to be frustrated with the sheer disconnect of it all. It
simply doesn't work. If it means having to break posix conformance  
for a

working implementation. Then so be it.

On Tue, 2005-08-09 at 00:46 -0400, James Morris wrote:


Let me play the Devil's advocate here.

Should we be thinking about deprecating and removing capabilities  
from

Linux?


One brief suggestion:

A key/token interface was recently introduced that might be useful to  
allow
a simple new inheritance model for "capabilities", "roles",  
"rootperms" or

whatever other abstraction you create.

Cheers,
Kyle Moffett

--
There are two ways of constructing a software design. One way is to  
make it so
simple that there are obviously no deficiencies. And the other way is  
to make
it so complicated that there are no obvious deficiencies.  The first  
method is

far more difficult.
  -- C.A.R. Hoare


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: my kernel sometimes did a crash, but no panic

2005-08-09 Thread Adrian Bunk
On Tue, Aug 09, 2005 at 06:04:05PM +0200, Klasyk wrote:

> my kernel sometimes did a crash, but no panic
> Keyboard hunged up :(
> Network were working and I can log in. Without the keybord - it
> generally worked.
> 
> In logs:
> for example:
>...
> Aug  6 15:30:02 o kernel: Modules linked in: ip_nat_irc
>...
>  btcx-risc tveeprom i2c-core nvidia agpgart usblp ehci-hcd uhci-hcd
>...
> Aug  6 15:30:02 o kernel: EIP:0060:[]Tainted: P
>...
> it is not apic problem, i disabled it, and it didn't help
> Linux o 2.6.11-12mdkcustom #2 Sat Aug 6 11:02:20 CEST 2005 i686 AMD
> Duron(TM) unknown GNU/Linux
>...

Does this problem still occur with:
- a vanilla 2.6.13-rc6 ftp.kernel.org kernel and
- without loading any external modules since booting?

If it doesn't it's completely off-topic here and you should ask either 
Mandrake or Nvidia for support.

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] i386 No-Idle-Hz aka Dynamic-Ticks 3

2005-08-09 Thread Daniel Petrini
> I convinced my self that the next_timer... code in timer.c misses timers
> (i.e. gives the wrong answer).  I did this (after wondering due to
> performance) by scanning the whole timer list after I had the
> next_timer... answer and finding a better answer, not always, but some
> times.  That code does not address the cascade list correctly.

The timertop kernel patch accounts for all the scheduled timers. Try
to take a look at its output at /proc/top_info. Maybe it can help to
detect it.


Daniel Petrini
-- 
10LE - Linux
INdT - Manaus - Brazil
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: capabilities patch (v 0.1)

2005-08-09 Thread Bodo Eggert
Chris Wright <[EMAIL PROTECTED]> wrote:
> * David Madore ([EMAIL PROTECTED]) wrote:

>> * Second, a much more extensive change, the patch introduces a third
>> set of capabilities for every process, the "bounding" set.  Normally
> 
> this is not a good idea.  don't add more sets. if you really want to
> work on this i'll give you all the patches that have been done thus far,
> plus a set of tests that look at all the execve, ptrace, setuid type of
> corner cases.

How are you going to tell processes that may exec suid (or set-capability-)
programs from those that aren't supposed to gain certain capabilities?

-- 
Ich danke GMX dafür, die Verwendung meiner Adressen mittels per SPF
verbreiteten Lügen zu sabotieren.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Signal handling possibly wrong

2005-08-09 Thread Steven Rostedt
On Tue, 2005-08-09 at 16:03 -0400, Steven Rostedt wrote:

> Man pages and kernel are right.  I just tested this out on 2.6.13-rc3
> with the attached program and it seems to follow what is stated in the
> man pages. So the assumption of what the code did by looking at it
> proves to be the mistake. :-)
> 
> Conclusion:  sa_mask defers the signals. SA_NODEFER defers the sent
> signal.

I'm the one that's wrong here ;-)   OK the kernel _does_ have a bug.
Looking at the code, I now see it, and my last program didn't show it.
The code in question was (as Bodo showed earlier):

if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
sigaddset(&current->blocked,sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
}

Where, sa_mask is _ignored_ if NODEFER is set. (I now have woken up!).
The attached program shows that the sa_mask is indeed ignored when
SA_NODEFER is set.

Now the real question is... Is this a bug?

-- Steve



signal2
Description: application/executable


Re: [RFC][patch 0/2] mm: remove PageReserved

2005-08-09 Thread Hugh Dickins
On Wed, 10 Aug 2005, Daniel Phillips wrote:
> On Tuesday 09 August 2005 10:15, Nick Piggin wrote:
> > Daniel Phillips wrote:
> > > Why don't you pass the vma in zap_details?
> >
> > Possibly. I initially did it that way, but it ended up fattening
> > paths that don't use details.
> 
> It should not, it only affects, hmm, less than 10 places, each at the 
> beginning of a massive call chain, e.g., in madvise_dontneed:
> 
> - zap_page_range(vma, start, end - start, NULL);
> + zap_page_range(start, end - start, &(struct zap){ .vma = vma });
> 
> > And this way is less intrusive.
> 
> Nearly the same I think, and makes forward progress in controlling this 
> middle-aged belly roll of an internal API.

I much prefer how Nick has it, with vma passed separately as a regular
argument.  details is for packaging up some details only required in
unlikely circumstances, normally it's NULL and not filled in at all.

You can argue that (vma->vm_flags & VM_RESERVED) is precisely that
kind of detail.  But personally I find it rather odd that vma isn't
an explicit argument to zap_pte_range already - I find it very useful
when trying to shed light on the rmap.c:BUG, for example.

There might be a case for packaging repeated arguments into structures
(though several of these levels are inlined anyway), but that's some
other exercise entirely, shouldn't get in the way of removing Reserved.

Hugh
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Regression: radeonfb: No synchronisation on CRT with linux-2.6.13-rc5

2005-08-09 Thread Bodo Eggert
On Mon, 8 Aug 2005, Benjamin Herrenschmidt wrote:
> On Mon, 2005-08-08 at 02:06 +0200, Bodo Eggert wrote:

> > The wrong values are constant across reboots (see my first mail), and I 
> > have a CRT.
> > 
> > Can you tell me where the timing values are read?
> 
> radeon_write_mode() programs the mode. The monitor timing infos are read
> by the various bits of code in radeon_monitor.c
> 
> I'd be curious if you could identify what bit of code is misbehaving

I added preempt_*able around radeon_probe_i2c_connector, and now I get the 
output from below and still no sync. Obviously you shouldn't msleep in 
preempt-disabled code. I'll try voluntary preemption, but that will at 
best hide the error.

Maybe I can mess with the msleep()s like thorndike's cat, but any success 
will be an accident.

Aug  9 20:58:26 be1 __mod_timer+0xb4/0x100
Aug  9 20:58:27 be1  [] schedule_timeout+0x51/0xa0
Aug  9 20:58:27 be1  [] process_timeout+0x0/0x10
Aug  9 20:58:27 be1  [] msleep+0x2f/0x40
Aug  9 20:58:27 be1  [] radeon_probe_i2c_connector+0xaa/0x320
Aug  9 20:58:27 be1  [] radeon_probe_screens+0x482/0x5d0
Aug  9 20:58:27 be1  [] radeonfb_pci_register+0x309/0x570
...
Aug  9 20:58:27 be1 scheduling while atomic: swapper/0x0001/1
Aug  9 20:58:27 be1  [] schedule+0x589/0x640
Aug  9 20:58:27 be1  [] lock_timer_base+0x32/0x70
Aug  9 20:58:27 be1  [] __mod_timer+0xb4/0x100
Aug  9 20:58:27 be1  [] schedule_timeout+0x51/0xa0
Aug  9 20:58:27 be1  [] process_timeout+0x0/0x10
Aug  9 20:58:27 be1  [] msleep+0x2f/0x40
Aug  9 20:58:27 be1  [] radeon_probe_i2c_connector+0xaa/0x320
Aug  9 20:58:27 be1  [] radeon_probe_screens+0x482/0x5d0
Aug  9 20:58:27 be1  [] radeonfb_pci_register+0x309/0x570
Aug  9 20:58:27 be1  [] __pci_device_probe+0x48/0x60



-- 
It is still called paranoia when they really are out to get you.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux-2.6.13-rc6: aic7xxx testers please..

2005-08-09 Thread John Stoffel
> "James" == James Bottomley <[EMAIL PROTECTED]> writes:

Thank you for looking into this with me, I really appreciate it.  I'm
kinda stumped why this suddenly started happening, but it could be
hardware related of course...

James> So basically the problem is on scsi1 with the tape device, which
James> apparently negotiates only narrow async?

It's not a performance problem, it's a lockup problem.  I use bacula
to make my backups using the DLT7000 as my media.  But the tape drives
hangs, it's hung now so that I can't do my backups.

James> What do the domain validation messages say about this device?
James> They should be in dmesg.  I'm fairly certain that DLT tapes do
James> better than narrow async.

The drive should be able to do 10MBytes/sec to the interface, the
drive itself can only do 5Mbytes/sec to the media, but with 2:1
compression and an 8MB buffer on the drive, it likes to be fed data as
quickly as it can.  

Here's the validation results for my two disks and tape drive:

  scsi0 : Adaptec AIC7XXX EISA/VLB/PCI SCSI HBA DRIVER, Rev 6.2.36
  
  aic7890/91: Ultra2 Wide Channel A, SCSI Id=7, 32/253 SCBs

   target0:0:0: asynchronous.
Vendor: COMPAQModel: HC01841729Rev: 3208
Type:   Direct-Access  ANSI SCSI revision: 02
  scsi0:A:0:0: Tagged Queuing enabled.  Depth 32
   target0:0:0: Beginning Domain Validation
   target0:0:0: wide asynchronous.
   target0:0:0: Domain Validation skipping write tests
   target0:0:0: FAST-20 WIDE SCSI 40.0 MB/s ST (50 ns, offset 15)
   target0:0:0: Ending Domain Validation
Vendor: COMPAQModel: BD018222CARev: B016
Type:   Direct-Access  ANSI SCSI revision: 02
   target0:0:1: asynchronous.
  scsi0:A:1:0: Tagged Queuing enabled.  Depth 32
   target0:0:1: Beginning Domain Validation
   target0:0:1: wide asynchronous.
   target0:0:1: Domain Validation skipping write tests
   target0:0:1: FAST-20 WIDE SCSI 40.0 MB/s ST (50 ns, offset 63)
   target0:0:1: Ending Domain Validation
  scsi1 : Adaptec AIC7XXX EISA/VLB/PCI SCSI HBA DRIVER, Rev 6.2.36
  
  aic7880: Ultra Wide Channel A, SCSI Id=7, 16/253 SCBs

Vendor: SUN   Model: DLT7000   Rev: 1E48
Type:   Sequential-Access  ANSI SCSI revision: 02
   target1:0:6: asynchronous.
   target1:0:6: Beginning Domain Validation
   target1:0:6: wide asynchronous.
   target1:0:6: Domain Validation skipping write tests
   target1:0:6: FAST-10 WIDE SCSI 20.0 MB/s ST (100 ns, offset 8)
   target1:0:6: Ending Domain Validation
  st: Version 20050501, fixed bufsize 32768, s/g segs 256
  Attached scsi tape st0 at scsi1, channel 0, id 6, lun 0
  st0: try direct i/o: yes (alignment 512 B), max page reachable by HBA 1048575
  SCSI device sda: 35566000 512-byte hdwr sectors (18210 MB)
  SCSI device sda: drive cache: write through
  SCSI device sda: 35566000 512-byte hdwr sectors (18210 MB)
  SCSI device sda: drive cache: write through
   sda: sda1 sda2 sda3 sda4 < sda5 sda6 >
  Attached scsi disk sda at scsi0, channel 0, id 0, lun 0
  SCSI device sdb: 35565080 512-byte hdwr sectors (18209 MB)
  SCSI device sdb: drive cache: write through
  SCSI device sdb: 35565080 512-byte hdwr sectors (18209 MB)
  SCSI device sdb: drive cache: write through
   sdb: sdb1 sdb2 sdb3 sdb4 < sdb5 >
  Attached scsi disk sdb at scsi0, channel 0, id 1, lun 0
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] i386 No-Idle-Hz aka Dynamic-Ticks 3

2005-08-09 Thread George Anzinger

Tony Lindgren wrote:

* Srivatsa Vaddagiri <[EMAIL PROTECTED]> [050805 05:37]:


On Wed, Aug 03, 2005 at 06:05:28AM +, Con Kolivas wrote:

This is the dynamic ticks patch for i386 as written by Tony Lindgren 
<[EMAIL PROTECTED]> and Tuukka Tikkanen <[EMAIL PROTECTED]>. 
Patch for 2.6.13-rc5


There were a couple of things that I wanted to change so here is an updated 
version. This code should have stabilised enough for general testing now.


Con,
I have been looking at some of the requirement of tickless idle CPUs in
core kernel areas like scheduler and RCU. Basically, both power management and 
virtualization benefit if idle CPUs can cut off useless timer ticks. Especially 
from a virtualization standpoint, I think it makes sense that we enable this 
feature on a per-CPU basis i.e let individual CPUs cut off their ticks as and 
when they become idle. The benefit of this is more visible in platforms that 
host lot of (SMP) VMs on the same machine. Most of the time, these VMs may be 
partially idle (some CPUs in it are idle, some not) and it is good that we 
quiesce the timer ticks on the partial set of idle CPUs. Both S390 and Xen ports
of Linux kernel have this ability today (S390 has it in mainline already and 
Xen has it out of tree).



Good point, and it would be nice to have it resolved for systems that support
idling individual CPUs. The current setup was done because when I was tinkering
with the amd76x_pm patch a while a back, I noticed that idling the cpu
disconnects all cpus from the bus. (As far as I remember)

So this may need to be configured depending on the system.



From this viewpoint, I think the current implementation of dynamic tick
falls short of this requirement. It cuts off the timer ticks only when 
all CPUs go idle.


Apart from this observation, I have some others about the current dynamic tick
patch:

- All CPUs seem to cut off the same number of ticks (dyn_tick->skip). Isn't
 this wrong, considering that the timer list is per-CPU? This will cause
 some timers to be serviced much later than usual.



Yes if it's done on per-CPU basis. In the current setup the first interrupt
will kick the system off the dyn-tick state and the timers get checked again.



- The fact that dyn_tick_state is global and accessed from all CPUs
 is probably a scalability concern, especially if we allow the ticks
 to be cut off on per-CPU basis.




From idling devices point of view, we still need some global variable I

believe. How else would you be able to tell all devices that the whole
system does not have any timers for next 2 seconds?



- Again, when we allow this on a per-CPU basis, subsystems like
 RCU need to know the partial set of idle CPUs. RCU already does
 that thr' nohz_cpu_mask (which will need to replace dyn_cpu_map).



Sounds like that could work for dyn-tick too.


- Looking at dyn_tick_timer_interrupt, would it be nice if we avoid calling 
 do_timer_interrupt so many times and instead update jiffies to

 (skipped_ticks - 1) and then call do_timer_interrupt once? I think
 VST does it that way.



In the long run we would do the calculations in usecs and just emulate
jiffies from the hw timer. But yes, optimizing updating the time would be
great.



- dyn_tick->max_skip = 0xff / apic_timer_val;
From my reading of Intel docs, APIC_TMICT is 32-bit. So why does the
 above calculation take only 24-bits into account? What am I missing here?



Hmm, could be a bug here, needs to be checked. Maybe 32-bit APIC timer is
optional support, or maybe I accidentally pulled the optional 24-bit support
from the ACPI PM timer.

But in any case on P4 systems the APIC timer is not the bottleneck as
stopping or reprogramming PIT also kills APIC. (This does not happen on P3
systems). So the bottleneck most likely is the length of PIT.


I can take a shot at addressing these concerns in dynamic_tick patch, but it 
seems to me that VST has already addressed all these to a big extent. Had you 
considered VST before? The biggest bottleneck I see in VST going mainline is 
its dependency on HRT patch but IMO it should be possible to write a small patch
to support VST w/o HRT. 


George, what do you think?



HRT + VST depend on APIC only, and does not use next_timer_interrupt().


I convinced my self that the next_timer... code in timer.c misses timers 
(i.e. gives the wrong answer).  I did this (after wondering due to 
performance) by scanning the whole timer list after I had the 
next_timer... answer and finding a better answer, not always, but some 
times.  That code does not address the cascade list correctly.



--
George Anzinger   george@mvista.com
HRT (High-res-timers):  http://sourceforge.net/projects/high-res-timers/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Signal handling possibly wrong

2005-08-09 Thread Steven Rostedt
On Tue, 2005-08-09 at 21:41 +0200, Bodo Stroesser wrote:
> S
> > To me, the man pages make more sense, and I think the kernel is wrong.
> 
> Yes, that's what I think, too. If someone doesn't want additional signals
> to be masked, he can set sa_mask to be empty.
> OTOH, I have no idea, what POSIX specifies. Maybe kernel is right and man
> page is wrong?
> 
>   Bodo
> > 

Man pages and kernel are right.  I just tested this out on 2.6.13-rc3
with the attached program and it seems to follow what is stated in the
man pages. So the assumption of what the code did by looking at it
proves to be the mistake. :-)

Conclusion:  sa_mask defers the signals. SA_NODEFER defers the sent
signal.

-- Steve

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Handler installed for SIGUSR1 by both children.  It announces itself,
 * stays inside the handler for five seconds, then announces that it is
 * done, so that signals sent meanwhile arrive while the handler is
 * still running.  The signal number argument is not used.
 */
void user1(int x)
{
	const int self = (int)getpid();

	(void)x;

	printf("pid[%d]: user1 start\n", self);
	sleep(5);
	printf("pid[%d]: user1 stopped\n", self);
}

/*
 * Handler installed for SIGUSR2 by both children.  It only reports that
 * it ran, tagged with the process id.  The signal number argument is
 * not used.
 */
void user2(int x)
{
	(void)x;

	printf("pid[%d]:in user2\n", (int)getpid());
}

/*
 * Handler installed for SIGINT by both children.  It reports the signal
 * and then terminates the process with exit status 0.  The signal
 * number argument is not used.
 */
void intr(int x)
{
	(void)x;

	printf("pid[%d]: received SIGINT\n", (int)getpid());
	exit(0);
}

/*
 * Body of the first child: exercises sa_mask.
 *
 * Installs intr() for SIGINT, user1() for SIGUSR1 with SIGUSR2 added to
 * sa_mask, and user2() for SIGUSR2, then waits for signals forever.
 * The process only ends through intr() or through exit(-1) when a
 * sigaction() call fails.
 *
 * The same struct sigaction is reused for all three calls, so the
 * SIGUSR2 registration also carries SIGUSR2 in its sa_mask.
 */
void start1(void)
{
	struct sigaction act;

	/* Start from an all-zero action: no flags, empty mask. */
	memset(&act, 0, sizeof(act));

	act.sa_handler = intr;
	if ((sigaction(SIGINT, &act, NULL)) < 0) {
		perror("child1: sigaction");
		exit(-1);
	}

	/* SIGUSR2 is to be held back for as long as user1() is running. */
	act.sa_handler = user1;
	sigaddset(&act.sa_mask, SIGUSR2);
	if ((sigaction(SIGUSR1, &act, NULL)) < 0) {
		perror("child1: sigaction");
		exit(-1);
	}

	act.sa_handler = user2;
	if ((sigaction(SIGUSR2, &act, NULL)) < 0) {
		perror("child1: sigaction");
		exit(-1);
	}

	/* Sleep until a signal arrives instead of spinning on the CPU. */
	for (;;)
		pause();
}

/*
 * Body of the second child: exercises SA_NODEFER.
 *
 * Installs intr() for SIGINT, user1() for SIGUSR1 with SA_NODEFER set
 * and an empty sa_mask, and user2() for SIGUSR2, then waits for signals
 * forever.  The process only ends through intr() or through exit(-1)
 * when a sigaction() call fails.
 *
 * The same struct sigaction is reused for all three calls, so the
 * SIGUSR2 registration also carries SA_NODEFER.
 */
void start2(void)
{
	struct sigaction act;

	/* Start from an all-zero action: no flags, empty mask. */
	memset(&act, 0, sizeof(act));

	act.sa_handler = intr;
	if ((sigaction(SIGINT, &act, NULL)) < 0) {
		perror("child2: sigaction");
		exit(-1);
	}

	/* Request that SIGUSR1 is not blocked while user1() is running. */
	act.sa_handler = user1;
	act.sa_flags |= SA_NODEFER;
	if ((sigaction(SIGUSR1, &act, NULL)) < 0) {
		perror("child2: sigaction");
		exit(-1);
	}

	act.sa_handler = user2;
	if ((sigaction(SIGUSR2, &act, NULL)) < 0) {
		/* Was mislabelled "child1" in this child. */
		perror("child2: sigaction");
		exit(-1);
	}

	/* Sleep until a signal arrives instead of spinning on the CPU. */
	for (;;)
		pause();
}

/*
 * Send the probe sequence to one child, announcing each signal first:
 * SIGUSR1, 1s pause, SIGUSR2, 5s pause, SIGUSR1, 1s pause, SIGUSR1,
 * 1s pause, SIGINT.  Return values of kill() are not checked.
 */
static void send_sequence(int pid)
{
	printf("parent sending %d SIGUSR1\n", pid);
	kill(pid, SIGUSR1);
	sleep(1);
	printf("parent sending %d SIGUSR2\n", pid);
	kill(pid, SIGUSR2);
	sleep(5);
	printf("parent sending %d SIGUSR1\n", pid);
	kill(pid, SIGUSR1);
	sleep(1);
	printf("parent sending %d SIGUSR1\n", pid);
	kill(pid, SIGUSR1);
	sleep(1);
	printf("parent sending %d SIGINT\n", pid);
	kill(pid, SIGINT);
}

/*
 * Fork two children (start1() and start2()), then drive the same signal
 * sequence at the first child and afterwards at the second one.
 *
 * Returns via exit(0) on success and exit(1) if either fork() fails.
 *
 * NOTE(review): the parent starts signalling right after the second
 * fork(); nothing guarantees that the children have installed their
 * handlers by then.
 */
int main(int argc, char **argv)
{
	int pid[2];

	(void)argc;
	(void)argv;

	if ((pid[0] = fork()) < 0) {
		/*
		 * Must not continue: pid[0] would be -1 and kill(-1, sig)
		 * addresses every process the caller is allowed to signal.
		 */
		perror("fork");
		exit(1);
	} else if (!pid[0]) {
		start1();
		exit(0);
	}

	if ((pid[1] = fork()) < 0) {
		perror("fork");
		/* Do not leave the first child waiting forever. */
		kill(pid[0], SIGKILL);
		exit(1);
	} else if (!pid[1]) {
		start2();
		exit(0);
	}

	send_sequence(pid[0]);
	send_sequence(pid[1]);

	exit(0);
}


  1   2   3   4   5   6   >