Re: [RFC] Union Mount: Readdir approaches

2007-09-12 Thread hooanon05

Al Boldi:
> It turns out that the problem was this in dentry.c:
:::
> Commenting the #if block makes it compile now.
> 
> Works great too.  Even performance wise.  Needs more testing though.

Thank you for your report and forwarding your original message.
And I am glad that it is working for you.

It seems that '#if ... #endif' in an 'unlikely' macro argument is bad
coding. I don't know why my compiler and other users' compilers didn't
produce an error.
Anyway, I'll fix such code.


> You really need to post a cleaned up version for review and possible 
> inclusion into mainline.  It definitely looks solid.

I'll try in the future.


Thanks
Junjiro Okajima
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] Blackfin arch bug fixing for 2.6.23-rc6

2007-09-12 Thread Bryan Wu
Hi Linus,

Please pull from 'for-linus' branch of

master.kernel.org:/pub/scm/linux/kernel/git/cooloney/blackfin-2.6.git for-linus

to receive the following updates:

 arch/blackfin/mach-common/pm.c  |6 ++
 include/asm-blackfin/mach-bf561/cdefBF561.h |4 +-
 include/asm-blackfin/string.h   |  129 +--
 3 files changed, 88 insertions(+), 51 deletions(-)

Michael Hennerich (1):
  Blackfin arch: Update/Fix PM support add new pm_ops valid

Mike Frysinger (1):
  Blackfin arch: fix some bugs in lib/string.h functions found by our 
string testing modules

Robin Getz (1):
  Blackfin arch: fix the aliased write macros

diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c
index 1772d8d..b103027 100644
--- a/arch/blackfin/mach-common/pm.c
+++ b/arch/blackfin/mach-common/pm.c
@@ -158,10 +158,16 @@ static int bfin_pm_finish(suspend_state_t state)
return 0;
 }
 
+static int bfin_pm_valid(suspend_state_t state)
+{
+   return (state == PM_SUSPEND_STANDBY);
+}
+
 struct pm_ops bfin_pm_ops = {
.prepare = bfin_pm_prepare,
.enter = bfin_pm_enter,
.finish = bfin_pm_finish,
+   .valid  = bfin_pm_valid,
 };
 
 static int __init bfin_pm_init(void)
diff --git a/include/asm-blackfin/mach-bf561/cdefBF561.h 
b/include/asm-blackfin/mach-bf561/cdefBF561.h
index 6e87ab2..73d4d65 100644
--- a/include/asm-blackfin/mach-bf561/cdefBF561.h
+++ b/include/asm-blackfin/mach-bf561/cdefBF561.h
@@ -83,9 +83,9 @@ static __inline__ void bfin_write_VR_CTL(unsigned int val)
 
 /* For MMR's that are reserved on Core B, set up defines to better integrate 
with other ports */
 #define bfin_read_SWRST()bfin_read_SICA_SWRST()
-#define bfin_write_SWRST()   bfin_write_SICA_SWRST()
+#define bfin_write_SWRST(val)bfin_write_SICA_SWRST(val)
 #define bfin_read_SYSCR()bfin_read_SICA_SYSCR()
-#define bfin_write_SYSCR()   bfin_write_SICA_SYSCR()
+#define bfin_write_SYSCR(val)bfin_write_SICA_SYSCR(val)
 
 /* System Reset and Interrupt Controller registers for core A (0xFFC0 
0100-0xFFC0 01FF) */
 #define bfin_read_SICA_SWRST()   bfin_read16(SICA_SWRST)
diff --git a/include/asm-blackfin/string.h b/include/asm-blackfin/string.h
index 6f1eb7d..e8ada91 100644
--- a/include/asm-blackfin/string.h
+++ b/include/asm-blackfin/string.h
@@ -9,13 +9,16 @@ extern inline char *strcpy(char *dest, const char *src)
char *xdest = dest;
char temp = 0;
 
-   __asm__ __volatile__
-   ("1:\t%2 = B [%1++] (Z);\n\t"
-"B [%0++] = %2;\n\t"
-"CC = %2;\n\t"
-"if cc jump 1b (bp);\n"
-   : "+" (dest), "+" (src), "=" (temp)
-::"memory", "CC");
+   __asm__ __volatile__ (
+   "1:"
+   "%2 = B [%1++] (Z);"
+   "B [%0++] = %2;"
+   "CC = %2;"
+   "if cc jump 1b (bp);"
+   : "+" (dest), "+" (src), "=" (temp)
+   :
+   : "memory", "CC");
+
return xdest;
 }
 
@@ -28,37 +31,56 @@ extern inline char *strncpy(char *dest, const char *src, 
size_t n)
if (n == 0)
return xdest;
 
-   __asm__ __volatile__
-   ("1:\t%3 = B [%1++] (Z);\n\t"
-"B [%0++] = %3;\n\t"
-"CC = %3;\n\t"
-"if ! cc jump 2f;\n\t"
-"%2 += -1;\n\t"
-"CC = %2 == 0;\n\t"
-"if ! cc jump 1b (bp);\n"
-"2:\n"
-   : "+" (dest), "+" (src), "+" (n), "=" (temp)
-::"memory", "CC");
+   __asm__ __volatile__ (
+   "1:"
+   "%3 = B [%1++] (Z);"
+   "B [%0++] = %3;"
+   "CC = %3;"
+   "if ! cc jump 2f;"
+   "%2 += -1;"
+   "CC = %2 == 0;"
+   "if ! cc jump 1b (bp);"
+   "jump 4f;"
+   "2:"
+   /* if src is shorter than n, we need to null pad bytes now */
+   "%3 = 0;"
+   "3:"
+   "%2 += -1;"
+   "CC = %2 == 0;"
+   "if cc jump 4f;"
+   "B [%0++] = %3;"
+   "jump 3b;"
+   "4:"
+   : "+" (dest), "+" (src), "+" (n), "=" (temp)
+   :
+   : "memory", "CC");
+
return xdest;
 }
 
 #define __HAVE_ARCH_STRCMP
 extern inline int strcmp(const char *cs, const char *ct)
 {
-   char __res1, __res2;
-
-   __asm__
-   ("1:\t%2 = B[%0++] (Z);\n\t" /* get *cs */
-   "%3 = B[%1++] (Z);\n\t" /* get *ct */
-   "CC = %2 == %3;\n\t"/* compare a byte */
-   "if ! cc jump 2f;\n\t"  /* not equal, break out */
-   "CC = %2;\n\t"  /* at end of cs? */
-   "if cc jump 1b (bp);\n\t"   /* no, keep going */
-   "jump.s 3f;\n"  /* strings are equal */
-   "2:\t%2 = 

Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex

2007-09-12 Thread Rusty Russell
On Tue, 2007-09-11 at 10:27 -0400, Mathieu Desnoyers wrote: 
> * Rusty Russell ([EMAIL PROTECTED]) wrote:
> > On Mon, 2007-09-10 at 20:45 -0400, Mathieu Desnoyers wrote:
> > > Code patching of _live_ SMP code is allowed. This is why I went through
> > > all this trouble on i386.
> > 
> > Oh, I was pretty sure it wasn't.  OK.
> > 
> > So now why three versions of immediate_set()?  And why are you using my
> > lock for exclusion?  Against what?
> > 
> 
> If we need to patch code at boot time, when interrupts are still
> disabled (it happens when we parse the kernel arguments for instance),
> we cannot afford to use IPIs to call sync_core() on each cpu, using
> breakpoints/notifier chains could be tricky (because we are very early
> at boot and alternatives or paravirt may not have been applied yet).

Hi Mathieu,

Sure, but why is that the caller's problem?  immediate_set() isn't
fastpath, so why not make it do an "if (early_boot)" internally?

> _immediate_set() has been introduced because of the way immediate values
> are used by markers: the linux kernel markers already hold the module
> mutex when they need to update the immediate values. Taking the mutex
> twice makes no sense, so _immediate_set() is used when the caller
> already holds the module mutex.

> > Why not just have one immediate_set() which iterates through and fixes
> > up all the references?
> 
> (reasons explained above)
> 
> > It can use an internal lock if you want to avoid
> > concurrent immediate_set() calls.
> > 
> 
> An internal lock won't protect against modules load/unload race. We have
> to iterate on the module list.

Sure, but it seems like that's fairly easy to do within module.c:

/* This updates all the immediates even though only one might have
* changed.  But it's so rare it's not worth optimizing. */
void module_update_immediates(void)
{
mutex_lock(&module_mutex);
list_for_each_entry(mod, &modules, list)
update_immediates(mod->immediate, mod->num_immediate);
mutex_unlock(&module_mutex);
}

Then during module load you do:

update_immediates(mod->immediate, mod->num_immediate);

Your immediate_update() just becomes:

update_immediates(__start___immediate,
  __stop___immediate - __start___immediate);
module_update_immediates();

update_immediates() can grab the immediate_mutex if you want.

> > Why is it easier to patch the sites now than later?  Currently it's just
> > churn.  You could go back and find them when this mythical patch gets
> > merged into this mythical future gcc version.  It could well need a
> > completely different macro style, like "cond_imm(var, code)".
> 
> Maybe you're right. My thought was that if we have a way to express a
> strictly boolean if() statement that can later be optimized further by
> gcc using a jump rather than a conditional branch and currently emulate
> it by using a load immediate/test/branch, we might want to do so right
> now so we don't have to do a second code transition from
> if (immediate_read()) to immediate_if () later. But you might be
> right in that the form could potentially change anyway when the
> implementation would come, although I don't see how.

I was thinking that we might find useful specific cases before we get
GCC support, which archs can override with tricky asm if they wish.

Cheers,
Rusty.


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


crashme fault

2007-09-12 Thread Randy Dunlap
I run almost-daily kernel testing.  I haven't seen 'crashme' cause a
kernel fault until today, and now I've seen it twice on 2.6.23-rc6-git2,
x86_64.  After the first fault, I ran 'crashme' about 10 more times
to get the second fault (usually for 10 minutes, one time for 30
minutes).

[This is gjc-crashme, not Dave's scrashme replacement:
http://people.delphiforums.com/gjc/crashme.html]

There is very little helpful info.  RIP is strange, e.g.: 0051b446
The call stack is not printed.  No kernel symbols are printed,
even though I have CONFIG_KALLSYMS{_ALL}=y.

Here are the 2 kernel message logs of the faults.
Any ideas/suggestions?  I suppose I will try to log the syscall
parameters that crashme is using.

 1. ~~~

[ 7487.208128] Unable to handle kernel paging request at ff019b53 RIP:
[ 7487.212752]  [<00510eea>]
[ 7487.218537] PGD 10c1a2067 PUD 0
[ 7487.221811] Oops:  [1] SMP
[ 7487.224989] CPU 2
[ 7487.227024] Modules linked in: loop
[ 7487.230550] Pid: 19139, comm: crashme Not tainted 2.6.23-rc6-git2 #1
[ 7487.236896] RIP: 0033:[<00510eea>]  [<00510eea>]
[ 7487.242925] RSP: 002b:7fffc9a8ec10  EFLAGS: 00010e83
[ 7487.248234] RAX: 8c4a RBX: 004014f1 RCX: 2b20e11c8b37
[ 7487.255361] RDX: 00510ee0 RSI:  RDI: 000a
[ 7487.262489] RBP: 7fffc9a8ec10 R08: 7fffc9a8eb60 R09: 
[ 7487.269616] R10: 0008 R11: 0612 R12: 
[ 7487.276743] R13: 7fffc9a8ee00 R14:  R15: 
[ 7487.283871] FS:  2b20e13676d0() GS:81011fc75840() 
knlGS:
[ 7487.291952] CS:  0010 DS:  ES:  CR0: 8005003b
[ 7487.297693] CR2: ff019b53 CR3: 5be6b000 CR4: 06e0
[ 7487.304821] DR0:  DR1:  DR2: 
[ 7487.311949] DR3:  DR6: 0ff0 DR7: 0400
[ 7487.319076] Process crashme (pid: 19139, threadinfo 81010683, task 
810102cf5040)
[ 7487.327511]
[ 7487.329009] RIP  [<00510eea>]
[ 7487.332690]  RSP <7fffc9a8ec10>
[ 7487.336180] CR2: ff019b53
[ 7487.339810] Kernel panic - not syncing: Fatal exception

~~~ 2. ~~

[16625.590976] Unable to handle kernel paging request at ff019f05 RIP: 
[16625.595604]  [<0051b446>]
[16625.601397] PGD d9af2067 PUD 0 
[16625.604577] Oops:  [1] SMP 
[16625.607754] CPU 1 
[16625.609789] Modules linked in: loop
[16625.613315] Pid: 23057, comm: crashme Not tainted 2.6.23-rc6-git2 #1
[16625.619662] RIP: 0033:[<0051b446>]  [<0051b446>]
[16625.625689] RSP: 002b:7fff7b5851f0  EFLAGS: 00010603
[16625.630998] RAX:  RBX: 2b672f63ec00 RCX: 2b672f6d1b37
[16625.638127] RDX: 00401476 RSI:  RDI: 000a
[16625.645254] RBP: 7fff7b5851f0 R08: 7fff7b585140 R09: 
[16625.652381] R10: 0008 R11: 0612 R12: 
[16625.659508] R13: 7fff7b5853e0 R14:  R15: 
[16625.37] FS:  2b672f8706d0() GS:81011fc75e40() 
knlGS:
[16625.674717] CS:  0010 DS:  ES:  CR0: 80050033
[16625.680458] CR2: ff019f05 CR3: d942b000 CR4: 06e0
[16625.687587] DR0:  DR1:  DR2: 
[16625.694714] DR3:  DR6: 4ff0 DR7: 0400
[16625.701841] Process crashme (pid: 23057, threadinfo 8100d9bfe000, task 
81011e83a040)
[16625.710267] 
[16625.711765] RIP  [<0051b446>]
[16625.715446]  RSP <7fff7b5851f0>
[16625.718937] CR2: ff019f05
[16625.722553] Kernel panic - not syncing: Fatal exception


---
~Randy
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Union Mount: Readdir approaches

2007-09-12 Thread Al Boldi
[EMAIL PROTECTED] wrote:
> Jan Engelhardt:
> > On Sep 12 2007 13:46, Al Boldi wrote:
> > >This is way too complicated, but I tried it anyway, only to find it
> > > doesn't compile:
> >
> > cvs up -D 2007-08-07
> >
> > that one works ;-)
>
> Jan, do you mean that only the one month old version could be compiled?
> It is rather surprising since I know some users compiled the newer
> versions. Won't you tell me how did you 'make' it? I think a personal
> mail for me is preferable to ML.
>
> To Al Boldi,
> Will you send me directly the message which is quoted by Jan? Since it
> was not delivered to me.
>
> Thanks in advance.
> Junjiro Okajima

It turns out that the problem was this in dentry.c:

627-if (unlikely(do_udba
628- && !is_root
629- && (unhashed != d_unhashed(h_dentry)
630://#if 1
631- || name->len != h_dentry->d_name.len
632- || memcmp(name->name, h_dentry->d_name.name,
633-   name->len)
634-//#endif
635- ))) {
636-LKTRTrace("unhash 0x%x 0x%x, %.*s %.*s\n",
637-  unhashed, d_unhashed(h_dentry),
638-  DLNPair(dentry), DLNPair(h_dentry));
639-goto err;
640-}

Commenting the #if block makes it compile now.

Works great too.  Even performance wise.  Needs more testing though.

You really need to post a cleaned up version for review and possible 
inclusion into mainline.  It definitely looks solid.


Thanks!

--
Al

--  Original Message  --

Subject: Re: [RFC] Union Mount: Readdir approaches
Date: Wednesday 12 September 2007 01:46 pm
From: Al Boldi <[EMAIL PROTECTED]>
To: [EMAIL PROTECTED]
Cc: [EMAIL PROTECTED], linux-kernel@vger.kernel.org, 
[EMAIL PROTECTED], [EMAIL PROTECTED], Jan Blunck 
<[EMAIL PROTECTED]>, "Josef 'Jeff' Sipek" <[EMAIL PROTECTED]>

[EMAIL PROTECTED] wrote:
> But if you really want to read or try it, you can get all source files
> from sourceforge. Read http://aufs.sf.net and try,
> $ cvs -d:pserver:[EMAIL PROTECTED]:/cvsroot/aufs login
> (CVS password is empty)
> $ cvs -z3 -d:pserver:[EMAIL PROTECTED]:/cvsroot/aufs co
> aufs

This is way too complicated, but I tried it anyway, only to find it doesn't
compile:

  CHK include/linux/version.h
  CHK include/linux/utsrelease.h
  CALLscripts/checksyscalls.sh
  CHK include/linux/compile.h
  CC  fs/aufs/dentry.o
fs/aufs/dentry.c:630:1: directives may not be used inside a macro argument
fs/aufs/dentry.c:629:65: unterminated argument list invoking macro
 "unlikely" fs/aufs/dentry.c: In function `h_d_revalidate':
fs/aufs/dentry.c:631: `unlikely' undeclared (first use in this function)
fs/aufs/dentry.c:631: (Each undeclared identifier is reported only once
fs/aufs/dentry.c:631: for each function it appears in.)
fs/aufs/dentry.c:635: parse error before ')' token
fs/aufs/dentry.c:571: warning: unused variable `h_plus'
fs/aufs/dentry.c:571: warning: unused variable `is_nfs'
fs/aufs/dentry.c:572: warning: unused variable `p'
fs/aufs/dentry.c:575: warning: unused variable `h_inode'
fs/aufs/dentry.c:575: warning: unused variable `h_cached_inode'
fs/aufs/dentry.c:576: warning: unused variable `h_mode'
fs/aufs/dentry.c:578: warning: unused variable `reval'
fs/aufs/dentry.c:639: label `err' used but not defined
fs/aufs/dentry.c: At top level:
fs/aufs/dentry.c:642: warning: type defaults to `int' in declaration of
`reval'
fs/aufs/dentry.c:642: warning: initialization makes integer from pointer
without a cast
fs/aufs/dentry.c:642: warning: data definition has no type or storage class
fs/aufs/dentry.c:643: parse error before "if"
fs/aufs/dentry.c:649: warning: type defaults to `int' in declaration of
 `err' fs/aufs/dentry.c:649: `h_dentry' undeclared here (not in a function)
 fs/aufs/dentry.c:649: `p' undeclared here (not in a function)
fs/aufs/dentry.c:649: called object is not a function
fs/aufs/dentry.c:649: warning: data definition has no type or storage class
fs/aufs/dentry.c:650: parse error before "if"
fs/aufs/dentry.c:653: warning: type defaults to `int' in declaration of
`fake_dm_release'
fs/aufs/dentry.c:653: warning: parameter names (without types) in function
declaration
fs/aufs/dentry.c:653: conflicting types for `fake_dm_release'

...and more...


It would make matters much easier if you could just publish a link to a
combo-patch against at least the latest stable kernel, like 2.6.22.


Thanks!

--
Al

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[git patches] net driver fixes

2007-09-12 Thread Jeff Garzik

Please pull from 'upstream-linus' branch of
master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git 
upstream-linus

to receive the following updates:

 drivers/net/atl1/atl1_main.c |   19 +++
 drivers/net/ehea/ehea.h  |5 -
 drivers/net/ehea/ehea_main.c |   16 ++--
 drivers/net/phy/phy.c|4 ++--
 drivers/net/phy/phy_device.c |4 ++--
 drivers/net/sky2.c   |9 -
 drivers/net/spider_net.c |   12 
 7 files changed, 41 insertions(+), 28 deletions(-)

Hans-Jürgen Koch (1):
  Fix a lock problem in generic phy code

Ishizaki Kou (1):
  spidernet: fix interrupt reason recognition

Jan-Bernd Themann (2):
  ehea: propagate physical port state
  ehea: fix last_rx update

Luca Tettamanti (1):
  atl1: disable broken 64-bit DMA

Stephen Hemminger (1):
  sky2: restore multicast list on resume and other ops

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 3c1984e..f23e13c 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -2203,21 +2203,20 @@ static int __devinit atl1_probe(struct pci_dev *pdev,
struct net_device *netdev;
struct atl1_adapter *adapter;
static int cards_found = 0;
-   bool pci_using_64 = true;
int err;
 
err = pci_enable_device(pdev);
if (err)
return err;
 
-   err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+   /*
+* 64-bit DMA currently has data corruption problems, so let's just
+* use 32-bit DMA for now.  This is a big hack that is probably wrong.
+*/
+   err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
if (err) {
-   err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
-   if (err) {
-   dev_err(>dev, "no usable DMA configuration\n");
-   goto err_dma;
-   }
-   pci_using_64 = false;
+   dev_err(>dev, "no usable DMA configuration\n");
+   goto err_dma;
}
/* Mark all PCI regions associated with PCI device
 * pdev as being reserved by owner atl1_driver_name
@@ -2282,7 +2281,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev,
 
netdev->ethtool_ops = _ethtool_ops;
adapter->bd_number = cards_found;
-   adapter->pci_using_64 = pci_using_64;
 
/* setup the private structure */
err = atl1_sw_init(adapter);
@@ -2299,9 +2297,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev,
 */
/* netdev->features |= NETIF_F_TSO; */
 
-   if (pci_using_64)
-   netdev->features |= NETIF_F_HIGHDMA;
-
netdev->features |= NETIF_F_LLTX;
 
/*
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index d67f97b..8d58be5 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -39,7 +39,7 @@
 #include 
 
 #define DRV_NAME   "ehea"
-#define DRV_VERSION"EHEA_0073"
+#define DRV_VERSION"EHEA_0074"
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
@@ -402,6 +402,8 @@ struct ehea_mc_list {
 
 #define EHEA_PORT_UP 1
 #define EHEA_PORT_DOWN 0
+#define EHEA_PHY_LINK_UP 1
+#define EHEA_PHY_LINK_DOWN 0
 #define EHEA_MAX_PORT_RES 16
 struct ehea_port {
struct ehea_adapter *adapter;/* adapter that owns this port */
@@ -427,6 +429,7 @@ struct ehea_port {
u32 msg_enable;
u32 sig_comp_iv;
u32 state;
+   u8 phy_link;
u8 full_duplex;
u8 autoneg;
u8 num_def_qps;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index db57474..717b129 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -53,17 +53,21 @@ static int rq3_entries = EHEA_DEF_ENTRIES_RQ3;
 static int sq_entries = EHEA_DEF_ENTRIES_SQ;
 static int use_mcs = 0;
 static int num_tx_qps = EHEA_NUM_TX_QP;
+static int prop_carrier_state = 0;
 
 module_param(msg_level, int, 0);
 module_param(rq1_entries, int, 0);
 module_param(rq2_entries, int, 0);
 module_param(rq3_entries, int, 0);
 module_param(sq_entries, int, 0);
+module_param(prop_carrier_state, int, 0);
 module_param(use_mcs, int, 0);
 module_param(num_tx_qps, int, 0);
 
 MODULE_PARM_DESC(num_tx_qps, "Number of TX-QPS");
 MODULE_PARM_DESC(msg_level, "msg_level");
+MODULE_PARM_DESC(prop_carrier_state, "Propagate carrier state of physical "
+"port to stack. 1:yes, 0:no.  Default = 0 ");
 MODULE_PARM_DESC(rq3_entries, "Number of entries for Receive Queue 3 "
 "[2^x - 1], x = [6..14]. Default = "
 __MODULE_STRING(EHEA_DEF_ENTRIES_RQ3) ")");
@@ -467,7 +471,7 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device 
*dev,
else
netif_receive_skb(skb);
 
-   dev->last_rx = jiffies;
+   port->netdev->last_rx = jiffies;
} else {
 

[PATCH] 9p: attach-per-user

2007-09-12 Thread Latchesar Ionkov
The 9P2000 protocol requires the authentication and permission checks to be
done in the file server. For that reason every user that accesses the file
server tree has to authenticate and attach to the server separately.
Multiple users can share the same connection to the server.

Currently v9fs does a single attach and executes all I/O operations as a
single user. This makes using v9fs in multiuser environment unsafe as it
depends on the client doing the permission checking.

This patch improves the 9P2000 support by allowing every user to attach
separately. The patch defines three modes of access (new mount option
'access'):

- attach-per-user (access=user) (default mode for 9P2000.u)
 If a user tries to access a file served by v9fs for the first time, v9fs
 sends an attach command to the server (Tattach) specifying the user. If
 the attach succeeds, the user can access the v9fs tree.
 As there is no uname->uid (string->integer) mapping yet, this mode works
 only with the 9P2000.u dialect.

- allow only one user to access the tree (access=<uid>)
 Only the user with uid can access the v9fs tree. Other users that attempt
 to access it will get EPERM error.

- do all operations as a single user (access=any) (default for 9P2000)
 V9fs does a single attach and all operations are done as a single user.
 If this mode is selected, the v9fs behavior is identical with the current
 one.

Signed-off-by: Latchesar Ionkov <[EMAIL PROTECTED]>

---
commit c3daf49339c2ba13b1d8bc5087c196093f2f78f5
tree a401d3d055bbfa3be2a4c7259f3b25f81f8c1272
parent 13bf527796712619df072c0963e3f6c8c00189b8
author Latchesar Ionkov <[EMAIL PROTECTED](none)> Wed, 12 Sep 2007 22:38:59 
-0600
committer Latchesar Ionkov <[EMAIL PROTECTED](none)> Wed, 12 Sep 2007 22:38:59 
-0600

 Documentation/filesystems/9p.txt |   10 ++
 fs/9p/fid.c  |  157 ++
 fs/9p/v9fs.c |   67 +---
 fs/9p/v9fs.h |   11 ++-
 fs/9p/vfs_inode.c|   20 ++---
 include/net/9p/9p.h  |7 +-
 include/net/9p/client.h  |5 +
 net/9p/client.c  |   10 +-
 net/9p/conv.c|   32 +++-
 9 files changed, 247 insertions(+), 72 deletions(-)

diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index e694cd1..d6fd6c6 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -88,6 +88,16 @@ OPTIONS
This can be used to share devices/named pipes/sockets between
hosts.  This functionality will be expanded in later versions.
 
+  access   there are three access modes.
+   user  = if a user tries to access a file on v9fs
+   filesystem for the first time, v9fs sends an
+   attach command (Tattach) for that user.
+   This is the default mode.
+= allows only user with uid= to access
+   the files on the mounted filesystem
+   any   = v9fs does single attach and performs all
+   operations as one user
+
 RESOURCES
 =
 
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 15e05a1..b364da7 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -1,6 +1,7 @@
 /*
  * V9FS FID Management
  *
+ *  Copyright (C) 2007 by Latchesar Ionkov <[EMAIL PROTECTED]>
  *  Copyright (C) 2005, 2006 by Eric Van Hensbergen <[EMAIL PROTECTED]>
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -34,9 +35,9 @@
 #include "fid.h"
 
 /**
- * v9fs_fid_insert - add a fid to a dentry
+ * v9fs_fid_add - add a fid to a dentry
+ * @dentry: dentry that the fid is being added to
  * @fid: fid to add
- * @dentry: dentry that it is being added to
  *
  */
 
@@ -66,52 +67,144 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
 }
 
 /**
- * v9fs_fid_lookup - return a locked fid from a dentry
+ * v9fs_fid_find - retrieve a fid that belongs to the specified uid
  * @dentry: dentry to look for fid in
- *
- * find a fid in the dentry, obtain its semaphore and return a reference to it.
- * code calling lookup is responsible for releasing lock
- *
- * TODO: only match fids that have the same uid as current user
+ * @uid: return fid that belongs to the specified user
+ * @any: if non-zero, return any fid associated with the dentry
  *
  */
 
-struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
 {
struct v9fs_dentry *dent;
-   struct p9_fid *fid;
-
-   P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
-   dent = dentry->d_fsdata;
-   if (dent)
-   fid = list_entry(dent->fidlist.next, struct p9_fid, dlist);
-   else
-   fid = ERR_PTR(-EBADF);
+   struct p9_fid *fid, *ret;
+
+   

[PATCH] 9p: define session flags

2007-09-12 Thread Latchesar Ionkov
Create more general flags field in the v9fs_session_info struct and move the
'extended' flag as a bit in the flags.

Signed-off-by: Latchesar Ionkov <[EMAIL PROTECTED]>

---
commit 52f23404fd5bd77b619460e00930087463ec0cd9
tree 41c68f68a211796fb65d9c772120e7b7587dc945
parent ce1bfbeb6af28c96b990a95b7d7dde52c601fb8c
author Latchesar Ionkov <[EMAIL PROTECTED](none)> Wed, 12 Sep 2007 22:36:43 
-0600
committer Latchesar Ionkov <[EMAIL PROTECTED](none)> Wed, 12 Sep 2007 22:36:43 
-0600

 fs/9p/v9fs.c  |6 +++---
 fs/9p/v9fs.h  |   12 +++-
 fs/9p/vfs_inode.c |   26 +-
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 08d880f..8ac2467 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -128,7 +128,7 @@ static void v9fs_parse_options(struct v9fs_session_info 
*v9ses)
 
/* setup defaults */
v9ses->maxdata = 8192;
-   v9ses->extended = 1;
+   v9ses->flags = V9FS_EXTENDED;
v9ses->afid = ~0;
v9ses->debug = 0;
v9ses->cache = 0;
@@ -178,7 +178,7 @@ static void v9fs_parse_options(struct v9fs_session_info 
*v9ses)
match_strcpy(v9ses->remotename, [0]);
break;
case Opt_legacy:
-   v9ses->extended = 0;
+   v9ses->flags &= ~V9FS_EXTENDED;
break;
case Opt_nodevmap:
v9ses->nodev = 1;
@@ -244,7 +244,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info 
*v9ses,
v9ses->maxdata = v9ses->trans->maxsize-P9_IOHDRSZ;
 
v9ses->clnt = p9_client_create(trans, v9ses->maxdata+P9_IOHDRSZ,
-   v9ses->extended);
+   v9fs_extended(v9ses));
 
if (IS_ERR(v9ses->clnt)) {
retval = PTR_ERR(v9ses->clnt);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 7eb135c..804b3ef 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -29,7 +29,7 @@
 struct v9fs_session_info {
/* options */
unsigned int maxdata;
-   unsigned char extended; /* set to 1 if we are using UNIX extensions */
+   unsigned char flags;/* session flags */
unsigned char nodev;/* set to 1 if no disable device mapping */
unsigned short debug;   /* debug level */
unsigned int afid;  /* authentication fid */
@@ -45,6 +45,11 @@ struct v9fs_session_info {
struct dentry *debugfs_dir;
 };
 
+/* session flags */
+enum {
+   V9FS_EXTENDED,
+};
+
 /* possible values of ->cache */
 /* eventually support loose, tight, time, session, default always none */
 enum {
@@ -70,3 +75,8 @@ static inline struct v9fs_session_info 
*v9fs_inode2v9ses(struct inode *inode)
 {
return (inode->i_sb->s_fs_info);
 }
+
+static inline int v9fs_extended(struct v9fs_session_info *v9ses)
+{
+   return v9ses->flags & V9FS_EXTENDED;
+}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e5c45ee..2270d06 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -59,7 +59,7 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, 
int mode)
res = mode & 0777;
if (S_ISDIR(mode))
res |= P9_DMDIR;
-   if (v9ses->extended) {
+   if (v9fs_extended(v9ses)) {
if (S_ISLNK(mode))
res |= P9_DMSYMLINK;
if (v9ses->nodev == 0) {
@@ -99,21 +99,21 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, 
int mode)
 
if ((mode & P9_DMDIR) == P9_DMDIR)
res |= S_IFDIR;
-   else if ((mode & P9_DMSYMLINK) && (v9ses->extended))
+   else if ((mode & P9_DMSYMLINK) && (v9fs_extended(v9ses)))
res |= S_IFLNK;
-   else if ((mode & P9_DMSOCKET) && (v9ses->extended)
+   else if ((mode & P9_DMSOCKET) && (v9fs_extended(v9ses))
 && (v9ses->nodev == 0))
res |= S_IFSOCK;
-   else if ((mode & P9_DMNAMEDPIPE) && (v9ses->extended)
+   else if ((mode & P9_DMNAMEDPIPE) && (v9fs_extended(v9ses))
 && (v9ses->nodev == 0))
res |= S_IFIFO;
-   else if ((mode & P9_DMDEVICE) && (v9ses->extended)
+   else if ((mode & P9_DMDEVICE) && (v9fs_extended(v9ses))
 && (v9ses->nodev == 0))
res |= S_IFBLK;
else
res |= S_IFREG;
 
-   if (v9ses->extended) {
+   if (v9fs_extended(v9ses)) {
if ((mode & P9_DMSETUID) == P9_DMSETUID)
res |= S_ISUID;
 
@@ -214,7 +214,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int 
mode)
case S_IFBLK:
case S_IFCHR:
case S_IFSOCK:
-   if(!v9ses->extended) {
+   if (!v9fs_extended(v9ses)) {
P9_DPRINTK(P9_DEBUG_ERROR,
  "special files without extended mode\n");
return 

[PATCH] 9p: rename uid and gid parameters

2007-09-12 Thread Latchesar Ionkov
Change the names of 'uid' and 'gid' parameters to the more appropriate
'dfltuid' and 'dfltgid'.

Signed-off-by: Latchesar Ionkov <[EMAIL PROTECTED]>

---
commit 13bf527796712619df072c0963e3f6c8c00189b8
tree 7211a2899dcfd58c76b901334a8726c7e60115e1
parent 52f23404fd5bd77b619460e00930087463ec0cd9
author Latchesar Ionkov <[EMAIL PROTECTED](none)> Wed, 12 Sep 2007 22:37:33 
-0600
committer Latchesar Ionkov <[EMAIL PROTECTED](none)> Wed, 12 Sep 2007 22:37:33 
-0600

 Documentation/filesystems/9p.txt |4 ++--
 fs/9p/v9fs.c |   16 +---
 fs/9p/v9fs.h |6 --
 fs/9p/vfs_inode.c|4 ++--
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index 1a5f50d..e694cd1 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -78,9 +78,9 @@ OPTIONS
 
   noextend force legacy mode (no 9p2000.u semantics)
 
-  uid  attempt to mount as a particular uid
+  dfltuid  attempt to mount as a particular uid
 
-  gid  attempt to mount with a particular gid
+  dfltgid  attempt to mount with a particular gid
 
   afid security channel - used by Plan 9 authentication protocols
 
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 8ac2467..68f82be 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -84,7 +84,7 @@ static struct p9_trans_module *v9fs_match_trans(const 
substring_t *name)
 
 enum {
/* Options that take integer arguments */
-   Opt_debug, Opt_msize, Opt_uid, Opt_gid, Opt_afid,
+   Opt_debug, Opt_msize, Opt_dfltuid, Opt_dfltgid, Opt_afid,
/* String options */
Opt_uname, Opt_remotename, Opt_trans,
/* Options that take no arguments */
@@ -98,8 +98,8 @@ enum {
 static match_table_t tokens = {
{Opt_debug, "debug=%x"},
{Opt_msize, "msize=%u"},
-   {Opt_uid, "uid=%u"},
-   {Opt_gid, "gid=%u"},
+   {Opt_dfltuid, "dfltuid=%u"},
+   {Opt_dfltgid, "dfltgid=%u"},
{Opt_afid, "afid=%u"},
{Opt_uname, "uname=%s"},
{Opt_remotename, "aname=%s"},
@@ -159,11 +159,11 @@ static void v9fs_parse_options(struct v9fs_session_info 
*v9ses)
case Opt_msize:
v9ses->maxdata = option;
break;
-   case Opt_uid:
-   v9ses->uid = option;
+   case Opt_dfltuid:
+   v9ses->dfltuid = option;
break;
-   case Opt_gid:
-   v9ses->gid = option;
+   case Opt_dfltgid:
+   v9ses->dfltgid = option;
break;
case Opt_afid:
v9ses->afid = option;
@@ -219,6 +219,8 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info 
*v9ses,
 
strcpy(v9ses->name, V9FS_DEFUSER);
strcpy(v9ses->remotename, V9FS_DEFANAME);
+   v9ses->dfltuid = V9FS_DEFUID;
+   v9ses->dfltgid = V9FS_DEFGID;
 
v9ses->options = kstrdup(data, GFP_KERNEL);
v9fs_parse_options(v9ses);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 804b3ef..5d0280a 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -38,8 +38,8 @@ struct v9fs_session_info {
char *options;  /* copy of mount options */
char *name; /* user name to mount as */
char *remotename;   /* name of remote hierarchy being mounted */
-   unsigned int uid;   /* default uid/muid for legacy support */
-   unsigned int gid;   /* default gid for legacy support */
+   unsigned int dfltuid;   /* default uid/muid for legacy support */
+   unsigned int dfltgid;   /* default gid for legacy support */
struct p9_trans_module *trans; /* 9p transport */
struct p9_client *clnt; /* 9p client */
struct dentry *debugfs_dir;
@@ -70,6 +70,8 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 #define V9FS_PORT  564
 #define V9FS_DEFUSER   "nobody"
 #define V9FS_DEFANAME  ""
+#define V9FS_DEFUID(0)
+#define V9FS_DEFGID(0)
 
 static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
 {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2270d06..f08a35d 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -805,8 +805,8 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
inode->i_mtime.tv_sec = stat->mtime;
inode->i_ctime.tv_sec = stat->mtime;
 
-   inode->i_uid = v9ses->uid;
-   inode->i_gid = v9ses->gid;
+   inode->i_uid = v9ses->dfltuid;
+   inode->i_gid = v9ses->dfltgid;
 
if (v9fs_extended(v9ses)) {
inode->i_uid = stat->n_uid;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH v3] Make the pr_*() family of macros in kernel.h complete

2007-09-12 Thread Joe Perches
On Wed, 2007-09-12 at 11:44 -0700, Medve Emilian-EMMEDVE1 wrote:
> First, this patch doesn't have the trailing "\n" problem that one had.

I expect all the kernel logging functions to be
overhauled eventually.

I'd prefer a mechanism that somehow supports
identifying complete messages.  I think the new
pr_ functions are not particularly useful
without a mechanism to avoid or identify multiple
processors or threads interleaving partial in-progress
multiple statement messages.

I've got a very large patch series that converts _all_
the current single line messages that use KERN_
to pr_ and identifies, prefixes and postfixes
the rest of the multiple source line messages.

At some point, sooner or later, the logging functions
will be improved.  Apparently, more likely later.

cheers, Joe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-dvb] [PATCH] Userspace tuner

2007-09-12 Thread Dâniel Fraga
Well, I'd like to see Linus' opinion about this, because while
programmers keep discussing this, users are waiting forever... so if
Markus has a concrete and better solution, why not use it?

And as far as I know, Markus is the programmer who is most
interested in this code. I didn't see anybody else in the world doing
his work...

And I always had the impression that if most things could be
done in user space, then it would be better (for example, devfs -> udev).
Why do everything in kernel space? Lets put *less* code in the kernel,
not more code. And besides that, code in user space can be changed
easily. Code in kernel has to wait a long time for Linus to accept (*if*
he accepts).

Linus could put an end to this discussion, since he will say
the final word.

On Thu, 13 Sep 2007 01:10:55 +0200
"Markus Rechberger" <[EMAIL PROTECTED]> wrote:

> Let's add the LKML to this.
> 
> On 9/13/07, Markus Rechberger <[EMAIL PROTECTED]> wrote:
> > On 9/12/07, Mauro Carvalho Chehab <[EMAIL PROTECTED]> wrote:
> > > Markus,
> > >
> > > Em Ter, 2007-08-14 às 16:31 +0200, Markus Rechberger escreveu:
> > > > Following patch adds the possibility to implement tuner drivers in
> > > > userspace.
> > >
> > > As you asked me about userspace driver, at Linux Conf Europe, let me
> > > give you my feedback about it.
> > >
> > > On Linux, userspace-to-kernelspace APIs are meant to be forever. This
> > > means that, once a newer API is created, this should remain supported
> > > for all future versions. So, such APIs should be carefully analyzed and
> > > accepted by the community, before going to mainstream.
> > >
> >
> > The V4L and DVB API is stable at the moment because it's at a stage
> > which is sufficient for older devices but not sufficient for newer
> > devices anymore.
> > To support newer devices it needs a change.
> >
> > > I don't see any technical reason why tuner drivers should be moved to
> > > userspace. Looking at xc3028 device, the driver is very simple and
> > > doesn't require any special treatment that it isn't possible to be done
> > > at kernel. There are already some implementations on kernelspace that
> > > works fine.
> > >
> >
> > As from my side to support the xceive driver properly it needs a
> > rewrite and a proper API description. Since it's not possible to
> > discuss any API changes I will work around at least for those devices
> > which I can support for.
> >
> > > On the other hand, a TV driver without a tuner is a broken driver. With
> > > parts of the driver being at userspace, this means to add undesired
> > > complexity at the drivers architecture, while not bringing any benefit.
> > >
> > > If you look at V4L history, the first drivers started at userspace,
> > > being migrated to kernelspace, where we have the proper scenario for
> > > managing those devices.
> > >
> > > Another aspect that should be analyzed is what is desired by the
> > > community:
> >
> > don't get me wrong but the existing community is rather small and
> > kicking off people who are interested in changing things.
> > I recently had a talk with someone and I've been told that I'm kicking
> > off people.
> > Guess why I kick off people? -> because they do not contribute in a
> > productive way which also means submitting patches. Optical useless
> > changes don't make any difference at the functionality in the end. And
> > my requirements are ignored constantly here.
> >
> > > kernelspace tuners or userspace tuners. Keeping support for
> > > both at long term doesn't seem reasonable. The Linux community should
> > > decide what is the better way. Currently, only you are pushing for
> > > userspace tuners, mainly due to non-technical reasons.
> >
> > read the project site and you will see the reasons.
> > http://mcentral.de/wiki/index.php/Userspace_tuner#Advantages
> > Another advantage is that I have cygwin based code here which I can
> > easily reuse with all that work I'm not going to reinvent the wheel
> > even for newer devices which I work on.
> >
> > > Almost all the
> > > other developers are comfortable with kernelspace tuners. So, creating
> > > an userspace interface just to make you happy is not the way we should
> > > go.
> > >
> >
> > I'm afraid of giving the people which are against what I submitted the
> > responsibility over the project. Initially there was an RFC which
> > didn't get commented either (well there was one useless comment, I
> > tried to discuss it on IRC before with the same guy) after I
> > implemented exactly what I proposed there I got the first non
> > technical comments - also keep in mind that working on something costs
> > a lot of time and talking about something unknown is rather cheap.
> >
> > > A final aspect is that having an userspace driver for tuner will mean
> > > that the kernel driver will depend on an userspace counterpart in order
> > > to work. This will allow a vendor with bad intentions to release a
> > > partially broken userspace 

Re: [PATCHv2] misc: fix unterminated *_device_id lists

2007-09-12 Thread Jeff Garzik

Kees Cook wrote:

Fixes drivers that do not correctly terminate their *_device_id lists.
This results in garbage being spewed into modules.pcimap when the module
happens to not have 28 NULL bytes following the table, and/or the last PCI
ID is actually truncated from the table when calculating the modules.alias
PCI aliases, causing those unfortunate device IDs to not auto-load.

Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
---
Re-send, using recommended list termination code-style.
Diff is against 2.6.23-rc6.


ACK


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: clockevents: fix resume logic

2007-09-12 Thread Andrew Morton
On Wed, 12 Sep 2007 18:57:42 +0200 Thomas Gleixner <[EMAIL PROTECTED]> wrote:

> Does the test hack below fix the problem for nohz/highres enabled
> kernels ?
> 
>   tglx
> 
> --- a/kernel/time/tick-broadcast.c
> +++ b/kernel/time/tick-broadcast.c
> @@ -382,6 +382,8 @@ static int tick_broadcast_set_event(ktime_t expires, int 
> force)
>  
>  int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
>  {
> + cpu_set(smp_processor_id(), tick_broadcast_oneshot_mask);
> +
>   clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
>  
>   if(!cpus_empty(tick_broadcast_oneshot_mask))

yup
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCHv2] misc: fix unterminated *_device_id lists

2007-09-12 Thread Kees Cook
Fixes drivers that do not correctly terminate their *_device_id lists.
This results in garbage being spewed into modules.pcimap when the module
happens to not have 28 NULL bytes following the table, and/or the last PCI
ID is actually truncated from the table when calculating the modules.alias
PCI aliases, causing those unfortunate device IDs to not auto-load.

Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
---
Re-send, using recommended list termination code-style.
Diff is against 2.6.23-rc6.

 linux-2.6.23-rc6/drivers/char/ipmi/ipmi_si_intf.c|3 ++-
 linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c |1 +
 linux-2.6.23-rc6/drivers/mtd/nand/cafe_nand.c|3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)
---
diff -uNrp linux-2.6.23-rc6~/drivers/char/ipmi/ipmi_si_intf.c 
linux-2.6.23-rc6/drivers/char/ipmi/ipmi_si_intf.c
--- linux-2.6.23-rc6~/drivers/char/ipmi/ipmi_si_intf.c  2007-09-11 
23:17:13.0 -0700
+++ linux-2.6.23-rc6/drivers/char/ipmi/ipmi_si_intf.c   2007-09-11 
23:21:51.0 -0700
@@ -2215,7 +2215,8 @@ static int ipmi_pci_resume(struct pci_de
 
 static struct pci_device_id ipmi_pci_devices[] = {
{ PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) },
-   { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) }
+   { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) },
+   { },/* terminate list */
 };
 MODULE_DEVICE_TABLE(pci, ipmi_pci_devices);
 
diff -uNrp linux-2.6.23-rc6~/drivers/mtd/nand/cafe_nand.c 
linux-2.6.23-rc6/drivers/mtd/nand/cafe_nand.c
--- linux-2.6.23-rc6~/drivers/mtd/nand/cafe_nand.c  2007-07-08 
16:32:17.0 -0700
+++ linux-2.6.23-rc6/drivers/mtd/nand/cafe_nand.c   2007-09-11 
23:22:11.0 -0700
@@ -816,7 +816,8 @@ static void __devexit cafe_nand_remove(s
 }
 
 static struct pci_device_id cafe_nand_tbl[] = {
-   { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 
0x0 }
+   { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 
0x0 },
+   { },/* terminate list */
 };
 
 MODULE_DEVICE_TABLE(pci, cafe_nand_tbl);
diff -urp -x '*.o' 
linux-2.6.23-rc6~/drivers/media/video/usbvision/usbvision-cards.c 
linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c
--- linux-2.6.23-rc6~/drivers/media/video/usbvision/usbvision-cards.c   
2007-09-11 23:17:19.0 -0700
+++ linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c
2007-09-12 17:26:46.0 -0700
@@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] 
{ USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL },
{ USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM },
{ USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV },
+   { },/* terminate list */
 };
 
 MODULE_DEVICE_TABLE (usb, usbvision_table);


-- 
Kees Cook
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] IB/ehca: Make sure user pages are from hugetlb before using MR large pages

2007-09-12 Thread Roland Dreier
 > -#define HCA_CAP_MR_PGSIZE_4K  1
 > -#define HCA_CAP_MR_PGSIZE_64K 2
 > -#define HCA_CAP_MR_PGSIZE_1M  4
 > -#define HCA_CAP_MR_PGSIZE_16M 8
 > +#define HCA_CAP_MR_PGSIZE_4K  0x8000
 > +#define HCA_CAP_MR_PGSIZE_64K 0x4000
 > +#define HCA_CAP_MR_PGSIZE_1M  0x2000
 > +#define HCA_CAP_MR_PGSIZE_16M 0x1000

Not sure I understand what this has to do with things... is this an
unrelated fix?

 > +static int ehca_is_mem_hugetlb(unsigned long addr, unsigned long size)

This is rather awful -- another call to get_user_pages() to iterate
over all the vmas...

I would suggest extending ib_umem_get() to check the vmas and adding a
member to struct ib_umem to say whether the memory is entirely covered
by hugetlb pages or not.

 > +ret = ehca_is_mem_hugetlb(virt, length);
 > +switch (ret) {
 > +case 0: /* mem is not from hugetlb */
 > +hwpage_size = PAGE_SIZE;
 > +break;
 > +case 1:
 > +if (length <= EHCA_MR_PGSIZE4K
 > +&& PAGE_SIZE == EHCA_MR_PGSIZE4K)
 > +hwpage_size = EHCA_MR_PGSIZE4K;
 > +else if (length <= EHCA_MR_PGSIZE64K)
 > +hwpage_size = EHCA_MR_PGSIZE64K;
 > +else if (length <= EHCA_MR_PGSIZE1M)
 > +hwpage_size = EHCA_MR_PGSIZE1M;
 > +else
 > +hwpage_size = EHCA_MR_PGSIZE16M;
 > +break;
 > +default: /* out of mem */
 > +ib_mr = ERR_PTR(-ENOMEM);
 > +goto reg_user_mr_exit1;

It seems like it would be better to just assume the memory is not from
a hugetlb if ehca_is_mem_hugetlb() fails its memory allocation and
fall back to the PAGE_SIZE case rather than failing entirely.

Also if someone runs a kernel with 64K pages on a machine where they
end up being simulated from 4K pages, do you have the same issue with
the hypervisor ganging together non-contiguous pages?

 - R.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Allow sysfs_remove_group() to be called on non-added groups

2007-09-12 Thread Michael Ellerman
It would be nice to be able to do:

	for_each_thing(thing) {
		error = sysfs_create_group(&thing->kobj, attrs);
		if (error) {
			for_each_thing(thing)
				sysfs_remove_group(&thing->kobj, attrs);
			return error;
		}
	}

But there's a BUG_ON() in sysfs_remove_group() which hits if the attributes
were never added.

As discussed here ...
http://ozlabs.org/pipermail/cbe-oss-dev/2007-July/002774.html

.. we should just return in that case instead of BUG'ing.

Signed-off-by: Michael Ellerman <[EMAIL PROTECTED]>
---
 fs/sysfs/group.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f318b73..a256775 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -73,7 +73,8 @@ void sysfs_remove_group(struct kobject * kobj,
 
if (grp->name) {
sd = sysfs_get_dirent(dir_sd, grp->name);
-   BUG_ON(!sd);
+   if (!sd)
+   return;
} else
sd = sysfs_get(dir_sd);
 
-- 
1.5.1.3.g7a33b

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] doc: about email clients for Linux patches

2007-09-12 Thread Randy Dunlap
From: Randy Dunlap <[EMAIL PROTECTED]>

Requested by Jeff Garzik.
v3, updated from lkml comments.

Add info about various email clients and their applicability
in being used to send Linux kernel patches.

Some notes taken from http://mbligh.org/linuxdocs/Email/Clients
Portions used with permission.

Signed-off-by: Randy Dunlap <[EMAIL PROTECTED]>
---
 Documentation/email-clients.txt |  217 
 1 file changed, 217 insertions(+)

--- /dev/null
+++ linux-2.6.23-rc5-git1/Documentation/email-clients.txt
@@ -0,0 +1,217 @@
+Email clients info for Linux
+==
+
+General Preferences
+--
+Patches for the Linux kernel are submitted via email, preferably as
+inline text in the body of the email.  Some maintainers accept
+attachments, but then the attachments should have content-type
+"text/plain".  However, attachments are generally frowned upon because
+it makes quoting portions of the patch more difficult in the patch
+review process.
+
+Email clients that are used for Linux kernel patches should send the
+patch text untouched.  For example, they should not modify or delete tabs
+or spaces, even at the beginning or end of lines.
+
+Don't send patches with "format=flowed".  This can cause unexpected
+and unwanted line breaks.
+
+Don't let your email client do automatic word wrapping for you.
+This can also corrupt your patch.
+
+Email clients should not modify the character set encoding of the text.
+Emailed patches should be in ASCII or UTF-8 encoding only.
+If you configure your email client to send emails with UTF-8 encoding,
+you avoid some possible charset problems.
+
+Email clients should generate and maintain References: or In-Reply-To:
+headers so that mail threading is not broken.
+
+Copy-and-paste (or cut-and-paste) usually does not work for patches
+because tabs are converted to spaces.  Using xclipboard, xclip, and/or
+xcutsel may work, but it's best to test this for yourself or just avoid
+copy-and-paste.
+
+Don't use PGP/GPG signatures in mail that contains patches.
+This breaks many scripts that read and apply the patches.
+(This should be fixable.)
+
+It's a good idea to send a patch to yourself, save the received message,
+and successfully apply it with 'patch' before sending patches to Linux
+mailing lists.
+
+
+Some email client (MUA) hints
+--
+Here are some specific MUA configuration hints for editing and sending
+patches for the Linux kernel.  These are not meant to be complete
+software package configuration summaries.
+
+Legend:
+TUI = text-based user interface
+GUI = graphical user interface
+
+~~
+Alpine (TUI)
+
+Config options:
+In the "Sending Preferences" section:
+
+- "Do Not Send Flowed Text" must be enabled
+- "Strip Whitespace Before Sending" must be disabled
+
+When composing the message, the cursor should be placed where the patch
+should appear, and then pressing CTRL-R lets you specify the patch file
+to insert into the message.
+
+~~
+Evolution (GUI)
+
+Some people use this successfully for patches.
+
+When composing mail select: Preformat
+  from Format->Heading->Preformatted (Ctrl-7)
+  or the toolbar
+
+Then use:
+  Insert->Text File... (Alt-n x)
+to insert the patch.
+
+You can also "diff -Nru old.c new.c | xclip", select Preformat, then
+paste with the middle button.
+
+~~
+Kmail (GUI)
+
+Some people use Kmail successfully for patches.
+
+The default setting of not composing in HTML is appropriate; do not
+enable it.
+
+When composing an email, under options, uncheck "word wrap". The only
+disadvantage is any text you type in the email will not be word-wrapped
+so you will have to manually word wrap text before the patch. The easiest
+way around this is to compose your email with word wrap enabled, then save
+it as a draft. Once you pull it up again from your drafts it is now hard
+word-wrapped and you can uncheck "word wrap" without losing the existing
+wrapping.
+
+At the bottom of your email, put the commonly-used patch delimiter before
+inserting your patch:  three hyphens (---).
+
+Then from the "Message" menu item, select insert file and choose your patch.
+As an added bonus you can customise the message creation toolbar menu
+and put the "insert file" icon there.
+
+You can safely GPG sign attachments, but inlined text is preferred for
+patches so do not GPG sign them.  Signing patches that have been inserted
+as inlined text will make them tricky to extract from their 7-bit encoding.
+
+If you absolutely must send patches as attachments instead of inlining
+them as text, right click on the attachment and select properties, and
+highlight "Suggest automatic display" to make the attachment 

Re: [PATCH 1/2][RESEND] ehea: propagate physical port state

2007-09-12 Thread Jeff Garzik

Jan-Bernd Themann wrote:

Introduces a module parameter to decide whether the physical
port link state is propagated to the network stack or not.
It makes sense not to take the physical port state into account
on machines with more logical partitions that communicate
with each other. This is always possible no matter what the physical
port state is. Thus eHEA can be considered as a switch there.

Signed-off-by: Jan-Bernd Themann <[EMAIL PROTECTED]>


applied 1-2


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix a lock problem in generic phy code

2007-09-12 Thread Jeff Garzik

Hans-Jürgen Koch wrote:

Lock debugging finds a problem in phy.c and phy_device.c,
this patch fixes it. Tested on an AT91SAM9263-EK board, 
kernel 2.6.23-rc4.


Signed-off-by: Hans J. Koch <[EMAIL PROTECTED]>


applied


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] drivers/net/: all drivers/net/ cleanup with ARRAY_SIZE

2007-09-12 Thread Jeff Garzik

Denis Cheng wrote:

Signed-off-by: Denis Cheng <[EMAIL PROTECTED]>
---
 drivers/net/apne.c |2 +-
 drivers/net/arm/am79c961a.c|2 +-
 drivers/net/atarilance.c   |2 +-
 drivers/net/atl1/atl1_hw.c |2 +-
 drivers/net/bnx2.c |2 +-
 drivers/net/cs89x0.c   |6 +++---
 drivers/net/e1000/e1000_ethtool.c  |3 +--
 drivers/net/fec_8xx/fec_mii.c  |5 ++---
 drivers/net/ibm_emac/ibm_emac_debug.c  |8 
 drivers/net/irda/actisys-sir.c |2 +-
 drivers/net/ixgb/ixgb_ethtool.c|3 +--
 drivers/net/lp486e.c   |4 +---
 drivers/net/mv643xx_eth.c  |3 +--
 drivers/net/ne-h8300.c |2 +-
 drivers/net/ne.c   |2 +-
 drivers/net/ne2.c  |2 +-
 drivers/net/ne2k-pci.c |2 +-
 drivers/net/netxen/netxen_nic.h|2 +-
 drivers/net/netxen/netxen_nic_hw.c |2 +-
 drivers/net/pcmcia/axnet_cs.c  |2 +-
 drivers/net/pcmcia/pcnet_cs.c  |4 ++--
 drivers/net/phy/phy.c  |2 +-
 drivers/net/skfp/smt.c |2 +-
 drivers/net/skfp/srf.c |4 ++--
 drivers/net/tulip/de4x5.c  |6 +++---
 drivers/net/wireless/airo.c|6 +++---
 drivers/net/wireless/hostap/hostap_ioctl.c |6 +++---
 drivers/net/wireless/ipw2100.c |7 +++
 drivers/net/wireless/libertas/fw.c |3 +--
 drivers/net/wireless/libertas/main.c   |   14 +++---
 drivers/net/wireless/libertas/wext.c   |4 ++--
 drivers/net/wireless/netwave_cs.c  |6 +++---
 drivers/net/wireless/prism54/isl_ioctl.c   |7 +++
 drivers/net/wireless/ray_cs.c  |6 +++---
 drivers/net/wireless/wavelan.c |6 +++---
 drivers/net/wireless/wavelan_cs.c  |6 +++---
 drivers/net/wireless/wl3501_cs.c   |2 +-
 drivers/net/zorro8390.c|2 +-
 38 files changed, 71 insertions(+), 80 deletions(-)


applied


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] usb: fix unterminated usn_device_id lists

2007-09-12 Thread Jeff Garzik

Kees Cook wrote:

Hi,

On Wed, Sep 12, 2007 at 09:15:07PM -0400, Jeff Garzik wrote:

+   { 0, },

FWIW I (and several drivers) tend to prefer the more clean version,

{ },

or even

{ },/* terminate list */


Ah, yes.  I see that now in some of the other drivers.  Should I re-send
this patch (and/or the other PCI patches)?


Can't hurt...  but I cannot speak for other maintainers.

Jeff



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2.6.18-rc6 1/2] dllink driver: porting v1.19 to linux 2.6.18-rc6

2007-09-12 Thread Jeff Garzik

Hayim Shaul wrote:

Description:
For DLink Fiber NIC, Linux 2.4.22 ships with driver version 1.19,
whereas, Linux 2.6.x ship with driver version 1.17.

The following patch upgrades the 2.6.x driver to include changes (and
bug fixes done until 1.19b).

These fixes are (copied from the driver):
1.18    2002/11/07  New tx scheme, adaptive tx_coalesce.
                    Remove tx_coalesce option.
1.19    2003/12/16  Fix problem parsing the eeprom on big endian
                    systems. ([EMAIL PROTECTED])

Disclaimer:
Since I returned my DLink NIC to the store I couldn't test it
thoroughly. It seemed to work just as well as v1.17. However, both
version made the NIC hang after a few minutes.


Please update to the latest upstream kernel (2.6.22 or 2.6.23-rc).

Also, please make sure you do not back out changes included in the 
upstream kernel since 2.6.17.



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] sb1250-mac.c: De-typedef, de-volatile, de-etc...

2007-09-12 Thread Jeff Garzik

Maciej W. Rozycki wrote:

 Remove typedefs, volatiles and convert kmalloc()/memset() pairs to
kcalloc().  Also reformat the surrounding clutter.

Signed-off-by: Maciej W. Rozycki <[EMAIL PROTECTED]>
---
 Per your request, Andrew, a while ago.  It builds, runs, passes 
checkpatch.pl and sparse.  No semantic changes.


 Please apply,

  Maciej


ACK, but patch does not apply cleanly to netdev-2.6.git#upstream (nor -mm)


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] usb: fix unterminated usn_device_id lists

2007-09-12 Thread Kees Cook
Hi,

On Wed, Sep 12, 2007 at 09:15:07PM -0400, Jeff Garzik wrote:
>> +{ 0, },
>
> FWIW I (and several drivers) tend to prefer the more clean version,
>
>   { },
>
> or even
>
>   { },/* terminate list */

Ah, yes.  I see that now in some of the other drivers.  Should I re-send
this patch (and/or the other PCI patches)?

Thanks,

-Kees

-- 
Kees Cook
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/7 RESEND] cxgb3 - Firmware update

2007-09-12 Thread Jeff Garzik

Divy Le Ray wrote:

From: Divy Le Ray <[EMAIL PROTECTED]>

Update firmware version.
Allow the driver to be up and running with older FW image

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>


applied 1-7


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] update CFI URI in mtd kconfig

2007-09-12 Thread Mike Frysinger
Since AMD shunted its flash memory division, the URI in the mtd Kconfig is now
broken, so the attached patch points people to Wikipedia.

Signed-off-by: Mike Frysinger <[EMAIL PROTECTED]>
---
diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig
index 479d32b..980117a 100644
--- a/drivers/mtd/chips/Kconfig
+++ b/drivers/mtd/chips/Kconfig
@@ -11,8 +11,8 @@ config MTD_CFI
  AMD and other flash manufactures that provides a universal method
  for probing the capabilities of flash devices. If you wish to
  support any device that is CFI-compliant, you need to enable this
- option. Visit 
- for more information on CFI.
+ option. For more information on CFI, visit:
+ 
 
 config MTD_JEDECPROBE
tristate "Detect non-CFI AMD/JEDEC-compatible flash chips"
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ECC and DMA to/from disk controllers

2007-09-12 Thread Bruce Allen

Alan, Robert, Dick,

Thank you all for the informed and helpful response!

Alan, I'll pass your comments on to Peter Kelemen.  Not sure if he follows 
LKML.  I think he'll be interested in your characterization of the error 
types.  I'll point him to the thread.  (I think Peter and his 
collaborators are fairly aware of the undetected error rates in standard 
ethernet TCP/IP traffic which as I recall is about one undetected 
single-bit error per 4TB transferred.  I am pretty sure they have ruled 
this out since they have checksums computed after any network transfers.)


Robert, Dick, if I have understood correctly, in response to my specific 
question, RAID controllers on PCI cards will DMA data into memory over a 
PCI bus using one parity bit per 32 data bits for protection.  This does 
provide some protection against errors in the data transfer, but much less 
protection than typical RAM ECC which has one ECC byte for each eight data 
bytes. As I recall, many older motherboards disabled parity on the PCI 
bus, so even this protection may be inactive in many cases. From a few 
minutes of on-line research, I have the impression that PCI-e has better 
ECC protection against address/data errors than PCI but I am not certain.


Thanks again!

Cheers,
Bruce
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [1/4] 2.6.23-rc6: known regressions

2007-09-12 Thread Mark Lord

Michal Piotrowski wrote:

Hi all,

Here is a list of some known regressions in 2.6.23-rc6.

...

Missing from the list:

USB "autosuspend" feature (new in 2.6.23) breaks *lots* of devices.
Many have since been blacklisted in one-at-a-time discovery patches,
but that's really just the tip of the iceberg.

This "feature" breaks a TON of user-visible things,
mostly USB storage devices (USB drives/pens, digicams, etc..).

The functionality is broken for just too much stuff,
and needs to either be reverted or defaulted to "off" rather than "on".

Cheers
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: timerfd redux

2007-09-12 Thread Andrew Morton
On Wed, 05 Sep 2007 17:32:01 +0200 "Michael Kerrisk" <[EMAIL PROTECTED]> wrote:

> [Was: Re: [PATCH] Revised timerfd() interface]
> 
> > Michael, could you please refresh our memories with a brief,
> > from-scratch summary of what the current interface is, followed
> > by a summary of what you believe to be the shortcomings to be? 
> 
> Andrew,
> 
> I'll break this up into parts:
> 
> 1. the existing timerfd interface
> 2. timerfd limitations
> 3. possible solutions
>  a) Add an argument
>  b) Create an interface similar to POSIX timers
>  c) Integrate timerfd with POSIX timers
> 
> Cheers,
> 
> Michael
> 
> 
> 1: the existing timerfd interface
> =
> 
> In 2.6.22, Davide added timerfd() with the following interface:
> 
> returned_fd = timerfd(int fd, int clockid, int flags,
>   struct itimerspec *utimer);
> 
> If fd is -1, a new timer is created and started.  The syscall
> returns a file descriptor for the timer. 'utimer' specifies
> the initial expiration and interval of the timer.
> 'clockid' is CLOCK_REALTIME or CLOCK_MONOTONIC.  The 'utimer'
> value is relative, unless TFD_TIMER_ABSTIME is specified in
> 'flags', in which case the initial expiration is specified
> absolutely.
> 
> If 'fd' is not -1, then the call modifies the existing timer
> referred to by the file descriptor 'fd'.  The 'clockid', 'flags',
> and 'utimer' can all be modified.  The return value is 'fd'.
> 
> The key feature of timerfd() is that the caller can use
> select/poll/epoll to wait on traditional file descriptors and
> one or more timers.
> 
> read() from a timerfd file descriptor (should) return a 4-byte
> integer that is the number of timer expirations since the last
> read.  (If no expiration has so far occurred, read() will block.)
> 
> IMPORTANT POINT: as implemented in 2.6.22, timerfd was broken:
> only a single byte of info was returned by read().  I regard
> this as a virtue: it gives us something closer to a blank slate
> for fixing the problems described below; furthermore,
> arguably at this point we could buy ourselves time by
> pulling timerfd() from 2.6.23, and taking more time to get
> things right in 2.6.24.
> 
> (More details on timerfd() can be found here: 
> http://lwn.net/Articles/245533/)

OK.

> 2. timerfd limitations
> ==
> 
> Unix has two older timer interfaces:
> 
> * setitimer/getitimer and
> 
> * POSIX timers (timer_create/timer_settime/timer_gettime).
> 
> timerfd() lacks two features that are present in the older
> interfaces:
> 
> * Retrieve the previous setting of an existing timer when
>   setting a new value for the timer.
> 
> * Non-destructively fetch the time remaining until the
>   next expiration of the timer.
> 
> The fact that this functionality is present in both older APIs
> strongly suggests that various applications really need both
> functionalities.  

Yes, I can imagine applications wanting to do those things.

> (Davide has argued that timerfd() doesn't need the
> get-while-setting functionality because we can create multiple
> timerfd timers.  However, POSIX timers also allow multiple
> timer instances, but nevertheless provide get-while-setting.
> I would estimate that this functionality would be useful for
> libraries that want to create and control a (single) timerfd
> file descriptor that is returned to the caller.)

Sure.  If you're implementing a timeout and you want to reset it, you might
indeed want to know how close the old one was to expiring.

Davide's proposal sounds like an awkward workaround for missing
functionality.


Does Davide have a proposal for the non-destructive fetch?


> 3. possible solutions

I don't think we'll have this settled and coded in time for 2.6.23.  So I
think the prudent thing to do is to push this back to 2.6.24 and not offer
sys_timerfd() in 2.6.23.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] spontaneous disconnect with "usb-storage: implement autosuspend"

2007-09-12 Thread Mark Lord

Greg KH wrote:


There are many regressions right now, _ONLY_ if you enable
CONFIG_USB_SUSPEND.  If you disable that, your problems will go away,
right?

..

Oh, and currently no distro will enable this option due to the hardware
problems, so the only people that could get hit by this are those who
build their own kernels, and they can easily disable the option.


Ubuntu has CONFIG_USB_SUSPEND=y

There are probably lots of other distros that use it as well.

This new behaviour (failed operation of USB flash keys) is a *new* regression
in 2.6.23, and we still have time to fix it by either reverting the changed
functionality, or by changing the code to default OFF.

Here's my patch for 2.6.23-rc6+ :

Fix USB Storage failures.

Signed-Off-By:  Mark Lord <[EMAIL PROTECTED]>
---

--- linux/drivers/usb/storage/usb.c.orig2007-09-11 11:52:14.0 
-0400
+++ linux/drivers/usb/storage/usb.c 2007-09-12 18:18:35.0 -0400
@@ -1065,7 +1065,7 @@
.pre_reset =storage_pre_reset,
.post_reset =   storage_post_reset,
.id_table = storage_usb_ids,
-   .supports_autosuspend = 1,
+   .supports_autosuspend = 0,
};

static int __init usb_stor_init(void)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] spontaneous disconnect with "usb-storage: implement autosuspend"

2007-09-12 Thread Mark Lord

Greg KH wrote:

On Wed, Sep 12, 2007 at 06:14:04PM -0400, Mark Lord wrote:

Oliver Neukum wrote:

Am Dienstag 14 August 2007 schrieb Paolo Ornati:

On Tue, 14 Aug 2007 17:46:16 +0200
Oliver Neukum <[EMAIL PROTECTED]> wrote:


Am Dienstag 14 August 2007 schrieb Paolo Ornati:
Hewlett-Packard PhotoSmart 720 / PhotoSmart 935 (storage)  

Please try this patch.

Tried on -rc3 but it doesn't work, dmesg attached.

However, I've found that if "hald" is running the problem doesn't
happen (I think it's just hidden by the fact that hald does some polling
on it, preventing autosuspend from triggering).

Exactly. This is not reliable. It needs to be done in kernel. This patch
should do it.
Regards
Oliver
---
--- a/drivers/usb/core/quirks.c 2007-08-14 17:42:22.0 +0200
+++ b/drivers/usb/core/quirks.c 2007-08-14 20:30:28.0 +0200
@@ -30,6 +30,8 @@
 static const struct usb_device_id usb_quirk_list[] = {
/* HP 5300/5370C scanner */
 	{ USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 
},

+   /* Hewlett-Packard PhotoSmart 720 / PhotoSmart 935 (storage) */
+   { USB_DEVICE(0x03f0, 0x4002), .driver_info = USB_QUIRK_NO_AUTOSUSPEND },
/* Acer Peripherals Inc. (now BenQ Corp.) Prisa 640BU */
{ USB_DEVICE(0x04a5, 0x207e), .driver_info = USB_QUIRK_NO_AUTOSUSPEND },
/* Benq S2W 3300U */
-

I believe the offending commit needs to be reverted.
It just breaks too much stuff, including my Sandisk USB sticks.


with "CONFIG_USB_SUSPEND=y", since commit:
8dfe4b14869fd185ca25ee88b02ada58a3005eaf
usb-storage: implement autosuspend
This patch (as930) implements autosuspend for usb-storage.  It is
adapted from a patch by Oliver Neukum.  Autosuspend is allowed except
during LUN scanning, resets, and command execution.
my USB photo-camera gets automagically disconnected before I can do
anything with it  ;) 

Ditto for several other devices that are being slowly special-cased,
and many that have yet to be tested.  This commit is (unfortunately)
a disaster with many regressions.


There are many regressions right now, _ONLY_ if you enable
CONFIG_USB_SUSPEND.  If you disable that, your problems will go away,
right?

This option is a new option, and we have found out the hard way that
a very large class of hardware really does not like working with usb
suspend at all.


CONFIG_USB_SUSPEND is *not* a new config option.
It's been around for quite some time now, and I also had it
enabled in 2.6.22 without any troubles.  Definite regression here, folks!

Cheers
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86_64: set cfg_size for AMD Family 10h in case MMCONFIG is used

2007-09-12 Thread Andrew Morton
On Wed, 12 Sep 2007 19:21:43 -0700 Yinghai Lu <[EMAIL PROTECTED]> wrote:

> +/**
> + * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
> + * have 4096 bytes.  Even if the device is capable, that doesn't mean we can
> + * access it.  Maybe we don't have a way to generate extended config space
> + * accesses.   So check it
> + */

Please don't use the kerneldoc leadin "/**" for non-kerneldoc comments.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] Union Mount: Readdir approaches

2007-09-12 Thread hooanon05

Jan Engelhardt:
> On Sep 12 2007 13:46, Al Boldi wrote:
::
> >This is way too complicated, but I tried it anyway, only to find it doesn't 
> >compile:
> 
> cvs up -D 2007-08-07
> 
> that one works ;-)

Jan, do you mean that only the one-month-old version could be compiled?
That is rather surprising, since I know some users compiled the newer
versions. Would you tell me how you ran 'make' on it? I think a personal
mail to me is preferable to the ML.

To Al Boldi,
Would you send me directly the message which is quoted by Jan? It
was not delivered to me.

Thanks in advance.
Junjiro Okajima
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86_64: set cfg_size for AMD Family 10h in case MMCONFIG is used

2007-09-12 Thread Yinghai Lu
[PATCH] x86_64: set cfg_size for AMD Family 10h in case MMCONFIG is used.

reuse pci_cfg_space_size but skip check pci express and pci-x CAP ID.

Signed-off-by: Yinghai Lu <[EMAIL PROTECTED]>

 arch/i386/pci/fixup.c |   13 +
 drivers/pci/probe.c   |   11 ++-
 include/linux/pci.h   |1 +
 3 files changed, 24 insertions(+), 1 deletion(-)

===
Index: linux-2.6/arch/i386/pci/fixup.c
===
--- linux-2.6.orig/arch/i386/pci/fixup.c2007-09-12 19:00:56.0 
-0700
+++ linux-2.6/arch/i386/pci/fixup.c 2007-09-12 19:01:32.0 -0700
@@ -444,3 +444,16 @@
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
  pci_siemens_interrupt_controller);
+
+/**
+ * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
+ * have 4096 bytes.  Even if the device is capable, that doesn't mean we can
+ * access it.  Maybe we don't have a way to generate extended config space
+ * accesses.   So check it
+ */
+static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+{
+dev->cfg_size = pci_cfg_space_size_ext(dev, 0);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_ANY_ID, 
fam10h_pci_cfg_space_size);
Index: linux-2.6/drivers/pci/probe.c
===
--- linux-2.6.orig/drivers/pci/probe.c  2007-09-12 19:00:56.0 -0700
+++ linux-2.6/drivers/pci/probe.c   2007-09-12 19:01:32.0 -0700
@@ -831,11 +831,14 @@
  * reading the dword at 0x100 which must either be 0 or a valid extended
  * capability header.
  */
-int pci_cfg_space_size(struct pci_dev *dev)
+int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
 {
int pos;
u32 status;
 
+   if (!check_exp_pcix)
+   goto skip;
+
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
if (!pos) {
pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
@@ -847,6 +850,7 @@
goto fail;
}
 
+ skip:
if (pci_read_config_dword(dev, 256, ) != PCIBIOS_SUCCESSFUL)
goto fail;
if (status == 0x)
@@ -858,6 +862,11 @@
return PCI_CFG_SPACE_SIZE;
 }
 
+int pci_cfg_space_size(struct pci_dev *dev)
+{
+   return pci_cfg_space_size_ext(dev, 1);
+}
+
 static void pci_release_bus_bridge_dev(struct device *dev)
 {
kfree(dev);
Index: linux-2.6/include/linux/pci.h
===
--- linux-2.6.orig/include/linux/pci.h  2007-09-12 19:00:56.0 -0700
+++ linux-2.6/include/linux/pci.h   2007-09-12 19:01:32.0 -0700
@@ -626,6 +626,7 @@
 
 void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
  void *userdata);
+int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix);
 int pci_cfg_space_size(struct pci_dev *dev);
 unsigned char pci_bus_max_busnr(struct pci_bus* bus);
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 15/15] Add DEFINE_SPUFS_ATTRIBUTE()

2007-09-12 Thread Michael Ellerman
On Wed, 2007-09-12 at 10:47 +0200, Arnd Bergmann wrote:
> On Wednesday 12 September 2007, Michael Ellerman wrote:
> > On Wed, 2007-09-12 at 17:43 +1000, Michael Ellerman wrote:
> > > This patch adds DEFINE_SPUFS_ATTRIBUTE(), a wrapper around
> > > DEFINE_SIMPLE_ATTRIBUTE which does the specified locking for the get
> > > routine for us.
> > > 
> > > Unfortunately we need two get routines (a locked and unlocked version) to
> > > support the coredump code. This patch hides one of those (the locked 
> > > version)
> > > inside the macro foo.
> 
> > 
> > jk said:
> > > "Good god man!"
> > 
> > Yeah, I'm a bit lukewarm on this one. But the diffstat is nice, 50% code
> > reduction ain't bad :)
> 
> Have you looked at the change in object code size? I would expect the
> object code to actually become bigger. I also think that it hurts
> readability rather than help it.

Yeah I did, it's smaller actually:

   textdata bss dec hex filename
  44898   17804 120   62822f566 spufs-before.o
  44886   17804 120   62810f55a spufs-after.o

> Maybe a better solution is to change the core dump code to not
> require the mutex to be held in the first place. By the time
> we get to call the get functions, it should already be in
> saved state and no longer be able to get scheduled, so we might
> not actually need all the extra tricks with avoiding the
> mutex to be taken again.

Well that'd be nice, but I don't see anywhere that that happens. AFAICT
the acquire we do in the first coredump callback is the first the SPU
contexts know about their PPE process dying. And spufs is still live, so
I think we definitely need to grab the mutex, or we might race with
userspace accessing spufs files.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person


signature.asc
Description: This is a digitally signed message part


Re: [NFSv4] 2.6.23-rc4 oops in nfs4_cb_recall...

2007-09-12 Thread J. Bruce Fields
On Mon, Sep 10, 2007 at 03:39:23PM +0100, Daniel J Blueman wrote:
> On 09/09/2007, J. Bruce Fields <[EMAIL PROTECTED]> wrote:
> > > When accessing a directory inode from a single other client, NFSv4
> > > callbacks catastrophically failed [1] on the NFS server with
> > > 2.6.23-rc4 (unpatched); clients are both 2.6.22 (Ubuntu Gutsy build).
> > > Seems not easy to reproduce, since this kernel was running smoothly
> > > for 7 days on the server.
> > >
> > > What information will help track this down, or is there a known
> > > failure mechanism?
> >
> > I haven't seen that before.
> >
> > > I can map stack frames to source lines with objdump, if that helps.
> 
> > If it's still easy, it might help to figure out exactly where in
> > xprt_reserve() it died, and why.  If we've got some race that can lead
> > to freeing the client while a callback is in progress, then perhaps this
> > is on the first dereference of xprt?
> 
> I've raised the bug report into bugzilla, added other observations
> from a second occurrence recently and disassembled xprt_reserve with
> line numbers.
> 
> http://bugzilla.kernel.org/show_bug.cgi?id=9003
> 
> Ping me for any more detail/info and thanks!

If you or anyone else that's seen this problem could test the following,
that would be helpful.  Thanks!

--b.

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c1cb7e0..9d536a8 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -486,6 +486,7 @@ out_put_cred:
/* Success or failure, now we're either waiting for lease expiration
 * or deleg_return. */
dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, 
dp->dl_flock, atomic_read(>dl_count));
+   put_nfs4_client(clp);
nfs4_put_delegation(dp);
return;
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6256492..6f182d2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -358,9 +358,22 @@ alloc_client(struct xdr_netobj name)
return clp;
 }
 
+static void
+shutdown_callback_client(struct nfs4_client *clp)
+{
+   struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+
+   /* shutdown rpc client, ending any outstanding recall rpcs */
+   if (clnt) {
+   clp->cl_callback.cb_client = NULL;
+   rpc_shutdown_client(clnt);
+   }
+}
+
 static inline void
 free_client(struct nfs4_client *clp)
 {
+   shutdown_callback_client(clp);
if (clp->cl_cred.cr_group_info)
put_group_info(clp->cl_cred.cr_group_info);
kfree(clp->cl_name.data);
@@ -375,18 +388,6 @@ put_nfs4_client(struct nfs4_client *clp)
 }
 
 static void
-shutdown_callback_client(struct nfs4_client *clp)
-{
-   struct rpc_clnt *clnt = clp->cl_callback.cb_client;
-
-   /* shutdown rpc client, ending any outstanding recall rpcs */
-   if (clnt) {
-   clp->cl_callback.cb_client = NULL;
-   rpc_shutdown_client(clnt);
-   }
-}
-
-static void
 expire_client(struct nfs4_client *clp)
 {
struct nfs4_stateowner *sop;
@@ -396,8 +397,6 @@ expire_client(struct nfs4_client *clp)
dprintk("NFSD: expire_client cl_count %d\n",
atomic_read(>cl_count));
 
-   shutdown_callback_client(clp);
-
INIT_LIST_HEAD();
spin_lock(_lock);
while (!list_empty(>cl_delegations)) {
@@ -1346,6 +1345,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 * lock) we know the server hasn't removed the lease yet, we know
 * it's safe to take a reference: */
atomic_inc(>dl_count);
+   atomic_inc(>dl_client->cl_count);
 
spin_lock(_lock);
list_add_tail(>dl_recall_lru, _recall_lru);
@@ -1354,8 +1354,12 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
/* only place dl_time is set. protected by lock_kernel*/
dp->dl_time = get_seconds();
 
-   /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
-   fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
+   /*
+* We don't want the locks code to timeout the lease for us;
+* we'll remove it ourself if the delegation isn't returned
+* in time.
+*/
+   fl->fl_break_time = 0;
 
t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
if (IS_ERR(t)) {
@@ -1364,6 +1368,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
printk(KERN_INFO "NFSD: Callback thread failed for "
"for client (clientid %08x/%08x)\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+   put_nfs4_client(dp->dl_client);
nfs4_put_delegation(dp);
}
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [00/41] Large Blocksize Support V7 (adds memmap support)

2007-09-12 Thread David Chinner
On Wed, Sep 12, 2007 at 01:27:33AM +1000, Nick Piggin wrote:
> > IOWs, we already play these vmap harm-minimisation games in the places
> > where we can, but still the overhead is high and something we'd prefer
> > to be able to avoid.
> 
> I don't think you've looked nearly far enough with all this low hanging
> fruit.

Ok, so we need to hack the vm to optimise it further. When it comes to
TLB flush code and optimising that sort of stuff, I'm out of my depth.

> I just gave 4 things which combined might easily reduce xfs vmap overhead
> by several orders of magnitude, all without changing much code at all.

Patches would be greatly appreciated. You obviously understand this
vm code much better than I do, so if it's easy to fix by adding some
generic vmap cache thingy, please do.

Cheers,

Dave.
-- 
Dave Chinner
Principal Engineer
SGI Australian Software Group
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -mm] ssb: Make pcmciahost depend on PCMCIA=y

2007-09-12 Thread Paul Mundt
On Wed, Sep 12, 2007 at 12:59:00PM +0200, Michael Buesch wrote:
> On Wednesday 12 September 2007 12:17:45 Paul Mundt wrote:
> > On Wed, Sep 12, 2007 at 12:09:09PM +0200, Michael Buesch wrote:
> > > There we go. The usual SELECT dependency hell again...
> > > Would changing SSB_PCMCIAHOST_POSSIBLE to tristate also fix it?
> > > What would be the sideeffects?
> > > 
> > I tried that first, if you do that you have to change the default to
> > SSB && PCMCIA, and then anything that depends on it also has to be a
> > tristate. That worked ok for SSB_PCMCIAHOST, but it didn't work ok for
> > the b43 wireless + PCMCIA, which is why I opted for the PCMCIA=y thing
> > instead, which makes sure that SSB_PCMCIAHOST can't be enabled if PCMCIA
> > is modular.
> 
> Ok, so much for "SELECT is easy and it works if used correctly..." :)
> Well, let's apply that patch then. It needlessly restricts the
> choice to not allow modular pcmcia in that case, though.
> 
That is the compromise, yes. Feel free to propose a better solution ;-)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -mm] uvesafb: Don't access VGA registers directly when running on non-x86

2007-09-12 Thread Paul Mundt
On Wed, Sep 12, 2007 at 09:41:51PM +0200, Michal Januszewski wrote:
> The VGA registers are only available at their legacy IO locations on
> x86. Don't try to access them when running on other arches.
> 
> Note that the code accessing them directly is just an optimization
> (limits slow BIOS function calls).  We don't lose any functionality 
> by using BIOS calls instead of it on non-x86.
> 
> Signed-off-by: Michal Januszewski <[EMAIL PROTECTED]>

Looks fine, thanks Michal.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] modpost: detect unterminated device id lists

2007-09-12 Thread Andrew Morton
On Wed, 12 Sep 2007 17:49:37 -0700 Kees Cook <[EMAIL PROTECTED]> wrote:

> On Wed, Sep 12, 2007 at 02:53:56PM -0700, Greg KH wrote:
> > On Wed, Sep 12, 2007 at 03:48:49PM +0400, Alexey Dobriyan wrote:
> > > On 9/12/07, Jeff Garzik <[EMAIL PROTECTED]> wrote:
> > > > Kees Cook wrote:
> > > > > This patch against 2.6.23-rc6 fixes a couple drivers that do not
> > > > > correctly terminate their pci_device_id lists.  This results in 
> > > > > garbage
> > > > > being spewed into modules.pcimap when the module happens to not have
> > > > > 28 NULL bytes following the table, and/or the last PCI ID is actually
> > > > > truncated from the table when calculating the modules.alias PCI 
> > > > > aliases,
> > > > > cause those unfortunate device IDs to not auto-load.
> > > > >
> > > > > Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
> > > >
> > > > ACK
> > > 
> > > I must say, non-terminated PCI ids lists are constant PITA. There should be
> > > a way to a) put it in macro[1], so that terminator automatically added, 
> > > and
> > > b) still allow #ifdef inside table like, e.g. 8139too does.
> > > 
> > > [1] or not macro, because #ifdef inside macros aren't allowed.
> > 
> > If you know of a way to do this in an easier manner, patches are always
> > gladly accepted :)
> 
> This patch against 2.6.23-rc6 will cause modpost to fail if any device
> id lists are incorrectly terminated, after reporting the offender.

ooh, clever chap.

> + fprintf(stderr,"%s: struct %s_device_id is %lu bytes.  
> The last of %lu is:\n", modname, device_id, id_size, size / id_size);

dude, bid on this: 
http://cgi.ebay.com/Wyse-WY55-General-Purpose-Serial-Terminal-No-Keyboard_W0QQitemZ230169388145QQihZ013QQcategoryZ51280QQssPageNameZWDVWQQrdZ1QQcmdZViewItem

(not allowed to use 132-column mode, either)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] usb: fix unterminated usn_device_id lists

2007-09-12 Thread Jeff Garzik

Kees Cook wrote:

This patch against 2.6.23-rc6 fixes an unterminated list of USB device ids.

Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
---
 linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c |1 +
 1 file changed, 1 insertion(+)
---
diff -urp -x '*.o' 
linux-2.6.23-rc6~/drivers/media/video/usbvision/usbvision-cards.c 
linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c
--- linux-2.6.23-rc6~/drivers/media/video/usbvision/usbvision-cards.c   
2007-09-11 23:17:19.0 -0700
+++ linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c
2007-09-12 17:26:46.0 -0700
@@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] 
 	{ USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL },

{ USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM },
{ USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV },
+   { 0, },


FWIW I (and several drivers) tend to prefer the more clean version,

{ },

or even

{ },/* terminate list */

Regards,

Jeff



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUGFIX] x86_64: NX bit handling in change_page_attr

2007-09-12 Thread Huang, Ying
On Wed, 2007-09-12 at 15:35 +0200, Andi Kleen wrote:
> > > Index: linux-2.6.23-rc2-mm2/arch/x86_64/mm/pageattr.c
> > > ===
> > > --- linux-2.6.23-rc2-mm2.orig/arch/x86_64/mm/pageattr.c   2007-08-17
> > > 12:50:25.0 +0800 +++
> > > linux-2.6.23-rc2-mm2/arch/x86_64/mm/pageattr.c2007-08-17
> > > 12:50:48.0 +0800 @@ -147,6 +147,7 @@
> > >   split = split_large_page(address, prot, ref_prot2);
> > >   if (!split)
> > >   return -ENOMEM;
> > > + pgprot_val(ref_prot2) &= ~_PAGE_NX;
> > >   set_pte(kpte, mk_pte(split, ref_prot2));
> > >   kpte_page = split;
> > >   }
> >
> > What happened with this?  Still valid?
> 
> The bug is probably latent there, but I don't think it can affect anything
> in the kernel because nothing in the kernel should change NX status
> as far as I know.
> 
> Where did you see it? 

I found the problem while adding EFI runtime service support, where the EFI
runtime code (from firmware) needs to be mapped without the NX bit set.

> Anyways I would prefer to only clear the PMD NX when NX status actually 
> changes on the PTE.Can you do that change? 

This change is sufficient for Intel CPU. Because the NX bit of PTE is
still there, no page will be made executable if not been set explicitly
through PTE. For AMD CPU, will the page be made executable if the NX bit
of PMD is cleared and the NX bit of PTE is set? If so, I will do the
change as you said.

> Anyways; it's really not very important.

It is needed by EFI runtime service supporting.

Best Regards,
Huang Ying
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] State limits to safety of _safe iterators

2007-09-12 Thread Paul E. McKenney
The _safe list iterators make a blanket statement about how they are
safe against removal.  This patch, inspired by private conversations
with people who unwisely but perhaps understandably took this blanket
statement at its word, adds comments stating limits to this safety.

Signed-off-by: Paul E. McKenney <[EMAIL PROTECTED]>
---

 list.h |   42 ++
 1 file changed, 42 insertions(+)

diff -urpNa -X dontdiff linux-2.6.22/include/linux/list.h 
linux-2.6.22-safedoc/include/linux/list.h
--- linux-2.6.22/include/linux/list.h   2007-07-08 16:32:17.0 -0700
+++ linux-2.6.22-safedoc/include/linux/list.h   2007-09-12 17:45:38.0 
-0700
@@ -472,6 +472,12 @@ static inline void list_splice_init_rcu(
  * @pos:   the  list_head to use as a loop cursor.
  * @n: another  list_head to use as temporary storage
  * @head:  the head for your list.
+ *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
  */
 #define list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; pos != (head); \
@@ -542,6 +548,12 @@ static inline void list_splice_init_rcu(
  * @n: another type * to use as temporary storage
  * @head:  the head for your list.
  * @member:the name of the list_struct within the struct.
+ *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
  */
 #define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_entry((head)->next, typeof(*pos), member),  \
@@ -558,6 +570,12 @@ static inline void list_splice_init_rcu(
  *
  * Iterate over list of given type, continuing after current point,
  * safe against removal of list entry.
+ *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
  */
 #define list_for_each_entry_safe_continue(pos, n, head, member)
\
for (pos = list_entry(pos->member.next, typeof(*pos), member),  
\
@@ -574,6 +592,12 @@ static inline void list_splice_init_rcu(
  *
  * Iterate over list of given type from current point, safe against
  * removal of list entry.
+ *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
  */
 #define list_for_each_entry_safe_from(pos, n, head, member)
\
for (n = list_entry(pos->member.next, typeof(*pos), member);
\
@@ -589,6 +613,12 @@ static inline void list_splice_init_rcu(
  *
  * Iterate backwards over list of given type, safe against removal
  * of list entry.
+ *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
  */
 #define list_for_each_entry_safe_reverse(pos, n, head, member) \
for (pos = list_entry((head)->prev, typeof(*pos), member),  \
@@ -623,6 +653,12 @@ static inline void list_splice_init_rcu(
  *
  * Iterate over an rcu-protected list, safe against removal of list entry.
  *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
+ *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as list_add_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
@@ -942,6 +978,12 @@ static inline void hlist_add_after_rcu(s
  * @n: another  hlist_node to use as temporary storage
  * @head:  the head for your list.
  * @member:the name of the hlist_node within the struct.
+ *
+ * Please note that this is safe only against removal by the code in
+ * this iterator's body (either directly or via function calls).  In
+ * particular, it is not safe against removal by other tasks unless
+ * you use appropriate locking, RCU, or other synchronization
+ * mechanism.
  */
 #define 

[PATCH] modpost: detect unterminated device id lists

2007-09-12 Thread Kees Cook
On Wed, Sep 12, 2007 at 02:53:56PM -0700, Greg KH wrote:
> On Wed, Sep 12, 2007 at 03:48:49PM +0400, Alexey Dobriyan wrote:
> > On 9/12/07, Jeff Garzik <[EMAIL PROTECTED]> wrote:
> > > Kees Cook wrote:
> > > > This patch against 2.6.23-rc6 fixes a couple drivers that do not
> > > > correctly terminate their pci_device_id lists.  This results in garbage
> > > > being spewed into modules.pcimap when the module happens to not have
> > > > 28 NULL bytes following the table, and/or the last PCI ID is actually
> > > > truncated from the table when calculating the modules.alias PCI aliases,
> > > > cause those unfortunate device IDs to not auto-load.
> > > >
> > > > Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
> > >
> > > ACK
> > 
> > I must say, non-terminated PCI ids lists are a constant PITA. There should be
> > a way to a) put it in macro[1], so that terminator automatically added, and
> > b) still allow #ifdef inside table like, e.g. 8139too does.
> > 
> > [1] or not macro, because #ifdef inside macros aren't allowed.
> 
> If you know of a way to do this in an easier manner, patches are always
> gladly accepted :)

This patch against 2.6.23-rc6 will cause modpost to fail if any device
id lists are incorrectly terminated, after reporting the offender.

Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
---
 linux-2.6.23-rc6/scripts/mod/file2alias.c |   39 --
 1 file changed, 32 insertions(+), 7 deletions(-)
---
diff -urp -x '*.o' linux-2.6.23-rc6~/scripts/mod/file2alias.c 
linux-2.6.23-rc6/scripts/mod/file2alias.c
--- linux-2.6.23-rc6~/scripts/mod/file2alias.c  2007-09-11 23:17:49.0 
-0700
+++ linux-2.6.23-rc6/scripts/mod/file2alias.c   2007-09-12 17:41:30.0 
-0700
@@ -55,10 +55,13 @@ do {
  * Check that sizeof(device_id type) are consistent with size of section
  * in .o file. If in-consistent then userspace and kernel does not agree
  * on actual size which is a bug.
+ * Also verify that the final entry in the table is all zeros.
  **/
-static void device_id_size_check(const char *modname, const char *device_id,
-unsigned long size, unsigned long id_size)
+static void device_id_check(const char *modname, const char *device_id,
+   unsigned long size, unsigned long id_size,
+   void *symval)
 {
+   int i;
if (size % id_size || size < id_size) {
fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo "
  "of the size of section __mod_%s_device_table=%lu.\n"
@@ -66,6 +69,18 @@ static void device_id_size_check(const c
  "in mod_devicetable.h\n",
  modname, device_id, id_size, device_id, size, device_id);
}
+   /* Verify last one is a terminator */
+   for (i = 0; i < id_size; i++ ) {
+   if ( *(uint8_t*)(symval+size-id_size+i) ) {
+   fprintf(stderr,"%s: struct %s_device_id is %lu bytes.  
The last of %lu is:\n", modname, device_id, id_size, size / id_size);
+   for (i = 0; i < id_size; i++ ) {
+   fprintf(stderr,"0x%02x ", 
*(uint8_t*)(symval+size-id_size+i) );
+   }
+   fprintf(stderr,"\n");
+   fatal("%s: struct %s_device_id is not terminated "
+   "with a NULL entry!\n", modname, device_id);
+   }
+   }
 }
 
 /* USB is special because the bcdDevice can be matched against a numeric range 
*/
@@ -168,7 +183,7 @@ static void do_usb_table(void *symval, u
unsigned int i;
const unsigned long id_size = sizeof(struct usb_device_id);
 
-   device_id_size_check(mod->name, "usb", size, id_size);
+   device_id_check(mod->name, "usb", size, id_size, symval);
 
/* Leave last one: it's the terminator. */
size -= id_size;
@@ -505,7 +520,7 @@ static void do_table(void *symval, unsig
char alias[500];
int (*do_entry)(const char *, void *entry, char *alias) = function;
 
-   device_id_size_check(mod->name, device_id, size, id_size);
+   device_id_check(mod->name, device_id, size, id_size, symval);
/* Leave last one: it's the terminator. */
size -= id_size;
 
@@ -527,14 +542,22 @@ void handle_moddevtable(struct module *m
Elf_Sym *sym, const char *symname)
 {
void *symval;
+   char *zeros = NULL;
 
/* We're looking for a section relative symbol */
if (!sym->st_shndx || sym->st_shndx >= info->hdr->e_shnum)
return;
 
-   symval = (void *)info->hdr
-   + info->sechdrs[sym->st_shndx].sh_offset
-   + sym->st_value;
+   /* Handle all-NULL symbols allocated into .bss */
+   if (info->sechdrs[sym->st_shndx].sh_type & SHT_NOBITS) {
+   zeros = calloc(1, sym->st_size);
+   symval = zeros;
+   }
+ 

[PATCH] usb: fix unterminated usb_device_id lists

2007-09-12 Thread Kees Cook
This patch against 2.6.23-rc6 fixes an unterminated list of USB device ids.

Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
---
 linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c |1 +
 1 file changed, 1 insertion(+)
---
diff -urp -x '*.o' 
linux-2.6.23-rc6~/drivers/media/video/usbvision/usbvision-cards.c 
linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c
--- linux-2.6.23-rc6~/drivers/media/video/usbvision/usbvision-cards.c   
2007-09-11 23:17:19.0 -0700
+++ linux-2.6.23-rc6/drivers/media/video/usbvision/usbvision-cards.c
2007-09-12 17:26:46.0 -0700
@@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] 
{ USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL },
{ USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM },
{ USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV },
+   { 0, },
 };
 
 MODULE_DEVICE_TABLE (usb, usbvision_table);


-- 
Kees Cook
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: DTR/DSR Patch

2007-09-12 Thread Andrew Morton
On Fri, 31 Aug 2007 15:17:41 +0200
Michael Westermann <[EMAIL PROTECTED]> wrote:

> Hello,
> 
> I write drivers for Point of Sale printers; a wide range of printers use
> only DTR/DSR hardware handshaking. When I do the handshaking in
> userspace, the printer has an overrun problem and our customer has a
> problem with the tax office.
> 
> My patch realizes a simple DTR/DSR handshake with a small change to the
> code. The change in userspace is very simple, e.g.
> 
> 
> cflags |= CDTRDSR;
> 
> The change of the stty tool at 2 lines.
> 

googling for CDTRDSR is useful.  I see from
http://lkml.org/lkml/2006/12/10/49 that Russell was generally supportive,
but requested changes which afaict you have implemented here?


> --- linux-2.6/include/asm-i386/termbits.h 2007-05-21 10:37:10.0 
> +0200
> +++ linux-2.6.dsr_test/include/asm-i386/termbits.h2007-08-28 
> 11:58:43.0 +0200
> @@ -157,6 +157,7 @@
>  #define  B350 0010016
>  #define  B400 0010017
>  #define CIBAUD 00200360
> +#define CDTRDSR0040  /* dtrdsr flow control */
>  #define CMSPAR 0100  /* mark or space (stick) parity 
> */
>  #define CRTSCTS0200  /* flow control */
>  

This will break all architectures except for i386.  So for now I guess we
can do this: 

--- a/include/linux/termios.h~serial-8250-implement-dtr-dsr-handshaking-fix
+++ a/include/linux/termios.h
@@ -4,4 +4,8 @@
 #include 
 #include 
 
+#ifndef CDTRDSR
+#define CDTRDSR 0 /* remove this when all architectures have a definition */
+#endif
+
 #endif
_

but that's temporary.  If we decide to proceed with this change then we
should add CDTRDSR to each arch's termbits.h.  That'll be pretty
straightforward to arrange.

Actually, let's spam them ...

--- a/include/linux/termios.h~serial-8250-implement-dtr-dsr-handshaking-fix
+++ a/include/linux/termios.h
@@ -4,4 +4,9 @@
 #include 
 #include 
 
+#ifndef CDTRDSR
+#warning This architecture should implement CDTRDSR
+#define CDTRDSR 0 /* remove this when all architectures have a definition */
+#endif
+
 #endif
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Add all thread stats for TASKSTATS_CMD_ATTR_TGID (v3)

2007-09-12 Thread Andrew Morton
On Fri, 31 Aug 2007 14:35:35 +0200
Guillaume Chazarain <[EMAIL PROTECTED]> wrote:

> TASKSTATS_CMD_ATTR_TGID used to return only the delay accounting stats,
> not the basic and extended accounting. With this patch,
> TASKSTATS_CMD_ATTR_TGID also aggregates the accounting info for all threads 
> of a
> thread group. This makes TASKSTATS_CMD_ATTR_TGID usable in a similar
> fashion to TASKSTATS_CMD_ATTR_PID, for commands like iotop -P
> (http://guichaz.free.fr/misc/iotop.py).
> 
> Here is the output of the testcase before the patch:
> 
> ...

>  Documentation/accounting/dump-test.c |  314 
> +++

Another C file in the Documentation directory.  Sigh.  At kernel summit I
suggested that we should be putting these things in a place from where we
can actually build and install them.  People said there was no need to do
that because kernel developers can now easily get new stuff into
util-linux.  I don't believe them.  Wanna be a guinea pig?


> +static void loop_reading(const char *filename)
> +{
> + int fd = open(filename, O_RDONLY);
> + char buffer[4096];
> +
> + if (fd < 0) {
> + perror(filename);
> + return;
> + }
> +
> + for (;;) {
> + lseek(fd, 0, SEEK_SET);
> + while (read(fd, buffer, sizeof(buffer)) > 0) ;

newline here.  Just because it's userspace doesn't mean that it needs to
look crappy, despite all the code out there which disproves this ;)

I think you just invented pread().

> + }
> +}
> +
>
> ...
>
> --- a/kernel/taskstats.c  Fri Aug 31 01:42:23 2007 -0700
> +++ b/kernel/taskstats.c  Fri Aug 31 13:36:29 2007 +0200
> @@ -168,6 +168,60 @@ static void send_cpu_listeners(struct sk
>   up_write(&listeners->sem);
>  }
>  
> +/*
> + * There are two types of taskstats fields when considering a thread group:
> + *   - those that can be aggregated from each thread in the group (like CPU
> + *   times),
> + *   - those that cannot be aggregated (like UID) or are identical (like
> + *   memory usage), so are taken from the group leader.
> + * XXX_threadgroup() methods deal with the first type while XXX_add_tsk() 
> with
> + * the second.
> + */
> +static void fill_threadgroup(struct taskstats *stats, struct task_struct 
> *task)
> +{
> + /*
> +  * Each accounting subsystem adds calls to its functions to initialize
> +  * relevant parts of struct taskstats for a single tgid as follows:
> +  *
> +  *  per-task-foo-fill_threadgroup(stats, task);
> +  */
> +
> + stats->version = TASKSTATS_VERSION;
> +
> + /* fill in basic acct fields */
> + bacct_fill_threadgroup(stats, task);
> +
> + /* fill in extended acct fields */
> + xacct_fill_threadgroup(stats, task);
> +}
> +
> +/*
> + * Stats specific to each thread in the thread group. Stats of @task should 
> be
> + * combined with those already present in @stats. add_tsk() works in
> + * conjunction with fill_threadgroup(), taskstats fields should not be 
> touched
> + * by both functions.
> + */

It's odd to use kerneldoc-style markup in a non-kerneldoc comment.

> @@ -232,32 +272,21 @@ static int fill_tgid(pid_t tgid, struct 
>   else
>   memset(stats, 0, sizeof(*stats));
>  
> + leader = first->group_leader;
> + get_task_struct(leader);
> + fill_threadgroup(stats, leader);
> + put_task_struct(leader);
> +

Are the get_task_struct/put_task_struct here actually needed?


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Brent Casavant
On Wed, 12 Sep 2007, Brent Casavant wrote:

> On Wed, 12 Sep 2007, Al Viro wrote:
> 
> > Give me a break.  And learn about ptrace(2).  This "unlinking" bullshit
> > buys you zero additional security, both for /proc/*/mem and for /dev/mem
> > (see mknod(2)).
> 
> My (limited) understanding of ptrace is that a parent-child
> relationship is needed between the tracing process and the traced
> process (at least that's what I gather from the man page).  This
> does give cause for concern, and I might have to see what can be
> done to alleviate this concern.  I fully realize that making this
> design completely unassailable is a fool's errand, but closing off
> as many attack vectors as possible seems prudent.

Hmm.  The solution would appear to be as simple as making the
target program set-user-id.  As long as the attacker isn't
the superuser (or has CAP_SYS_PTRACE) we should be OK.

Thanks for the heads-up,
Brent

-- 
Brent Casavant  All music is folk music.  I ain't
[EMAIL PROTECTED]never heard a horse sing a song.
Silicon Graphics, Inc.-- Louis Armstrong
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Update: Ext3 vs NTFS performance

2007-09-12 Thread Cabot, Mason B
> Subject: Ext3 vs NTFS performance
> 
> Hello all,
> 
> I've been testing the NAS performance of ext3/Openfiler 2.2 against
> NTFS/WinXP and have found that NTFS significantly outperforms ext3 for
> video workloads. The Windows CIFS client will attempt a poor-man's
> pre-allocation of the file on the server by sending 1-byte writes at
> 128K-byte strides, breaking block allocation on ext3 and leading to
> fragmentation and poor performance. This will happen for many
> applications (including iTunes) as the CIFS client issues these
> pre-allocates under the application layer.
> 
> I've posted a brief paper on Intel's OSS website
> (http://softwarecommunity.intel.com/articles/eng/1259.htm). 
> Please give
> it a read and let me know what you think. In particular, I'd like to
> arrive at the right place to fix this problem: is it in the 
> filesystem,
> VFS, or Samba?
> 
> thanks,
> Mason 
> 
> (please CC responses to mason dot b dot cabot at intel dot com)
> 

Folks:

thanks for the comments from the initial posting of this note. We've
looked further into the problem and found that Samba 3.0.20 or greater
fills the performance gap for ext3: the "strict allocate" flag now zero
fills the file, forcing allocation in the underlying filesystem and
avoiding fragmentation.

An update to the original whitepaper will be posted soon to the same
location on Intel's OSS website.

thanks,
Mason

(please CC responses to mason dot b dot cabot at intel dot com)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] xen: don't bother trying to set cr4

2007-09-12 Thread Jeremy Fitzhardinge
Xen ignores all updates to cr4, and some versions will kill the domain
if you try to change its value.  Just ignore all changes.

Signed-off-by: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
Cc: Andi Kleen <[EMAIL PROTECTED]>

---
 arch/i386/xen/enlighten.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

===
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -625,8 +625,8 @@ static unsigned long xen_read_cr2_direct
 
 static void xen_write_cr4(unsigned long cr4)
 {
-   /* never allow TSC to be disabled */
-   native_write_cr4(cr4 & ~X86_CR4_TSD);
+   /* Just ignore cr4 changes; Xen doesn't allow us to do
+  anything anyway. */
 }
 
 static unsigned long xen_read_cr3(void)


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Problems with USB disk [solved]

2007-09-12 Thread Greg KH
On Wed, Sep 12, 2007 at 06:10:50PM -0400, Mark Lord wrote:
> Chuck Ebbert wrote:
>> On 08/13/2007 10:50 AM, Niels wrote:
>>> On Sunday 12 August 2007 11:54, Niels wrote:
>>>
 On Friday 10 August 2007 14:43, Niels wrote:

> On Wednesday 08 August 2007 12:57, Ismail Dönmez wrote:
>
>> On Wednesday 08 August 2007 13:48:29 you wrote:
>>> On Tuesday 07 August 2007 23:18, Greg KH wrote:
 On Tue, Aug 07, 2007 at 10:26:15PM +0200, Niels wrote:
> Hi,
>
> I'm having problems with a new 500 GB USB disk. It works, but
> sometimes I get these in dmesg:
>
>
> usb 1-3: reset high speed USB device using ehci_hcd and address 2
> usb 5-1: USB disconnect, address 2
> drivers/usb/class/usblp.c: usblp0: removed
> sd 0:0:0:0: Device not ready: <6>: Sense Key : 0x2 [current]
>
> : ASC=0x4 ASCQ=0x2
>
> end_request: I/O error, dev sda, sector 254148215
> sd 0:0:0:0: Device not ready: <6>: Sense Key : 0x2 [current]
>
> : ASC=0x4 ASCQ=0x2
>
> end_request: I/O error, dev sda, sector 252434023
> EXT3-fs error (device sda1): ext3_find_entry: reading directory
> #15761836 offset 0
>
>
> There's also a printer connected. This is on a pci/usb2 card. When
> the above happens, I get I/O errors. When I mount the drive next,
> there are errors and often missing files. Quite annoying!
>
> Kernel is 2.6.21
>
> What's going on?
 You have a low voltage issue, or a bad cable.  The device is
 electronically disconnecting itself.  Try using a externally-powered
 hub, or a new cable.
>> I am seeing a similar problem with 2.6.22 and 2.6.23-* kernels with my
>> 60G iPod Video, works fine with 2.6.18 kernel though.
>>
> So far I'm seeing this:
>
> - On 2.6.21 I mount the drive. After a while it spins down, and when I
> then unmount it, an error pops up in dmesg.
>
> - On 2.6.18 I can't provoke the same error. The drive doesn't appear to
> spin down. I don't know if the data corruption from 2.6.21 occurs with
> regular use.
>
> There are a number of other factor I need to eliminate on my system, 
> but
> that's it so far. CONFIG_USB_SUSPEND is not set on either kernel.
 OK, on a vanilla 2.6.18.8 I also have this problem, with both the 
 pci/usb2
 card, and the usb1 on the board. I listen to music from the drive, and
 after some time (10-20 minutes or so), it freaks out:

 =
 sd 1:0:0:0: Device not ready: <6>: Current: sense key=0x2
 ASC=0x4 ASCQ=0x2
 end_request: I/O error, dev sda, sector 126693711
 sd 1:0:0:0: Device not ready: <6>: Current: sense key=0x2
 ASC=0x4 ASCQ=0x2
 end_request: I/O error, dev sda, sector 126693711
 sd 1:0:0:0: Device not ready: <6>: Current: sense key=0x2
 ASC=0x4 ASCQ=0x2
 end_request: I/O error, dev sda, sector 126693711
 =

>>> Using a new PSU and a powered hub made no difference. But I found a 
>>> solution
>>> here:
>>>
>>> http://alienghic.livejournal.com/382903.html
>>>
>>> Basically, the problem is, as suspected, that the drive spins down / goes 
>>> to
>>> suspend. This can be disabled with "sdparm --clear STANDBY -6 /dev/sda".
>>>
>>> It seems to me to be an error that the kernel reports this as something 
>>> like
>>> a hardware failure. Or at least very misleading.
>>>
>> Oh, nice. The usb-storage (SCSI) disk spins itself down and we can't 
>> handle that.
>> Should we be disabling auto-spindown when we connect the device, or be 
>> able to
>> handle this by sending the start command when needed?
>
> There's more to this.
>
> My Sandisk Cruzer Micro 1GB USB sticks suffer from this regression.
> Plug one in, it works for about 5 seconds, then the light goes off (bad).
> Next access requires a 30s timeout + reset.  Etc..
>
> This is with 2.6.23-rc6.
> Works without any problems in 2.6.22.  REGRESSION.

Disable CONFIG_USB_SUSPEND, that will fix the problem for you, or we can
add your device to the quirk list.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-usb-devel] spontaneous disconnect with "usb-storage: implement autosuspend"

2007-09-12 Thread Greg KH
On Wed, Sep 12, 2007 at 06:14:04PM -0400, Mark Lord wrote:
> Oliver Neukum wrote:
>> Am Dienstag 14 August 2007 schrieb Paolo Ornati:
>>> On Tue, 14 Aug 2007 17:46:16 +0200
>>> Oliver Neukum <[EMAIL PROTECTED]> wrote:
>>>
 Am Dienstag 14 August 2007 schrieb Paolo Ornati:
> Hewlett-Packard PhotoSmart 720 / PhotoSmart 935 (storage)  
 Please try this patch.
>>> Tried on -rc3 but it doesn't work, dmesg attached.
>>>
>>> However I've found that if "hald" is running the problems doesn't
>>> happen (I think it's just hidden by the fact that hald do some polling
>>> on it preventing autosuspend to trigger).
>> Exactly. This is not reliable. It needs to be done in kernel. This patch
>> should do it.
>>  Regards
>>  Oliver
>> ---
>> --- a/drivers/usb/core/quirks.c  2007-08-14 17:42:22.0 +0200
>> +++ b/drivers/usb/core/quirks.c  2007-08-14 20:30:28.0 +0200
>> @@ -30,6 +30,8 @@
>>  static const struct usb_device_id usb_quirk_list[] = {
>>  /* HP 5300/5370C scanner */
>>  { USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 
>> },
>> +/* Hewlett-Packard PhotoSmart 720 / PhotoSmart 935 (storage) */
>> +{ USB_DEVICE(0x03f0, 0x4002), .driver_info = USB_QUIRK_NO_AUTOSUSPEND },
>>  /* Acer Peripherals Inc. (now BenQ Corp.) Prisa 640BU */
>>  { USB_DEVICE(0x04a5, 0x207e), .driver_info = USB_QUIRK_NO_AUTOSUSPEND },
>>  /* Benq S2W 3300U */
>> -
>
> I believe the offending commit needs to be reverted.
> It just breaks too much stuff, including my Sandisk USB sticks.
>
>> with "CONFIG_USB_SUSPEND=y", since commit:
>> 8dfe4b14869fd185ca25ee88b02ada58a3005eaf
>> usb-storage: implement autosuspend
>> This patch (as930) implements autosuspend for usb-storage.  It is
>> adapted from a patch by Oliver Neukum.  Autosuspend is allowed except
>> during LUN scanning, resets, and command execution.
>> my USB photo-camera gets automagically disconnected before I can do
>> anything with it  ;) 
>
> Ditto for several other devices that are being slowly special-cased,
> and many that have yet to be tested.  This commit is (unfortunately)
> a disaster with many regressions.

There are many regressions right now, _ONLY_ if you enable
CONFIG_USB_SUSPEND.  If you disable that, your problems will go away,
right?

This option is a new option, and we have found out the hard way that
a very large class of hardware really does not like working with usb
suspend at all.

Because of this, we have a patch queued up for 2.6.24 that will disable
suspend for devices, and have to be enabled from a white-list that we
will put in userspace based on the history of what other operating
systems have determined are devices that can sleep properly.

I can send this patch in now to Linus, but as it changes functionality
from previous -rc patches, I've been hesitant to do so.  Especially when
a mere config option change will solve your problem.

Oh, and currently no distro will enable this option due to the hardware
problems, so the only people that could get hit by this are those who
build their own kernels, and they can easily disable the option.

Does this help explain things?

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Brent Casavant
On Wed, 12 Sep 2007, Al Viro wrote:

> On Wed, Sep 12, 2007 at 05:44:30PM -0500, Brent Casavant wrote:
> 
> > P.S. By the way, there doesn't seem to be a way to remove /proc/#/mem
> >  files.  That might be an additional nicety -- programs worried about
> >  being snooped could unlink their own entry.  /dev/mem and /dev/kmem
> >  can simply be removed by the sysadmin of such a system.  If all of
> >  that were done you'd have to resort to attacking crash dumps, core
> >  dumps, or via something like kdb to extract "hidden" data.
> 
> Give me a break.  And learn about ptrace(2).  This "unlinking" bullshit
> buys you zero additional security, both for /proc/*/mem and for /dev/mem
> (see mknod(2)).

Yes, I fully understand that mknod can recreate the nodes -- however
only the superuser can do so, and if the superuser is attacking a
process all bets are off anyway.  OK, so /dev/*mem isn't to worry
about, since it's already owned by root.  Still, /proc/#/mem is owned
by the user, not root, leaving it potentially open to inspection by
third party processes.

I'm thinking out loud.  Sorry to cause any grief.

My (limited) understanding of ptrace is that a parent-child
relationship is needed between the tracing process and the traced
process (at least that's what I gather from the man page).  This
does give cause for concern, and I might have to see what can be
done to alleviate this concern.  I fully realize that making this
design completely unassailable is a fool's errand, but closing off
as many attack vectors as possible seems prudent.

-- 
Brent Casavant  All music is folk music.  I ain't
[EMAIL PROTECTED]never heard a horse sing a song.
Silicon Graphics, Inc.-- Louis Armstrong
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -mm] fs: define file_fsync() even for CONFIG_BLOCK=n

2007-09-12 Thread Josef Sipek
On Wed, Sep 12, 2007 at 10:30:20AM +0100, Christoph Hellwig wrote:
> On Wed, Sep 12, 2007 at 11:06:10AM +0900, Paul Mundt wrote:
> > There's nothing that is problematic for file_fsync() with CONFIG_BLOCK=n,
> > and it's built in unconditionally anyways, so move the prototype out to
> > reflect that. Without this, the unionfs build bails out.
> 
> Unionfs should stop using it instead.
 
We did stop.

Josef 'Jeff' Sipek.

-- 
NT is to UNIX what a doughnut is to a particle accelerator.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH] Add a 'minimal tree install' target

2007-09-12 Thread Chris Wedgwood
This is a somewhat rough first-pass at making a 'minimal tree'
installation target.  This installs a partial source-tree which you
can use to build external modules against.  It feels pretty unclean
but I'm not aware of a much better way to do some of this.

This patch works for me, even when using O=.  It probably
needs further cleanups.

Comments?

-
Add a 'mintree-install' makefile target.

Red Hat and other distributions typically have some logic in their
kernel package build system to create/install 'minimalist source tree'
which contains enough state to build external modules against but is
much smaller than the entire build-tree.

This introduces similar logic, the guts of this was taken from a
Fedora Core spec file and mutilated to make it work for O=<...>
builds.
-


diff --git a/Makefile b/Makefile
index 3067f6a..1246939 100644
--- a/Makefile
+++ b/Makefile
@@ -1085,6 +1085,11 @@ package-dir  := $(srctree)/scripts/package
$(Q)$(MAKE) $(build)=$(package-dir) $@
 rpm: include/config/kernel.release FORCE
$(Q)$(MAKE) $(build)=$(package-dir) $@
+# /usr/src/linux to match what most distro's do
+#export INSTALL_MINTREE_PATH ?= /usr/src/linux
+export INSTALL_MINTREE_PATH ?= /tmp/mt-test/
+mintree-install: include/config/kernel.release FORCE
+   $(Q)$(MAKE) $(build)=$(package-dir) $@
 
 
 # Brief documentation of the typical targets used
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 7c434e0..0c5f07d 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -86,6 +86,12 @@ tar%pkg: FORCE
 clean-dirs += $(objtree)/tar-install/
 
 
+# minimal tree installation target
+# ---
+mintree-install: FORCE
+   $(MAKE) KBUILD_SRC=
+   $(CONFIG_SHELL) $(srctree)/scripts/package/mintree-install
+
 # Help text displayed when executing 'make help'
 # ---
 help: FORCE
@@ -96,4 +102,6 @@ help: FORCE
@echo '  tar-pkg - Build the kernel as an uncompressed tarball'
@echo '  targz-pkg   - Build the kernel as a gzip compressed 
tarball'
@echo '  tarbz2-pkg  - Build the kernel as a bzip2 compressed 
tarball'
+   @echo '  mintree-install - Build the kernel and install a 
minimal-build-tree'
+   @echo 'that can be used to build external modules 
against'
 
diff --git a/scripts/package/mintree-install b/scripts/package/mintree-install
new file mode 100644
index 000..1362cc2
--- /dev/null
+++ b/scripts/package/mintree-install
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+# miniman-tree-install
+#
+# This should install the necessary headers, makefiles and
+# configuration files in a location so that you can use kbuild to
+# build external (out of tree) modules without needing the entire
+# kernel build tree.
+#
+
+# This has been shamelessly taken from a Red Hat's spec file,
+# 
http://cvs.fedora.redhat.com/viewcvs/devel/kernel/kernel.spec?rev=1.145=markup
+
+# FIXME:cw Some parts of this are still icky, it's not clear how best
+# to clean some of this up.  Error handling is more or less
+# non-existent.  You could argue that this entire process should be
+# done differently (ie. using kbuild knowledge instead of a whole lot
+# of hard-coded magic here).
+
+# FIXME:cw If any of srctree, objtree or INSTALL_MINTREE_PATH have
+# spaces in them some parts of this will fail as-is, it shouldn't be
+# that hard to fix (it's not clear if the rest of kbuild is clean in
+# that respect though)
+
+# This relies on the following environment variables being sane and
+# passed in from the Makefiles:
+#
+#INSTALL_MINTREE_PATH
+#KERNELVERSION
+
+
+# target directory
+tgtdir="${INSTALL_MINTREE_PATH}"/${KERNELVERSION}/
+
+
+# And save the headers/makefiles etc for building modules against
+#
+# This all looks scary, but the end result is supposed to be:
+# * all arch relevant include/ files
+# * all Makefile/Kconfig files
+# * all script/ files
+
+cd "$srctree" || exit $?
+
+mkdir -p ${tgtdir}
+
+# first copy everything.  If objtree differs from srctree (ie. O=<...>
+# is used) then make sure we don't copy the Makefile(s) from inside
+# $objtree
+if [ "$srctree" != "$objtree" ] ; then
+cp --parents $(find -type f -name "Makefile*" -o -name "Kconfig*" -not 
-ipath "$objtree/*Makefile" ) ${tgtdir}
+else
+cp --parents $(find -type f -name "Makefile*" -o -name "Kconfig*") 
${tgtdir}
+fi
+
+cp "${objtree}/Module.symvers" ${tgtdir}
+# then drop all but the needed Makefiles/Kconfig files
+#rm -rf ${tgtdir}/Documentation
+rm -rf "${tgtdir}/scripts"
+rm -rf "${tgtdir}/include"
+cp "${objtree}/.config" ${tgtdir}
+cp -a scripts ${tgtdir}
+if [ -d ${srctree}/arch/${ARCH}/scripts ]; then
+cp -a ${srctree}/arch/${ARCH}/scripts ${tgtdir}/arch/${ARCH} || :
+fi
+if [ -f ${objtree}/arch/${ARCH}/*lds ]; then
+cp -a ${objtree}/arch/${ARCH}/*lds ${tgtdir}/arch/${ARCH}/ 

Re: [00/41] Large Blocksize Support V7 (adds memmap support)

2007-09-12 Thread Christoph Lameter
On Wed, 12 Sep 2007, Nick Piggin wrote:

> I will still argue that my approach is the better technical solution for large
> block support than yours, I don't think we made progress on that. And I'm
> quite sure we agreed at the VM summit not to rely on your patches for
> VM or IO scalability.

The approach has already been tried (see the XFS layer) and found lacking. 

Having a fake linear block through vmalloc means that a special software 
layer must be introduced and we may face special casing in the block / fs 
layer to check if we have one of these strange vmalloc blocks.

> But you just showed in two emails that you don't understand what the
> problem is. To reiterate: lumpy reclaim does *not* invalidate my formulae;
> and antifrag does *not* isolate the issue.

I do understand what the problem is. I just do not get what your problem 
with this is and why you have this drive to demand perfection. We are 
working a variety of approaches on the (potential) issue but you 
categorically state that it cannot be solved.

> But what do you say about viable alternatives that do not have to
> worry about these "unlikely scenarios", full stop? So, why should we
> not use fs block for higher order page support?

Because it has already been rejected in another form and adds more 
layering to the filesystem and more checking for special cases in which 
we only have virtual linearity? It does not reduce the number of page 
structs that have to be handled by the lower layers etc.

Maybe we could get to something like a hybrid that avoids some of these 
issues? Add support so something like a virtual compound page can be 
handled transparently in the filesystem layer with special casing if 
such a beast reaches the block layer?

> I didn't skip that. We have large page pools today. How does that give
> first class of support to those allocations if you have to have memory
> reserves?

See my other mail. That portion is not complete yet. Sorry.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


CFS: some bad numbers with Java/database threading

2007-09-12 Thread Antoine Martin
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA512

Hi list,

I was working on some unit tests and thought I'd give CFS a whirl to see
if it had any impact on my workloads (to see what the fuss was about),
and I came up with some pretty disturbing numbers:
http://devloop.org.uk/documentation/database-performance/Linux-Kernels/Kernels-ManyThreads-CombinedTests-noload2.png
As above but also showing the load average:
http://devloop.org.uk/documentation/database-performance/Linux-Kernels/Kernels-ManyThreads-CombinedTests2.png
Looks like a regression to me...

Basically, all the previous kernels are pretty close (2.6.16 through to
2.6.20 performed almost identically to 2.6.22 and are not shown here to
avoid cluttering the graphs)

All the 2.6.23-rc kernels performed poorly (except -rc3!): much more
erratically and with a sharp performance drop above 800 threads. The
load starts to go up and the performance takes a nosedive.

With fewer threads (less than 50) there is hardly any difference at all
between all the kernels.

Notes about the tests and setup:
* environment is:
Dual Opteron 252 with 3GB ram, scsi disk, etc..
Sun Java 1.6
MySQL 5.0.44
Junit + ant + my test code (devloop.org.uk)
* java threads are created first and the data is prepared, then all the
threads are started in a tight loop. Each thread runs multiple queries
with a 10ms pause (to allow the other threads to get scheduled)
* load average is divided by the number of cpus (2)
* more general information (which also covers some irrelevant
information about some other tests I have published) is here:
http://devloop.org.uk/documentation/database-performance/Setup/

Don't shoot the messenger!
I can run some more tests if needed (bearing in mind that a full test
run takes a few hours) or you can run the tests yourself: instructions
on running the tests are included.
I am now re-testing with sched-cfs-v2.6.23-rc6-v21-combo-2.patch
but feel free to send some other patches.

Antoine
-BEGIN PGP SIGNATURE-
Version: GnuPG v2.0.6 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFG6HH+GK2zHPGK1rsRCl3oAJ9c4crCtNQfGs9gWO7Y5CvcIno8TACbBPTw
0TEHkqLMGAfH0ILwWVKc0oo=
=1iBA
-END PGP SIGNATURE-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [linux-dvb] [PATCH] Userspace tuner

2007-09-12 Thread Markus Rechberger
Let's add the LKML to this.

On 9/13/07, Markus Rechberger <[EMAIL PROTECTED]> wrote:
> On 9/12/07, Mauro Carvalho Chehab <[EMAIL PROTECTED]> wrote:
> > Markus,
> >
> > Em Ter, 2007-08-14 às 16:31 +0200, Markus Rechberger escreveu:
> > > Following patch adds the possibility to implement tuner drivers in
> > > userspace.
> >
> > As you asked me about userspace driver, at Linux Conf Europe, let me
> > give you my feedback about it.
> >
> > On Linux, userspace-to-kernelspace APIs are meant to be forever. This
> > means that, once a newer API is created, this should remain supported
> > for all future versions. So, such APIs should be carefully analyzed and
> > accepted by the community, before going to mainstream.
> >
>
> The V4L and DVB API is stable at the moment because it's at a stage
> which is sufficient for older devices but not sufficient for newer
> devices anymore.
> To support newer device it needs a change.
>
> > I don't see any technical reason why tuner drivers should be moved to
> > userspace. Looking at xc3028 device, the driver is very simple and
> > doesn't require any special treatment that it isn't possible to be done
> > at kernel. There are already some implementations on kernelspace that
> > works fine.
> >
>
> As from my side to support the xceive driver properly it needs a
> rewrite and a proper API description. Since it's not possible to
> discuss any API changes I will work around at least for those devices
> which I can support for.
>
> > On the other hand, a TV driver without a tuner is a broken driver. With
> > parts of the driver being at userspace, this means to add undesired
> > complexity at the drivers architecture, while not bringing any benefit.
> >
> > If you look at V4L history, the first drivers started at userspace,
> > being migrated to kernelspace, where we have the proper scenario for
> > managing those devices.
> >
> > Another aspect that should be analyzed is what is desired by the
> > community:
>
> don't get me wrong but the existing community is rather small and
> kicking off people who are interested in changing things.
> I recently had a talk with someone and I've been told that I'm kicking
> off people.
> Guess why I kick off people? -> because they do not contribute in a
> productive way which also means submitting patches. Optical useless
> changes don't make any difference at the functionality in the end. And
> my requirements are ignored constantly here.
>
> > kernelspace tuners or userspace tuners. Keeping support for
> > both at long term doesn't seem reasonable. The Linux community should
> > decide what is the better way. Currently, only you are pushing for
> > userspace tuners, mainly due to non-technical reasons.
>
> read the project site and you will see the reasons.
> http://mcentral.de/wiki/index.php/Userspace_tuner#Advantages
> Another advantage is that I have cygwin based code here which I can
> easily reuse with all that work I'm not going to reinvent the wheel
> even for newer devices which I work on.
>
> > Almost all the
> > other developers are comfortable with kernelspace tuners. So, creating
> > an userspace interface just to make you happy is not the way we should
> > go.
> >
>
> I'm afraid of giving the people which are against what I submitted the
> responsibility over the project. Initially there was an RFC which
> didn't get commented either (well there was one useless comment, I
> tried to discuss it on IRC before with the same guy) after I
> implemented exactly what I proposed there I got the first non
> technical comments - also keep in mind that working on something costs
> alot of time and talking about something unknown is rather cheap.
>
> > A final aspect is that having an userspace driver for tuner will mean
> > that the kernel driver will depend on an userspace counterpart in order
> > to work. This will allow a vendor with bad intentions to release a
> > partially broken userspace driver, with limited capabilities, and a
> > closed source driver for full support. This model is likely to occur, if
> > you take a look at the past. For example: ATI and Nvidia closed source
> > drivers, several soft modem drivers, some network drivers, ...
> >
>
> Please go forward and discuss the UIO driver with Greg Kroah Hartmann
> and the fuse driver with the other people. If companies want to
> release binary drivers they can easily use the existing code put it
> into an RPM or DEB package and Ubuntu will pick it up.
>
> > With all those issues, I'm against to add an userspace interface for
> > tuners.
> >
>
> I'm against how the project works out at the moment and how it worked
> out in history. Exactly this way will kick off companies to be
> interested in future like Avermedia. A driver can easily be written
> within a few weeks and I've been struggling with it for 2 years(!!!)
> now just for nothing finally telling me that some guys want to steal
> my code and move it to kernelspace although it would raise more
> complications with 

Re: [PATCH/RFC] doc: about email clients for Linux kernel patches

2007-09-12 Thread Adrian Bunk
On Wed, Sep 12, 2007 at 09:53:00PM +0200, Peter Zijlstra wrote:
> 
> On Wed, 2007-09-12 at 15:16 -0400, Rik van Riel wrote:
> > Jeff Garzik wrote:
> > > Chris Friesen wrote:
> > >> Randy Dunlap wrote:
> > >>
> > >>> +Thunderbird (GUI)
> > >>> +
> > >>> +By default, thunderbird likes to mangle text, but there are ways to
> > >>> +coerce it into being nice.
> > >>
> > >> Can someone describe the problems with just attaching the patch in 
> > >> Thunderbird?  It's what Martin says he does on the linked document...
> > > 
> > > Email clients don't like to quote attachments, even text/plain ones, 
> > > which then makes attached patches much more difficult to review and 
> > > comment on (i.e. you greatly reduce the number of reviewers).
> > 
> > Interestingly, Thunderbird does this right and simply
> > adds text/plain attachments to the quoted text.
> 
> Devolution allows the same, but most other mailers dont. Esp the text
> based onces which are the majority under the people you want reviews
> from.

I still prefer patches directly inline, but mutt both is a text based 
MUA and defaults to adding text/plain attachments to the quoted text.

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] pci: fix unterminated pci_device_id lists

2007-09-12 Thread Andrew Morton
On Wed, 12 Sep 2007 14:53:56 -0700
Greg KH <[EMAIL PROTECTED]> wrote:

> On Wed, Sep 12, 2007 at 03:48:49PM +0400, Alexey Dobriyan wrote:
> > On 9/12/07, Jeff Garzik <[EMAIL PROTECTED]> wrote:
> > > Kees Cook wrote:
> > > > This patch against 2.6.23-rc6 fixes a couple drivers that do not
> > > > correctly terminate their pci_device_id lists.  This results in garbage
> > > > being spewed into modules.pcimap when the module happens to not have
> > > > 28 NULL bytes following the table, and/or the last PCI ID is actually
> > > > truncated from the table when calculating the modules.alias PCI aliases,
> > > > cause those unfortunate device IDs to not auto-load.
> > > >
> > > > Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
> > >
> > > ACK
> > 
> > I mut say, non-terminated PCI ids lists are constant PITA. There should be
> > a way to a) put it in macro[1], so that terminator automatically added, and
> > b) still allow #ifdef inside table like, e.g. 8139too does.
> > 
> > [1] or not macro, because #ifdef inside macros aren't allowed.
> 
> If you know of a way to do this in an easier manner, patches are always
> gladly accepted :)

Change (ie: fix) the APIs to take a `length' arg, then fix up 10^42 drivers.

Oh, you said "easy" ;)

Perhaps there's some clever way in which we can check that the tables are
correctly terminated.  I guess some static code-checker could do it.  A
weaker option would be to do some runtime hack which carefully walks the
table and checks stuff.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [00/41] Large Blocksize Support V7 (adds memmap support)

2007-09-12 Thread Christoph Lameter
On Wed, 12 Sep 2007, Nick Piggin wrote:

> In my attack, I cause the kernel to allocate lots of unmovable allocations
> and deplete movable groups. I theoretically then only need to keep a
> small number (1/2^N) of these allocations around in order to DoS a
> page allocation of order N.

True. That is why we want to limit the number of unmovable allocations and 
that is why ZONE_MOVABLE exists to limit those. However, unmovable 
allocations are already rare today. The overwhelming majority of 
allocations are movable and reclaimable. You can see that f.e. by looking 
at /proc/meminfo and see how high SUnreclaim: is (does not catch 
everything but its a good indicator).

> Now there are lots of other little heuristics, *including lumpy reclaim
> and various slab reclaim improvements*, that improve the effectiveness
> or speed of this thing, but at the end of the day, it has the same basic

All of these methods also have their own purpose aside from the mobility 
patches.

> issues. Unless you can move practically any currently unmovable
> allocation (which will either be a lot of intrusive code or require a
> vmapped kernel), then you can't get around the fundamental problem.
> And if you do get around the fundamental problem, you don't really
> need to group pages by mobility any more because they are all
> movable[*].
> 
> So lumpy reclaim does not change my formula nor significantly help
> against a fragmentation attack. AFAIKS.

Lumpy reclaim improves the situation significantly because the 
overwhelming majority of allocations during the lifetime of a system are 
movable and thus it is able to opportunistically restore the availability 
of higher order pages by reclaiming neighboring pages.

> [*] ok, this isn't quite true because if you can actually put a hard limit on
> unmovable allocations then anti-frag will fundamentally help -- get back to
> me on that when you get patches to move most of the obvious  ones.

We have this hard limit using ZONE_MOVABLE in 2.6.23.

> > The patch currently only supports 64k.
> 
> Sure, and I pointed out the theoretical figure for 64K pages as well. Is that
> figure not problematic to you? Where do you draw the limit for what is
> acceptable? Why? What happens with tiny memory machines where a reserve
> or even the anti-frag patches may not be acceptable and/or work very well?
> When do you require reserve pools? Why are reserve pools acceptable for
> first-class support of filesystems when it has been very loudly been made a
> known policy decision by Linus in the past (and for some valid reasons) that
> we should not put limits on the sizes of caches in the kernel.

64K pages may be problematic because it is above the PAGE_ORDER_COSTLY in 
2.6.23. 32K is currently much safer because lumpy reclaim can restore 
these and does so on my systems. I expect the situation for 64K pages to 
improve when more of Mel's patches go in. We have long term experience 
with 32k sized allocation through Andrew's tree.

Reserve pools as handled (by the not yet available) large page pool 
patches (which again has altogether another purpose) are not a limit. The 
reserve pools are used to provide a minimum of higher order pages that is 
not broken down in order to ensure that a minimum number of the desired 
order of pages is even available in your worst case scenario. Mainly I 
think that is needed during the period when memory defragmentation is 
still under development.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] local_t protection (critical section)

2007-09-12 Thread Mathieu Desnoyers
* Christoph Lameter ([EMAIL PROTECTED]) wrote:
> On Wed, 5 Sep 2007, Mathieu Desnoyers wrote:
> 
> > Index: linux-2.6-lttng/include/asm-generic/local.h
> > ===
> > --- linux-2.6-lttng.orig/include/asm-generic/local.h2007-09-04 
> > 15:32:02.0 -0400
> > +++ linux-2.6-lttng/include/asm-generic/local.h 2007-09-05 
> > 08:50:47.0 -0400
> > @@ -46,6 +46,15 @@ typedef struct
> >  #define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), 
> > (u))
> >  #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a)
> >  
> > +#define local_enter_save(flags) local_irq_save(flags)
> > +#define local_exit_restore(flags) local_irq_restore(flags)
> > +#define local_enter() local_irq_disable()
> > +#define local_exit() local_irq_enable()
> > +#define local_nest_irq_save(flags) (flags)
> > +#define local_nest_irq_restore(flags) (flags)
> > +#define local_nest_irq_disable()
> > +#define local_nest_irq_enable()
> > +
> 
> This list is going to increase with RT support in SLUB? Argh.
> 

AFAIK, there is no difference between local irq save/restore in mainline
VS -RT. The same applies to preempt disable/enable.

The only thing we have to make sure is that the irq disable and
preempt disable code paths are short and O(1).

> 
> > Index: linux-2.6-lttng/include/asm-i386/local.h
> > ===
> > --- linux-2.6-lttng.orig/include/asm-i386/local.h   2007-09-04 
> > 15:28:52.0 -0400
> > +++ linux-2.6-lttng/include/asm-i386/local.h2007-09-05 
> > 08:49:19.0 -0400
> > @@ -194,6 +194,23 @@ static __inline__ long local_sub_return(
> >  })
> >  #define local_inc_not_zero(l) local_add_unless((l), 1, 0)
> >  
> > +#define local_enter_save(flags) \
> > +   do { \
> > +   (flags); \
> > +   preempt_disable(); \
> > +   } while (0)
> 
> 
> > +#define local_exit_restore(flags) \
> > +   do { \
> > +   (flags); \
> > +   preempt_enable(); \
> > +   } while (0)
> 
> 
> This does not result in warnings because a variable is not used or used 
> uninitialized?

Because the variable is not used at all if I don't put the "(flags)"
(gcc warns about this).

I'm glad that some of the proposed changes may help. I'll let the
cmpxchg_local patches sleep for a while so I can concentrate my efforts
on text edit lock, immediate values and markers. I think what we'll
really need for the cmpxchg_local is two flavors: one that is as atomic
as possible (for things such as tracing), and the other one the fastest
possible (potentially using irq disable). A lot of per architecture
testing/fine tuning will be required though, and I don't have the
hardware to do this.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Al Viro
On Wed, Sep 12, 2007 at 05:44:30PM -0500, Brent Casavant wrote:

> P.S. By the way, there doesn't seem to be a way to remove /proc/#/mem
>  files.  That might be an additional nicety -- programs worried about
>  being snooped could unlink their own entry.  /dev/mem and /dev/kmem
>  can simply be removed by the sysadmin of such a system.  If all of
>  that were done you'd have to resort to attacking crash dumps, core
>  dumps, or via something like kdb to extract "hidden" data.

Give me a break.  And learn about ptrace(2).  This "unlinking" bullshit
buys you zero additional security, both for /proc/*/mem and for /dev/mem
(see mknod(2)).
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 0/3] Recursive reclaim (on __PF_MEMALLOC)

2007-09-12 Thread Christoph Lameter
On Wed, 12 Sep 2007, Peter Zijlstra wrote:

> > assumes single critical user of memory. There are other consumers of 
> > memory and if you have a load that depends on other things than networking 
> > then you should not kill the other things that want memory.
> 
> The VM is a _critical_ user of memory. And I dare say it is the _most_
> important user. 

The users of memory are various subsystems. The VM itself of course also 
uses memory to manage memory but the important thing is that the VM 
provides services to other subsystems

> Every user of memory relies on the VM, and we only get into trouble if
> the VM in turn relies on one of these users. Traditionally that has only
> been the block layer, and we special cased that using mempools and
> PF_MEMALLOC.
> 
> Why do you object to me doing a similar thing for networking?

I have not seen you using mempools for the networking layer. I would not 
object to such a solution. It already exists for other subsystems.
 
> The problem of circular dependancies on and with the VM is rather
> limited to kernel IO subsystems, and we only have a limited amount of
> them. 

The kernel has to use the filesystems and other subsystems for I/O. These 
subsystems compete for memory in order to make progress. I would not 
consider them strictly part of the VM. The kernel reclaim may trigger I/O 
in multiple I/O subsystems simultaneously.

> You talk about something generic, do you mean an approach that is
> generic across all these subsystems?

Yes an approach that is fair and does not allow one single subsystem to 
hog all of memory.

> If so, my approach would be it, I can replace mempools as we have them
> with the reserve system I introduce.

Replacing the mempools for the block layer sounds pretty good. But how do 
these various subsystems that may live in different portions of the system 
for various devices avoid global serialization and livelock through your 
system? And how is fairness addressed? I may want to run a fileserver on 
some nodes and a HPC application that relies on a fiberchannel connection 
on other nodes. How do we guarantee that the HPC application is not 
impacted if the network services of the fileserver flood the system with 
messages and exhaust memory?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: sata & scsi suggestion for make menuconfig

2007-09-12 Thread Adrian Bunk
On Sun, Sep 09, 2007 at 05:11:44PM -0400, Jeff Garzik wrote:
> Andi Kleen wrote:
>>> I can see where you're coming from, but logically, this is wrong.
>>> There's a huge slew of enterprise machines that only have DVD on SATA.
>> ... and enterprise systems don't really care about a few KB more of code.
>> In fact you definitely want to have SATA compiled in in case you need
>> to recover the machine later when the SAN is down.
>>> On the other hand, all of these machines will have SCSI disk devices on
>>> various other transports, so no harm is done, it's just an inelegant
>>> solution.
>> Do you know of a better one?
>
> Let's step back a moment and consider the actual scale and impact of the 
> problem at hand.
>
> The vast majority of users are consumers of pre-compiled kernels, built by 
> People With Clue(tm), who figured this stuff out as soon as it was 
> introduced.

We are talking about a patch to kconfig, and the users using 
pre-compiled kernels are not kconfig users.

> The current setup expresses the dependencies as they exist -- OPTIONAL 
> extras, and that is a problem once a year or so, when someone builds their 
> own kernel but must learn this fact anew.
>
> There is simply no compelling need at all to change things from the current 
> setup.
>
> Our Kconfig system is for people who already know the kernel, not Aunt 
> Tillie.

Couldn't we just remove kconfig and assume that all "people who already 
know the kernel" anyway prefer to edit their .config using vi?  ;-)

In my experience, the vast majority of kconfig users are not the few 
people working on distribution kernels, most of the kconfig userbase 
could be better described by the use case "sysadmin who knows about the 
hardware in his machine and which filesystems he uses".

And there must have been a reason why a leading kernel developer has 
written a complete book covering only configuration and building of the
kernel - the target audience of this book are most likely not "people 
who already know the kernel".

>   Jeff

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Brent Casavant
On Wed, 12 Sep 2007, Andreas Schwab wrote:

> Brent Casavant <[EMAIL PROTECTED]> writes:
> 
> > I could mmap a temporary tmpfs file (tmpfs so that if there is a
> > machine crash no sensitive data persists) which is created with
> > permissions of 0, immediately unlink it, and pass the file
> > descriptor through an AF_UNIX socket.  This does open up a very
> > small window of vulnerability if another process is able to chmod
> > the file and open it before the unlink.
> 
> Only the owner can chmod a file, so why is that a vulnerability?

In this particular case because the user may not normally have direct
access to some of the data to be contained in that file.

Decryption keys in a key management system, in particular.  If the
keys are passed over secure network links such that they only ever
exist in system RAM, and are not reachable via the filesystem, these
keys can be protected from disclosure to the user (short of /proc/#/mem
type of tricks).  However, if there is even a brief window when the
user can gain access to the file, these keys are at risk of disclosure.

The problem can be addressed, in this case, by having the daemon half
of the design create these files, however it would provide a bit more
flexibility if the client side was also capable of creating them.  It's
not a make-or-break problem, by any means, but does somewhat motivate
an O_NOLINK flag for open().

Brent

P.S. By the way, there doesn't seem to be a way to remove /proc/#/mem
 files.  That might be an additional nicety -- programs worried about
 being snooped could unlink their own entry.  /dev/mem and /dev/kmem
 can simply be removed by the sysadmin of such a system.  If all of
 that were done you'd have to resort to attacking crash dumps, core
 dumps, or via something like kdb to extract "hidden" data.

-- 
Brent Casavant  All music is folk music.  I ain't
[EMAIL PROTECTED]never heard a horse sing a song.
Silicon Graphics, Inc.-- Louis Armstrong
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86_64: check and enable MMCONFIG for AMD Family 10h Opteron

2007-09-12 Thread Andrew Morton
On Wed, 12 Sep 2007 12:33:38 -0700
Yinghai Lu <[EMAIL PROTECTED]> wrote:

> [PATCH] x86_64: check and enable MMCONFIG for AMD Family 10h Opteron
> 
> Signed-off-by: Yinghai Lu <[EMAIL PROTECTED]>
> 
> Index: linux-2.6/arch/x86_64/kernel/setup.c
> ===
> --- linux-2.6.orig/arch/x86_64/kernel/setup.c 2007-09-12 11:28:08.0 
> -0700
> +++ linux-2.6/arch/x86_64/kernel/setup.c  2007-09-12 12:09:32.0 
> -0700
> @@ -507,6 +507,32 @@
>  }
>  #endif
>  
> +/*[39:8] */
> +/* why not using 0xfe00 ? */
> +#define FAM10H_PCI_MMIO_BASE 0xc000
> +static void fam10h_check_enable_mmcfg(struct cpuinfo_x86 *c)
> +{
> + u32 low, high, address;
> +
> + address = 0xc0010058;
> + if (rdmsr_safe(address, , ))
> + return;
> +
> + if (low & 1)
> + return;
> +
> + printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
> + /*
> +  * if it is not enable, let enable it and assume only one segement
> +  * with 256 buses
> +  */
> + low &= ~(0xfff0 | (0xf<<2));
> + low |= (8<<2) | (1<<0);
> + high &= ~(0x);
> + high |= (FAM10H_PCI_MMIO_BASE>>(32-8));
> + wrmsr_safe(address, low, high);
> +}
> +
>  /*
>   * On a AMD dual core setup the lower bits of the APIC id distingush the 
> cores.
>   * Assumes number of cores is a power of two.
> @@ -655,6 +681,9 @@
>   /* Family 10 doesn't support C states in MWAIT so don't use it */
>   if (c->x86 == 0x10 && !force_mwait)
>   clear_bit(X86_FEATURE_MWAIT, >x86_capability);
> +
> + if (c->x86 == 0x10)
> + fam10h_check_enable_mmcfg(c);
>  }
>  
>  static void __cpuinit detect_ht(struct cpuinfo_x86 *c)

fixes:

--- 
a/arch/x86_64/kernel/setup.c~x86_64-check-and-enable-mmconfig-for-amd-family-10h-opteron-fix
+++ a/arch/x86_64/kernel/setup.c
@@ -494,7 +494,7 @@ static int nearby_node(int apicid)
 /*[39:8] */
 /* why not using 0xfe00 ? */
 #define FAM10H_PCI_MMIO_BASE 0xc000
-static void fam10h_check_enable_mmcfg(struct cpuinfo_x86 *c)
+static void __cpuinit fam10h_check_enable_mmcfg(struct cpuinfo_x86 *c)
 {
u32 low, high, address;
 
@@ -507,8 +507,8 @@ static void fam10h_check_enable_mmcfg(st
 
printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
/*
-* if it is not enable, let enable it and assume only one segement
-* with 256 buses
+* If it is not enabled, enable it and assume only one segment
+* with 256 busses.
 */
low &= ~(0xfff0 | (0xf<<2));
low |= (8<<2) | (1<<0);
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-rc1: USB hard disk broken (REGRESSION)

2007-09-12 Thread Mark Lord

Mark Lord wrote:

Dan Zwell wrote:

Alan Stern wrote:

[  126.512815] usb 1-1: usb auto-resume
[  126.543447] uhci_hcd :00:1f.2: port 1 portsc 00a5,01
[  126.559426] usb 1-1: finish resume
[  126.561435] usb 1-1: gone after usb resume? status -19
[  126.561445] usb 1-1: can't resume, status -19
[  126.561451] hub 1-0:1.0: logical disconnect on port 1
[  126.562486] sd 5:0:0:0: [sdb] Result: hostbyte=DID_ERROR 
driverbyte=DRIVER_OK,SUGGEST_OK


This suggests a bug in the device's firmware, probably it sends a 
1-byte Device-Status reply instead of a 2-byte reply as required by 
the USB spec.  You could find out for certain by using usbmon.


But if that is indeed the problem, the patch below should help.  I've 
seen it before; perhaps we should adopt this workaround permanently.



Relevant info:
-obviously, I'm using uhci
-the drive is SATA, connected to USB with a SATA/IDE to USB adapter
-this problem does not occur with a USB flash drive
-reverting the commit that introduced auto-suspend prevents this error.


If necessary you could disable autosuspend for your drive.  But first 
test this patch.


Alan Stern



Index: 2.6.23-rc1/drivers/usb/core/hub.c
===
--- 2.6.23-rc1.orig/drivers/usb/core/hub.c
+++ 2.6.23-rc1/drivers/usb/core/hub.c
@@ -1644,9 +1644,10 @@ static int finish_port_resume(struct usb
  * and device drivers will know about any resume quirks.
  */
 if (status == 0) {
+devstatus = 0;
 status = usb_get_status(udev, USB_RECIP_DEVICE, 0, );
 if (status >= 0)
-status = (status == 2 ? 0 : -ENODEV);
+status = (status > 0 ? 0 : -ENODEV);
 }
 
 if (status) {





Alan,

Yes, that patch worked, and dmesg now shows the device auto-suspending 
and resuming every few seconds. Thanks a lot. I hope you do merge this 
patch or a workaround like it.


Dan


The same bug kills my Sandisk Cruzer Micro USB pen drives.
I plug them in, they work briefly, then the light goes out (abnormal),
and 30-second timeout/reset is needed for each subsequent access.  Ugh.

They work fine in 2.6.22.  I'll try the above patch here now and see if it fixes
this regression.


Nope.  Patch is already in -rc6 I see, so still NFG.
We can continue blacklisting the multitudes of b0rked devices one by one,
or we can revert this change or default it to "off" for usb-storage (at least).

This really kills a lot of everyday devices.  Here's my Sandisk Cruzer(s),
after forcing autosuspend=0:

Bus 005 Device 014: ID 0781:5151 SanDisk Corp. Cruzer Micro 256/512MB Flash 
Drive
Device Descriptor:
 bLength18
 bDescriptorType 1
 bcdUSB   2.00
 bDeviceClass0 (Defined at Interface level)
 bDeviceSubClass 0
 bDeviceProtocol 0
 bMaxPacketSize064
 idVendor   0x0781 SanDisk Corp.
 idProduct  0x5151 Cruzer Micro 256/512MB Flash Drive
 bcdDevice0.10
 iManufacturer   1 SanDisk Corporation
 iProduct2 Cruzer Micro
 iSerial 3 20060775000CF73334D3
 bNumConfigurations  1
 Configuration Descriptor:
   bLength 9
   bDescriptorType 2
   wTotalLength   32
   bNumInterfaces  1
   bConfigurationValue 1
   iConfiguration  0
   bmAttributes 0x80
 (Bus Powered)
   MaxPower  200mA
   Interface Descriptor:
 bLength 9
 bDescriptorType 4
 bInterfaceNumber0
 bAlternateSetting   0
 bNumEndpoints   2
 bInterfaceClass 8 Mass Storage
 bInterfaceSubClass  6 SCSI
 bInterfaceProtocol 80 Bulk (Zip)
 iInterface  0
 Endpoint Descriptor:
   bLength 7
   bDescriptorType 5
   bEndpointAddress 0x81  EP 1 IN
   bmAttributes2
 Transfer TypeBulk
 Synch Type   None
 Usage Type   Data
   wMaxPacketSize 0x0200  1x 512 bytes
   bInterval   0
 Endpoint Descriptor:
   bLength 7
   bDescriptorType 5
   bEndpointAddress 0x01  EP 1 OUT
   bmAttributes2
 Transfer TypeBulk
 Synch Type   None
 Usage Type   Data
   wMaxPacketSize 0x0200  1x 512 bytes
   bInterval   1
Device Qualifier (for other device speed):
 bLength10
 bDescriptorType 6
 bcdUSB   2.00
 bDeviceClass0 (Defined at Interface level)
 bDeviceSubClass 0
 bDeviceProtocol 0
 bMaxPacketSize064
 bNumConfigurations  1
Device Status: 0x
 (Bus Powered)

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  

Re: [RFC 0/3] Recursive reclaim (on __PF_MEMALLOC)

2007-09-12 Thread Christoph Lameter
On Tue, 21 Aug 2007, Nick Piggin wrote:

> The thing I don't much like about your patches is the addition of more
> of these global reserve type things in the allocators. They kind of
> suck (not your code, just the concept of them in general -- ie. including
> the PF_MEMALLOC reserve). I'd like to eventually reach a model where
> reclaimable memory from a given subsystem is always backed by enough
> resources to be able to reclaim it. What stopped you from going that
> route with the network subsystem? (too much churn, or something
> fundamental?)

That sounds very right aside from the global reserve. A given subsystem 
may exist in multiple instances and serve sub partitions of the system.
F.e. there may be a network card on node 5 and a job running on nodes 3-7
and another network card on node 15 with the corresponding nodes 13-17 
doing I/O through it.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH/RFC] doc: about email clients for Linux kernel patches

2007-09-12 Thread Stefan Richter
>> On Sep 11 2007 21:26, Chris Friesen wrote:
>>> Thunderbird, at least, will automatically inline a single text/plain
>>> attachment when replying. (At least with my current settings, it does.)

I don't know about Thunderbird, but Seamonkey apparently only includes
text/plain attachments in the reply quote if they feature
''Content-Disposition: inline''.  It does not include attachments with
''Content-Disposition: attachment; filename="abc123.patch"'' even if
they are text/plain.
-- 
Stefan Richter
-=-=-=== =--= -==-=
http://arcgraph.de/sr/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] wake up from a serial port

2007-09-12 Thread Andrew Morton
On Wed, 12 Sep 2007 20:50:10 +0200 (CEST)
Guennadi Liakhovetski <[EMAIL PROTECTED]> wrote:

> Enable wakeup from serial ports, make it run-time configurable over sysfs, 
> e.g.,
> 
> echo enabled > /sys/devices/platform/serial8250.0/tty/ttyS0/power/wakeup
> 
> Requires
> 
> # CONFIG_SYSFS_DEPRECATED is not set
> 
> Signed-off-by: Guennadi Liakhovetski <[EMAIL PROTECTED]>
> 
> ---
> 
> Following suggestions from Alan and Russell moved the may_wake_up checks 
> to serial_core.c. This time actually tested - it does even work. Could 
> someone, please, verify, that put_device after device_find_child is 
> correct?

Seems right to me, from reading device_find_child() and its associated
documentation.

> Also would be nice to test with a Natsemi UART, that can wake up 
> the system, if such systems exist.
>

It would help if you could provide simple-to-follow steps which a tester
should follow to perform this testing.

 > 
> diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
> index 9c57486..8a3d6ea 100644
> --- a/drivers/serial/serial_core.c
> +++ b/drivers/serial/serial_core.c
> @@ -1934,9 +1934,24 @@ static void uart_change_pm(struct uart_state *state, 
> int pm_state)
>   }
>  }
>  
> +struct uart_match {
> + struct uart_port *port;
> + struct uart_driver *driver;
> +};
> +
> +static int serial_match_port(struct device *dev, void *data)
> +{
> + struct uart_match *match = data;
> + dev_t devt = MKDEV(match->driver->major, match->driver->minor) + 
> match->port->line;
> +
> + return dev->devt == devt; /* Actually, only one tty per port */
> +}
> +
>  int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
>  {
>   struct uart_state *state = drv->state + port->line;
> + struct device *tty_dev;
> + struct uart_match match = {port, drv};
>  
>   mutex_lock(>mutex);
>  
> @@ -1947,6 +1962,15 @@ int uart_suspend_port(struct uart_driver *drv, struct 
> uart_port *port)
>   }
>  #endif
>  
> + tty_dev = device_find_child(port->dev, , serial_match_port);
> + if (device_may_wakeup(tty_dev)) {
> + enable_irq_wake(port->irq);
> + put_device(tty_dev);
> + mutex_unlock(>mutex);
> + return 0;
> + }
> + port->suspended = 1;
> +
>   if (state->info && state->info->flags & UIF_INITIALIZED) {
>   const struct uart_ops *ops = port->ops;
>  
> @@ -1995,6 +2019,13 @@ int uart_resume_port(struct uart_driver *drv, struct 
> uart_port *port)
>   }
>  #endif
>  
> + if (!port->suspended) {
> + disable_irq_wake(port->irq);
> + mutex_unlock(>mutex);
> + return 0;
> + }
> + port->suspended = 0;
> +
>   uart_change_pm(state, 0);
>  
>   /*
> @@ -2266,6 +2297,7 @@ int uart_add_one_port(struct uart_driver *drv, struct 
> uart_port *port)
>  {
>   struct uart_state *state;
>   int ret = 0;
> + struct device *tty_dev;
>  
>   BUG_ON(in_interrupt());
>  
> @@ -2301,7 +2333,13 @@ int uart_add_one_port(struct uart_driver *drv, struct 
> uart_port *port)
>* Register the port whether it's detected or not.  This allows
>* setserial to be used to alter this ports parameters.
>*/
> - tty_register_device(drv->tty_driver, port->line, port->dev);
> + tty_dev = tty_register_device(drv->tty_driver, port->line, port->dev);
> + if (likely(!IS_ERR(tty_dev))) {
> + device_can_wakeup(tty_dev) = 1;
> + device_set_wakeup_enable(tty_dev, 0);
> + } else
> + printk(KERN_ERR "Cannot register tty device on line %d\n",
> +port->line);
>  
>   /*
>* If this driver supports console, and it hasn't been
> diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
> index 773d8d8..60dedc0 100644
> --- a/include/linux/serial_core.h
> +++ b/include/linux/serial_core.h
> @@ -291,7 +291,8 @@ struct uart_port {
>   unsigned long   mapbase;/* for ioremap */
>   struct device   *dev;   /* parent device */
>   unsigned char   hub6;   /* this should be in 
> the 8250 driver */
> - unsigned char   unused[3];
> + unsigned char   suspended;
> + unsigned char   unused[2];
>   void*private_data;  /* generic platform 
> data pointer */
>  };
>  
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Bodo Eggert
Brent Casavant <[EMAIL PROTECTED]> wrote:

[...]
> I could mmap a temporary tmpfs file (tmpfs so that if there is a
> machine crash no sensitive data persists) which is created with
> permissions of 0, immediately unlink it, and pass the file
> descriptor through an AF_UNIX socket.  This does open up a very
> small window of vulnerability if another process is able to chmod
> the file and open it before the unlink.

If the process can chmod the file, it can ptrace the daemon, too.
Or, using CAP_DAC_OVERRIDE, it can patch the daemon.

Both will void any security.

> However, it occurs to me that this problem goes away if there were
> a method to create a file in an unlinked state to begin with.  However
> there does not appear to be any such mechanism in Linux's open()
> interface.

Having no window for creating stale temp files is nice to have. We only
need a clever fool to implement it.-) But since it's hard to get killed
just in the right moment for having a stale temp file, there is very low
interest for this feature.
-- 
You know you're in trouble when packet floods are competing to flood you.
-- grc.com

Friß, Spammer: [EMAIL PROTECTED] [EMAIL PROTECTED]
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] local_t protection (critical section)

2007-09-12 Thread Christoph Lameter
On Wed, 5 Sep 2007, Mathieu Desnoyers wrote:

> Index: linux-2.6-lttng/include/asm-generic/local.h
> ===
> --- linux-2.6-lttng.orig/include/asm-generic/local.h  2007-09-04 
> 15:32:02.0 -0400
> +++ linux-2.6-lttng/include/asm-generic/local.h   2007-09-05 
> 08:50:47.0 -0400
> @@ -46,6 +46,15 @@ typedef struct
>  #define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u))
>  #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a)
>  
> +#define local_enter_save(flags) local_irq_save(flags)
> +#define local_exit_restore(flags) local_irq_restore(flags)
> +#define local_enter() local_irq_disable()
> +#define local_exit() local_irq_enable()
> +#define local_nest_irq_save(flags) (flags)
> +#define local_nest_irq_restore(flags) (flags)
> +#define local_nest_irq_disable()
> +#define local_nest_irq_enable()
> +

This list is going to increase with RT support in SLUB? Argh.


> Index: linux-2.6-lttng/include/asm-i386/local.h
> ===
> --- linux-2.6-lttng.orig/include/asm-i386/local.h 2007-09-04 
> 15:28:52.0 -0400
> +++ linux-2.6-lttng/include/asm-i386/local.h  2007-09-05 08:49:19.0 
> -0400
> @@ -194,6 +194,23 @@ static __inline__ long local_sub_return(
>  })
>  #define local_inc_not_zero(l) local_add_unless((l), 1, 0)
>  
> +#define local_enter_save(flags) \
> + do { \
> + (flags); \
> + preempt_disable(); \
> + } while (0)


> +#define local_exit_restore(flags) \
> + do { \
> + (flags); \
> + preempt_enable(); \
> + } while (0)


This does not result in warnings because a variable is not used or used 
uninitialized?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] slub - Use local_t protection

2007-09-12 Thread Christoph Lameter
On Wed, 5 Sep 2007, Mathieu Desnoyers wrote:

> Use local_enter/local_exit for protection in the fast path.

Sorry that it took some time to get back to this issue. KS interfered.

> @@ -1494,8 +1487,16 @@ new_slab:
>   c->page = new;
>   goto load_freelist;
>   }
> -
> + if (gfpflags & __GFP_WAIT) {
> + local_nest_irq_enable();
> + local_exit();
> + }
>   new = new_slab(s, gfpflags, node);
> + if (gfpflags & __GFP_WAIT) {
> + local_enter();
> + local_nest_irq_disable();
> + }
> +
>   if (new) {
>   c = get_cpu_slab(s, smp_processor_id());
>   if (c->page) {

H... Definitely an interesting change to move the interrupt 
enable/disable to __slab_alloc. But it looks like it is getting a bit 
messy. All my attempts ended also like this. Sigh.

> @@ -2026,8 +2032,11 @@ static struct kmem_cache_node *early_kme
>  
>   BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
>  
> + if (gfpflags & __GFP_WAIT)
> + local_irq_enable();
>   page = new_slab(kmalloc_caches, gfpflags, node);
> -
> + if (gfpflags & __GFP_WAIT)
> + local_irq_disable();
>   BUG_ON(!page);
>   if (page_to_nid(page) != node) {
>   printk(KERN_ERR "SLUB: Unable to allocate memory from "

H... Actually we could drop the irq disable / enable here since this 
is boot code. That would also allow the removal of the later 
local_irq_enable.

Good idea. I think I will do the moving of the interrupt enable/disable 
independently.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ata_piix, laptop cdrom, ICH7: EH, limiting speed to PIO

2007-09-12 Thread Michal Piotrowski
Sergey Dolgov pisze:
> On Wed, Sep 12, 2007 at 10:19:03PM +0200, Michal Piotrowski wrote:
>> Sergey Dolgov pisze:
>>> Hi Michal,
>>>
>>> On Wed, Sep 12, 2007 at 06:33:20PM +0200, Michal Piotrowski wrote:
 Hi Sergey,

 On 11/09/2007, Sergey Dolgov <[EMAIL PROTECTED]> wrote:
> Hi!
>
> On my hp nx7300 laptop, 2 following scenarios can happen during bootup
> (see attachments for the full logs): the "good" one [1] and the one
> where multiple EHs lead to limiting the speed [2].
>
> [1] one is more rare, but it seems to be persistent over reboots: once
> it happened, just rebooting the machine always results in behaviour
> like [1].
>
> [1] results in a working cdrom from the start. An attempt to use cdrom
> after [2] happened results in even more EHs, resulting in "configured
> for PIO4", after which the cdrom is finally working.
>
> The version I'm using is 2.6.23-rc6, but the same used to happen with
> the previous rc's, and probably with 2.6.22 too (I can check).
 It would be great if you could check it.
>>> I've just built 2.6.22.6 with basically the same configuration, and
>>> the behaviour is just the same, i.e. both [1] and [2] still can
>>> happen.
>> Ok, thanks.
>>
>> BTW please check cables and PCU.
> 
> What's a PCU?

s/PCU/PSU - power supply unit

> BTW, this only happens when using libata of course. The
> old CONFIG_IDE stuff works fine every time.

This may be one of libata's oddities (I really don't get why some hardware
works perfectly fine with the old IDE code and doesn't work well with libata).

> 
> [1]:
>
>[   13.026676] ata_piix :00:1f.1: version 2.12
>[   13.026701] ACPI: PCI Interrupt :00:1f.1[A] -> GSI 16 (level, 
> low) -> IRQ 16
>[   13.026898] PCI: Setting latency timer of device :00:1f.1 to 64
>[   13.026994] scsi4 : ata_piix
>[   13.027209] scsi5 : ata_piix
>[   13.027385] ata5: PATA max UDMA/133 cmd 0x000101f0 ctl 0x000103f6 
> bmdma 0x000140a0 irq 14
>[   13.027484] ata6: PATA max UDMA/133 cmd 0x00010170 ctl 0x00010376 
> bmdma 0x000140a8 irq 15
>[   13.339308] ata5.00: ATAPI: HL-DT-ST DVDRAM GSA-T10N, PC05, max 
> MWDMA2
>[   13.505922] ata5.00: configured for MWDMA2
>[   13.506026] ata6: port disabled. ignoring.
>[   13.509916] scsi 4:0:0:0: CD-ROMHL-DT-ST DVDRAM 
> GSA-T10N  PC05 PQ: 0 ANSI: 5
>
> [2]:
>
>[   10.007152] ata_piix :00:1f.1: version 2.12
>[   10.007178] ACPI: PCI Interrupt :00:1f.1[A] -> GSI 16 (level, 
> low) -> IRQ 16
>[   10.007369] PCI: Setting latency timer of device :00:1f.1 to 64
>[   10.007464] scsi4 : ata_piix
>[   10.007680] scsi5 : ata_piix
>[   10.007856] ata5: PATA max UDMA/133 cmd 0x000101f0 ctl 0x000103f6 
> bmdma 0x000140a0 irq 14
>[   10.007956] ata6: PATA max UDMA/133 cmd 0x00010170 ctl 0x00010376 
> bmdma 0x000140a8 irq 15
>[   10.320462] ata5.00: ATAPI: HL-DT-ST DVDRAM GSA-T10N, PC05, max 
> MWDMA2
>[   10.487077] ata5.00: configured for MWDMA2
>[   10.487181] ata6: port disabled. ignoring.
>[   15.985240] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> 0x2 frozen
>[   15.985325] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> cdb 0x12 data 96 in
>[   15.985327]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> 0x4 (timeout)
>[   15.985532] ata5: soft resetting port
>[   16.465489] ata5.00: configured for MWDMA2
>[   16.465571] ata5: EH complete
>[   21.963643] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> 0x2 frozen
>[   21.963725] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> cdb 0x12 data 96 in
>[   21.963727]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> 0x4 (timeout)
>[   21.963930] ata5: soft resetting port
>[   22.443900] ata5.00: configured for MWDMA2
>[   22.443979] ata5: EH complete
>[   27.942051] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> 0x2 frozen
>[   27.942134] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> cdb 0x12 data 96 in
>[   27.942136]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> 0x4 (timeout)
>[   27.942341] ata5: soft resetting port
>[   28.422309] ata5.00: configured for MWDMA2
>[   28.422389] ata5: EH complete
>[   33.920457] ata5.00: limiting speed to MWDMA1:PIO4
>[   33.920532] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> 0x2 frozen
>[   33.920614] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> cdb 0x12 data 96 in
>[   33.920616]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> 0x4 (timeout)
>[   33.920820] ata5: soft resetting 

Re: 2.6.23-rc1: USB hard disk broken (REGRESSION)

2007-09-12 Thread Mark Lord

Dan Zwell wrote:

Alan Stern wrote:

[  126.512815] usb 1-1: usb auto-resume
[  126.543447] uhci_hcd :00:1f.2: port 1 portsc 00a5,01
[  126.559426] usb 1-1: finish resume
[  126.561435] usb 1-1: gone after usb resume? status -19
[  126.561445] usb 1-1: can't resume, status -19
[  126.561451] hub 1-0:1.0: logical disconnect on port 1
[  126.562486] sd 5:0:0:0: [sdb] Result: hostbyte=DID_ERROR 
driverbyte=DRIVER_OK,SUGGEST_OK


This suggests a bug in the device's firmware, probably it sends a 
1-byte Device-Status reply instead of a 2-byte reply as required by 
the USB spec.  You could find out for certain by using usbmon.


But if that is indeed the problem, the patch below should help.  I've 
seen it before; perhaps we should adopt this workaround permanently.



Relevant info:
-obviously, I'm using uhci
-the drive is SATA, connected to USB with a SATA/IDE to USB adapter
-this problem does not occur with a USB flash drive
-reverting the commit that introduced auto-suspend prevents this error.


If necessary you could disable autosuspend for your drive.  But first 
test this patch.


Alan Stern



Index: 2.6.23-rc1/drivers/usb/core/hub.c
===
--- 2.6.23-rc1.orig/drivers/usb/core/hub.c
+++ 2.6.23-rc1/drivers/usb/core/hub.c
@@ -1644,9 +1644,10 @@ static int finish_port_resume(struct usb
  * and device drivers will know about any resume quirks.
  */
 if (status == 0) {
+devstatus = 0;
 status = usb_get_status(udev, USB_RECIP_DEVICE, 0, );
 if (status >= 0)
-status = (status == 2 ? 0 : -ENODEV);
+status = (status > 0 ? 0 : -ENODEV);
 }
 
 if (status) {





Alan,

Yes, that patch worked, and dmesg now shows the device auto-suspending 
and resuming every few seconds. Thanks a lot. I hope you do merge this 
patch or a workaround like it.


Dan


The same bug kills my Sandisk Cruzer Micro USB pen drives.
I plug them in, they work briefly, then the light goes out (abnormal),
and 30-second timeout/reset is needed for each subsequent access.  Ugh.

They work fine in 2.6.22.  I'll try the above patch here now and see if it fixes
this regression.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] sunrpc: make closing of old temporary sockets work (was: problems with lockd in 2.6.22.6)

2007-09-12 Thread Wolfgang Walter
On Wednesday 12 September 2007, J. Bruce Fields wrote:
> On Wed, Sep 12, 2007 at 09:40:57PM +0200, Wolfgang Walter wrote:
> > On Wednesday 12 September 2007, J. Bruce Fields wrote:
> > > On Wed, Sep 12, 2007 at 04:14:06PM +0200, Neil Brown wrote:
> > > > So it is in 2.6.21 and later and should probably go to .stable for .21
> > > > and .22.
> > > > 
> > > > Bruce:  for you :-)
> > > 
> > > OK, thanks!  But, (as is alas often the case) I'm still confused:
> > > 
> > > > if (!test_and_set_bit(SK_OLD, >sk_flags))
> > > > continue;
> > > > -   if (atomic_read(>sk_inuse) || test_bit(SK_BUSY, 
>sk_flags))
> > > > +   if (atomic_read(>sk_inuse) > 1
> > > > +   || test_bit(SK_BUSY, >sk_flags))
> > > > continue;
> > > > atomic_inc(>sk_inuse);
> > > > list_move(le, _be_aged);
> > > 
> > > What is it that ensures svsk->sk_inuse isn't incremented or SK_BUSY set
> > > after that test?  Not all the code that does either of those is under
> > > the same serv->sv_lock lock that this code is.
> > > 
> > 
> > This should not matter - SK_CLOSED may be set at any time.
> > 
> > svc_age_temp_sockets only detaches the socket, sets SK_CLOSED and then 
> > enqueues it. If SK_BUSY is set its already enqueued and svc_sock_enqueue 
> > ensures that it is not enqueued twice.
> 
> Oh, got it.  And the list manipulation is safe thanks to sv_lock.  Neat,
> thanks.  Can you verify that this solves your problem?
> 

I'll test it tomorrow. So friday morning I'll know and mail you for sure.

Regards,
-- 
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [announce] CFS-devel, performance improvements

2007-09-12 Thread Roman Zippel
Hi,

On Tue, 11 Sep 2007, Ingo Molnar wrote:

> fresh back from the Kernel Summit, Peter Zijlstra and me are pleased to 
> announce the latest iteration of the CFS scheduler development tree. Our 
> main focus has been on simplifications and performance - and as part of 
> that we've also picked up some ideas from Roman Zippel's 'Really Fair 
> Scheduler' patch as well and integrated them into CFS. We'd like to ask 
> people to give these patches a good workout, especially with an eye on 
> any interactivity regressions.

I must really say, I'm quite impressed by your efforts to give me as 
little credit as possible.
On the one hand it's of course positive to see so much sudden activity; on 
the other hand I'm not sure how much would have happened if I hadn't posted my 
patch. I don't really think it was my complaints about CFS's complexity 
that finally led to the improvements in this area. I presented the basic 
concepts of my patch already with my first CFS review, but at that time 
you didn't show any interest and instead you were rather quick to simply 
dismiss it. My patch did not add that much new, it's mostly a conceptual 
improvement and describes the math in more detail, but it also 
demonstrated a number of improvements.

> The combo patch against 2.6.23-rc6 can be picked up from:
> 
>   http://people.redhat.com/mingo/cfs-scheduler/devel/
> 
> The sched-devel.git tree can be pulled from:
> 
>
> git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel.git

Am I the only one who can't clone that thing? So I can't go into much 
detail about the individual changes here.
The thing that makes me curious, is that it also includes patches by 
others. It can't be entirely explained with the Kernel Summit, as this is 
not the first time patches appear out of the blue in form of a git tree. 
The funny/sad thing is that at some point Linus complained about Con that 
his development activity happened on a separate mailing list, but there was 
at least a place to go to. CFS's development appears to mostly happen in 
private. Patches may be your primary form of communication, but that isn't 
true for many other people, with patches a lot of intent and motivation 
for a change is lost. I know it's rather tempting to immediately try out 
an idea first, but would it really hurt you so much to formulate an idea 
in a more conventional manner? Are you afraid it might hurt your 
ueberhacker status by occasionally screwing up in public? Patches on the 
other hand have the advantage to more easily cover that up by simply 
posting a fix - it makes it more difficult to understand what's going on.
A more conventional way of communication would give more people a chance 
to participate, they may not understand every detail of the patch, but 
they can try to understand the general concepts and apply them to their 
own situation and eventually come up with some ideas/improvements of their 
own, they would be less dependent on you to come up with a solution to 
their problem. Unless of course that's exactly what you want - unless you 
want to be in full control of the situation and you want to be the hero 
that saves the day.

> There are lots of small performance improvements in form of a 
> finegrained 29-patch series. We have removed a number of features and 
> metrics from CFS that might have been needed but ended up being 
> superfluous - while keeping the things that worked out fine, like 
> sleeper fairness. On 32-bit x86 there's a ~16% speedup (over -rc6) in 
> lmbench (lat_ctx -s 0 2) results:

In the patch you really remove _a_lot_ of stuff. You also removed a lot of 
things I tried to get you to explain to me. On the one hand I could 
be happy that these things are gone, as they were the major road block to 
splitting up my own patch. On the other hand it still leaves me somewhat 
unsatisfied, as I still don't know what that stuff was good for.
In a more collaborative development model I would have expected that you 
tried to explain these features, which could have resulted in a discussion 
how else things can be implemented or if it's still needed at all. Instead 
of this you now simply decide unilaterally that these things are not 
needed anymore.

BTW the old sleeper fairness logic "that worked out fine" is actually 
completely gone and is now conceptually closer to what I'm already doing 
in my patch (only the amount of sleeper bonus differs).

>   (microseconds, lower is better)
>  
> v2.6.222.6.23-rc6(CFS) v2.6.23-rc6-CFS-devel
>  
>0.70  0.750.65
>0.62  0.660.63
>0.60  0.720.69
>0.62  0.740.61
>0.69  0.730.53
>0.66  0.73   

Re: [linux-usb-devel] spontaneous disconnect with "usb-storage: implement autosuspend"

2007-09-12 Thread Mark Lord

Oliver Neukum wrote:

Am Dienstag 14 August 2007 schrieb Paolo Ornati:

On Tue, 14 Aug 2007 17:46:16 +0200
Oliver Neukum <[EMAIL PROTECTED]> wrote:


Am Dienstag 14 August 2007 schrieb Paolo Ornati:
Hewlett-Packard PhotoSmart 720 / PhotoSmart 935 (storage)  

Please try this patch.

Tried on -rc3 but it doesn't work, dmesg attached.

However I've found that if "hald" is running the problem doesn't
happen (I think it's just hidden by the fact that hald does some polling
on it, preventing autosuspend from triggering).


Exactly. This is not reliable. It needs to be done in kernel. This patch
should do it.

Regards
Oliver

---

--- a/drivers/usb/core/quirks.c 2007-08-14 17:42:22.0 +0200
+++ b/drivers/usb/core/quirks.c 2007-08-14 20:30:28.0 +0200
@@ -30,6 +30,8 @@
 static const struct usb_device_id usb_quirk_list[] = {
/* HP 5300/5370C scanner */
{ USB_DEVICE(0x03f0, 0x0701), .driver_info = USB_QUIRK_STRING_FETCH_255 
},
+   /* Hewlett-Packard PhotoSmart 720 / PhotoSmart 935 (storage) */
+   { USB_DEVICE(0x03f0, 0x4002), .driver_info = USB_QUIRK_NO_AUTOSUSPEND },
/* Acer Peripherals Inc. (now BenQ Corp.) Prisa 640BU */
{ USB_DEVICE(0x04a5, 0x207e), .driver_info = USB_QUIRK_NO_AUTOSUSPEND },
/* Benq S2W 3300U */
-


I believe the offending commit needs to be reverted.
It just breaks too much stuff, including my Sandisk USB sticks.


with "CONFIG_USB_SUSPEND=y", since commit:

8dfe4b14869fd185ca25ee88b02ada58a3005eaf
usb-storage: implement autosuspend

This patch (as930) implements autosuspend for usb-storage.  It is

adapted from a patch by Oliver Neukum.  Autosuspend is allowed except
during LUN scanning, resets, and command execution.

my USB photo-camera gets automagically disconnected before I can do
anything with it  ;) 


Ditto for several other devices that are being slowly special-cased,
and many that have yet to be tested.  This commit is (unfortunately)
a disaster with many regressions.

Andrew, Linus?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Problems with USB disk [solved]

2007-09-12 Thread Mark Lord

Chuck Ebbert wrote:

On 08/13/2007 10:50 AM, Niels wrote:

On Sunday 12 August 2007 11:54, Niels wrote:


On Friday 10 August 2007 14:43, Niels wrote:


On Wednesday 08 August 2007 12:57, Ismail Dönmez wrote:


On Wednesday 08 August 2007 13:48:29 you wrote:

On Tuesday 07 August 2007 23:18, Greg KH wrote:

On Tue, Aug 07, 2007 at 10:26:15PM +0200, Niels wrote:

Hi,

I'm having problems with a new 500 GB USB disk. It works, but
sometimes I get these in dmesg:


usb 1-3: reset high speed USB device using ehci_hcd and address 2
usb 5-1: USB disconnect, address 2
drivers/usb/class/usblp.c: usblp0: removed
sd 0:0:0:0: Device not ready: <6>: Sense Key : 0x2 [current]

: ASC=0x4 ASCQ=0x2

end_request: I/O error, dev sda, sector 254148215
sd 0:0:0:0: Device not ready: <6>: Sense Key : 0x2 [current]

: ASC=0x4 ASCQ=0x2

end_request: I/O error, dev sda, sector 252434023
EXT3-fs error (device sda1): ext3_find_entry: reading directory
#15761836 offset 0


There's also a printer connected. This is on a pci/usb2 card. When
the above happens, I get I/O errors. When I mount the drive next,
there are errors and often missing files. Quite annoying!

Kernel is 2.6.21

What's going on?

You have a low voltage issue, or a bad cable.  The device is
electronically disconnecting itself.  Try using a externally-powered
hub, or a new cable.

I am seeing a similar problem with 2.6.22 and 2.6.23-* kernels with my
60G iPod Video, works fine with 2.6.18 kernel though.


So far I'm seeing this:

- On 2.6.21 I mount the drive. After a while it spins down, and when I
then unmount it, an error pops up in dmesg.

- On 2.6.18 I can't provoke the same error. The drive doesn't appear to
spin down. I don't know if the data corruption from 2.6.21 occurs with
regular use.

There are a number of other factors I need to eliminate on my system, but
that's it so far. CONFIG_USB_SUSPEND is not set on either kernel.

OK, on a vanilla 2.6.18.8 I also have this problem, with both the pci/usb2
card, and the usb1 on the board. I listen to music from the drive, and
after some time (10-20 minutes or so), it freaks out:

=
sd 1:0:0:0: Device not ready: <6>: Current: sense key=0x2
ASC=0x4 ASCQ=0x2
end_request: I/O error, dev sda, sector 126693711
sd 1:0:0:0: Device not ready: <6>: Current: sense key=0x2
ASC=0x4 ASCQ=0x2
end_request: I/O error, dev sda, sector 126693711
sd 1:0:0:0: Device not ready: <6>: Current: sense key=0x2
ASC=0x4 ASCQ=0x2
end_request: I/O error, dev sda, sector 126693711
=


Using a new PSU and a powered hub made no difference. But I found a solution
here:

http://alienghic.livejournal.com/382903.html

Basically, the problem is, as suspected, that the drive spins down / goes to
suspend. This can be disabled with "sdparm --clear STANDBY -6 /dev/sda".

It seems to me to be an error that the kernel reports this as something like
a hardware failure. Or at least very misleading.



Oh, nice. The usb-storage (SCSI) disk spins itself down and we can't handle 
that.
Should we be disabling auto-spindown when we connect the device, or be able to
handle this by sending the start command when needed?


There's more to this.

My Sandisk Cruzer Micro 1GB USB sticks suffer from this regression.
Plug one in, it works for about 5 seconds, then the light goes off (bad).
Next access requires a 30s timeout + reset.  Etc..

This is with 2.6.23-rc6.
Works without any problems in 2.6.22.  REGRESSION.

-ml
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Andreas Schwab
Brent Casavant <[EMAIL PROTECTED]> writes:

> I could mmap a temporary tmpfs file (tmpfs so that if there is a
> machine crash no sensitive data persists) which is created with
> permissions of 0, immediately unlink it, and pass the file
> descriptor through an AF_UNIX socket.  This does open up a very
> small window of vulnerability if another process is able to chmod
> the file and open it before the unlink.

Only the owner can chmod a file, so why is that a vulnerability?

Andreas.

-- 
Andreas Schwab, SuSE Labs, [EMAIL PROTECTED]
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] pci: fix unterminated pci_device_id lists

2007-09-12 Thread Greg KH
On Wed, Sep 12, 2007 at 03:48:49PM +0400, Alexey Dobriyan wrote:
> On 9/12/07, Jeff Garzik <[EMAIL PROTECTED]> wrote:
> > Kees Cook wrote:
> > > This patch against 2.6.23-rc6 fixes a couple drivers that do not
> > > correctly terminate their pci_device_id lists.  This results in garbage
> > > being spewed into modules.pcimap when the module happens to not have
> > > 28 NULL bytes following the table, and/or the last PCI ID is actually
> > > truncated from the table when calculating the modules.alias PCI aliases,
> > > cause those unfortunate device IDs to not auto-load.
> > >
> > > Signed-off-by: Kees Cook <[EMAIL PROTECTED]>
> >
> > ACK
> 
> I must say, non-terminated PCI ids lists are constant PITA. There should be
> a way to a) put it in macro[1], so that terminator automatically added, and
> b) still allow #ifdef inside table like, e.g. 8139too does.
> 
> [1] or not macro, because #ifdef inside macros aren't allowed.

If you know of a way to do this in an easier manner, patches are always
gladly accepted :)

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.23-rc5 hangs on boot, apparently when initializing the EC

2007-09-12 Thread Alexey Starikovskiy
Chuck,

Please try last patch from bug 8709 (bugzilla.kernel.org), if it does not help, 
please open new bug, 
and submit acpidump and dmesg outputs.

Thanks,
Alex.

 
Chuck Ebbert wrote:
> 2.6.23-rc5-git1 hangs here, just before EC initialization.
> Pressing the power button briefly makes it continue, then the
> EC gets detected twice:
> 
> 
> ACPI: bus type pci registered
> PCI: Using configuration type 1
> 
> -- hangs here, press power button 
> 
> ACPI: EC: Look up EC in DSDT
> ACPI: EC: GPE = 0x10, I/O: command/status = 0x66, data = 0x62
> ACPI: System BIOS is requesting _OSI(Linux)
> ACPI: If "acpi_osi=Linux" works better,
> Please send dmidecode to [EMAIL PROTECTED]
> ACPI: Interpreter enabled
> ACPI: (supports S0 S3)
> ACPI: Using IOAPIC for interrupt routing
> ACPI: EC: GPE = 0x10, I/O: command/status = 0x66, data = 0x62
> ACPI: PCI Root Bridge [PCI0] (:00)
> PCI: Transparent bridge - :00:10.0
> ...
> 
> 
> System is an HP TX1000 notebook with AMD Turion X2 processor,
> running the x86_64 kernel. (And acpi_osi="Linux" doesn't seem to
> make any difference.)
> -
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread H. Peter Anvin
Brent Casavant wrote:
> On Wed, 12 Sep 2007, H. Peter Anvin wrote:
> 
>> Brent Casavant wrote:
> 
>>> http://marc.info/?l=linux-kernel&m=93032806224160&w=2
>> This link talks about file flags handling.  I don't see the relevance to
>> this problem at all.  However, this is a very long thread, so if there
>> is anything specific that you want to point to, then please elucidate.
> 
> Oops, my mistake -- I pasted the wrong URL.  I meant this thread, this
> post in particular:
> 
>   http://marc.info/?l=linux-kernel&m=88937224115435&w=2
> 
> Still, O_NOLINK would seem to be a valuable addition, and greatly
> simplify secure temporary file creation.
> 

Avoiding -- or at least detecting -- symlink racing with mkdir() is
relatively simple: run mkdir(), make sure you don't get EEXIST or
something like that, lstat() the resulting path -- it should be a
directory with all the right modes and ownerships.

I believe -- but I'm not certain -- that mkdtemp() in glibc will do all
this for you.  If not, I would consider that a glibc bug.

-hpa
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: irq load balancing

2007-09-12 Thread Chris Snook

Venkat Subbiah wrote:

Most of the load in my system is triggered by a single ethernet IRQ.
Essentially the IRQ schedules a tasklet and most of the work is done in the
tasklet which is scheduled in the IRQ. From what I read looks like the
tasklet would be executed on the same CPU on which it was scheduled. So this
means even in an SMP system it will be one processor which is overloaded.

So will using the user space IRQ loadbalancer really help?


A little bit.  It'll keep other IRQs on different CPUs, which will prevent other 
interrupts from causing cache and TLB evictions that could slow down the 
interrupt handler for the NIC.



What I am doubtful
about is that the user space load balance comes along and changes the
affinity once in a while. But really what I need is every interrupt to go to
a different CPU in a round robin fashion.


Doing it in a round-robin fashion will be disastrous for performance.  Your 
cache miss rate will go through the roof and you'll hit the slow paths in the 
network stack most of the time.



Looks like the APIC  can distribute IRQ's dynamically? Is this supported in
the kernel and any config or proc interface to turn this on/off.


/proc/irq/$FOO/smp_affinity is a bitmask.  You can mask an irq to multiple 
processors.  Of course, this will absolutely kill your performance.  That's why 
irqbalance never does this.


-- Chris
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: O_NOLINK for open()

2007-09-12 Thread Brent Casavant
On Wed, 12 Sep 2007, H. Peter Anvin wrote:

> Brent Casavant wrote:

> > http://marc.info/?l=linux-kernel&m=93032806224160&w=2
> 
> This link talks about file flags handling.  I don't see the relevance to
> this problem at all.  However, this is a very long thread, so if there
> is anything specific that you want to point to, then please elucidate.

Oops, my mistake -- I pasted the wrong URL.  I meant this thread, this
post in particular:

http://marc.info/?l=linux-kernel&m=88937224115435&w=2

Still, O_NOLINK would seem to be a valuable addition, and greatly
simplify secure temporary file creation.

Brent

-- 
Brent Casavant  All music is folk music.  I ain't
[EMAIL PROTECTED]never heard a horse sing a song.
Silicon Graphics, Inc.-- Louis Armstrong
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: libata not working for sis5533

2007-09-12 Thread Chuck Ebbert
On 08/31/2007 06:20 PM, Patrizio Bassi wrote:
> Patrizio Bassi ha scritto:
>> Michal Piotrowski ha scritto:
>>> Hi,
>>>
>>> [Adding IDE wizards to CC]
>>>
>>> On 26/08/07, Patrizio Bassi <[EMAIL PROTECTED]> wrote:
>>>   
 My sis630 chipset shipped with Asus A1000
 doesn't work properly with suspend with ide drivers
 (http://bugzilla.kernel.org/show_bug.cgi?id=7077)

 i tried to switch to libata but i cannot boot.
 I've enabled generic ide and sis specific code, both in-kernel. of
 course scsi too.

 when i boot i get: irq #14 nobody cared and stop

 i have to remove battery to reboot pc.
 I'm using 2.6.22.5, but i never got any libata kernel working.

 Patrizio

 ps. i'm writing from my desktop as i'm doing hardware mainteinance on
 the laptop and could not boot it

 Please CC me.

 lspci:
 00:00.0 Host bridge: Silicon Integrated Systems [SiS] 630 Host (rev 11)
 00:00.1 IDE interface: Silicon Integrated Systems [SiS] 5513 [IDE] (rev d0)
 00:01.0 ISA bridge: Silicon Integrated Systems [SiS] SiS85C503/5513 (LPC
 Bridge)
 00:01.1 Ethernet controller: Silicon Integrated Systems [SiS] SiS900
 PCI Fast Ethernet (rev 80)
 00:01.2 USB Controller: Silicon Integrated Systems [SiS] USB  1.0
 Controller (rev 07)
 00:01.3 USB Controller: Silicon Integrated Systems [SiS] USB 1.0
 Controller (rev 07)
 00:01.4 Multimedia audio controller: Silicon Integrated Systems [SiS]
 SiS PCI Audio Accelerator (rev 01)
  00:01.6 Modem: Silicon Integrated Systems [SiS] AC'97 Modem
 Controller (rev a0)
 00:02.0 PCI bridge: Silicon Integrated Systems [SiS] Virtual
 PCI-to-PCI bridge (AGP)
 00:0a.0 CardBus bridge: Ricoh Co Ltd RL5c476 II (rev 80)
 00:0a.1 CardBus bridge: Ricoh Co Ltd RL5c476 II (rev 80)
 01:00.0 VGA compatible controller: Silicon Integrated Systems [SiS]
 630/730 PCI/AGP VGA Display Adapter (rev 11)
 
>>> Regards,
>>> Michal
>>>
>>>   
>>
> i've been out for a week, but found no notice, did i lose any email or
> no activity on this issue?
> 

I assume you tried the standard workarounds, like

  pci=nommconf,nomsi
  pci=noacpi
  pci=biosirq

(one at a time, not all at once.)

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] Filter based on a nodemask as well as a gfp_mask

2007-09-12 Thread Christoph Lameter
On Wed, 12 Sep 2007, Mel Gorman wrote:

> - z++)
> - ;
> + if (likely(nodes == NULL))
> + for (; zonelist_zone_idx(z) > highest_zoneidx;
> + z++)
> + ;
> + else
> + for (; zonelist_zone_idx(z) > highest_zoneidx ||
> + (z->zone && !zref_in_nodemask(z, nodes));
> + z++)
> + ;
>  

Minor nitpick here: "for (;" should become "for ( ;" to have correct 
whitespace. However, it would be clearer to use a while here.

while (zonelist_zone_idx(z) > highest_zoneidx)
z++;

etc.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Re: Kernel Panic - 2.6.23-rc4-mm1 ia64 - was Re: Update: [Automatic] NUMA replicated pagecache ...

2007-09-12 Thread Balbir Singh
Lee Schermerhorn wrote:
> On Wed, 2007-09-12 at 16:41 +0100, Andy Whitcroft wrote:
>> On Wed, Sep 12, 2007 at 11:09:47AM -0400, Lee Schermerhorn wrote:
>>
 Interesting, I don't see a memory controller function in the stack
 trace, but I'll double check to see if I can find some silly race
 condition in there.
>>> right.  I noticed that after I sent the mail.  
>>>
>>> Also, config available at:
>>> http://free.linux.hp.com/~lts/Temp/config-2.6.23-rc4-mm1-gwydyr-nomemcont
>> Be interested to know the outcome of any bisect you do.  Given its
>> tripping in reclaim.
> 
> Problem isolated to memory controller patches.  This patch seems to fix
> this particular problem.  I've only run the test for a few minutes with
> and without memory controller configured, but I did observe reclaim
> kicking in several times.  W/o this patch, system would panic as soon as
> I entered direct/zone reclaim--less than a minute.
> 

Thanks, excellent catch! The patch looks sane.  Thanks for your help in
sorting this issue out. Hmm.. that means I never hit direct/zone reclaim
in my tests (I'll make a mental note to enhance my test cases to cover
this scenario).

> Lee
> 
> 
> PATCH 2.6.23-rc4-mm1 Memory Controller:  initialize all scan_controls'
>   isolate_pages member.
> 
> We need to initialize all scan_controls' isolate_pages member.
> Otherwise, shrink_active_list() attempts to execute at undefined
> location.
> 
> Signed-off-by:  Lee Schermerhorn <[EMAIL PROTECTED]>
> 
>  mm/vmscan.c |2 ++
>  1 file changed, 2 insertions(+)
> 
> Index: Linux/mm/vmscan.c
> ===
> --- Linux.orig/mm/vmscan.c2007-09-10 13:22:21.0 -0400
> +++ Linux/mm/vmscan.c 2007-09-12 15:30:27.0 -0400
> @@ -1758,6 +1758,7 @@ unsigned long shrink_all_memory(unsigned
>   .swap_cluster_max = nr_pages,
>   .may_writepage = 1,
>   .swappiness = vm_swappiness,
> + .isolate_pages = isolate_pages_global,
>   };
> 
>   current->reclaim_state = &reclaim_state;
> @@ -1941,6 +1942,7 @@ static int __zone_reclaim(struct zone *z
>   SWAP_CLUSTER_MAX),
>   .gfp_mask = gfp_mask,
>   .swappiness = vm_swappiness,
> + .isolate_pages = isolate_pages_global,
>   };
>   unsigned long slab_reclaimable;
> 
> 
> 


-- 
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ata_piix, laptop cdrom, ICH7: EH, limiting speed to PIO

2007-09-12 Thread Sergey Dolgov
On Wed, Sep 12, 2007 at 10:19:03PM +0200, Michal Piotrowski wrote:
> Sergey Dolgov pisze:
> > Hi Michal,
> > 
> > On Wed, Sep 12, 2007 at 06:33:20PM +0200, Michal Piotrowski wrote:
> >> Hi Sergey,
> >>
> >> On 11/09/2007, Sergey Dolgov <[EMAIL PROTECTED]> wrote:
> >>> Hi!
> >>>
> >>> On my hp nx7300 laptop, 2 following scenarios can happen during bootup
> >>> (see attachments for the full logs): the "good" one [1] and the one
> >>> where multiple EHs lead to limiting the speed [2].
> >>>
> >>> [1] one is more rare, but it seems to be persistent over reboots: once
> >>> it happened, just rebooting the machine always results in behaviour
> >>> like [1].
> >>>
> >>> [1] results in a working cdrom from the start. An attempt to use cdrom
> >>> after [2] happened results in even more EHs, resulting in "configured
> >>> for PIO4", after which the cdrom is finally working.
> >>>
> >>> The version I'm using is 2.6.23-rc6, but the same used to happen with
> >>> the previous rc's, and probably with 2.6.22 too (I can check).
> >> It would be great if you could check it.
> > 
> > I've just built 2.6.22.6 with basically the same configuration, and
> > the behaviour is just the same, i.e. both [1] and [2] still can
> > happen.
> 
> Ok, thanks.
> 
> BTW please check cables and PCU.

What's a PCU? BTW, this only happens when using libata of course. The
old CONFIG_IDE stuff works fine every time.

> 
> > 
> >>> [1]:
> >>>
> >>>[   13.026676] ata_piix :00:1f.1: version 2.12
> >>>[   13.026701] ACPI: PCI Interrupt :00:1f.1[A] -> GSI 16 (level, 
> >>> low) -> IRQ 16
> >>>[   13.026898] PCI: Setting latency timer of device :00:1f.1 to 64
> >>>[   13.026994] scsi4 : ata_piix
> >>>[   13.027209] scsi5 : ata_piix
> >>>[   13.027385] ata5: PATA max UDMA/133 cmd 0x000101f0 ctl 0x000103f6 
> >>> bmdma 0x000140a0 irq 14
> >>>[   13.027484] ata6: PATA max UDMA/133 cmd 0x00010170 ctl 0x00010376 
> >>> bmdma 0x000140a8 irq 15
> >>>[   13.339308] ata5.00: ATAPI: HL-DT-ST DVDRAM GSA-T10N, PC05, max 
> >>> MWDMA2
> >>>[   13.505922] ata5.00: configured for MWDMA2
> >>>[   13.506026] ata6: port disabled. ignoring.
> >>>[   13.509916] scsi 4:0:0:0: CD-ROMHL-DT-ST DVDRAM 
> >>> GSA-T10N  PC05 PQ: 0 ANSI: 5
> >>>
> >>> [2]:
> >>>
> >>>[   10.007152] ata_piix :00:1f.1: version 2.12
> >>>[   10.007178] ACPI: PCI Interrupt :00:1f.1[A] -> GSI 16 (level, 
> >>> low) -> IRQ 16
> >>>[   10.007369] PCI: Setting latency timer of device :00:1f.1 to 64
> >>>[   10.007464] scsi4 : ata_piix
> >>>[   10.007680] scsi5 : ata_piix
> >>>[   10.007856] ata5: PATA max UDMA/133 cmd 0x000101f0 ctl 0x000103f6 
> >>> bmdma 0x000140a0 irq 14
> >>>[   10.007956] ata6: PATA max UDMA/133 cmd 0x00010170 ctl 0x00010376 
> >>> bmdma 0x000140a8 irq 15
> >>>[   10.320462] ata5.00: ATAPI: HL-DT-ST DVDRAM GSA-T10N, PC05, max 
> >>> MWDMA2
> >>>[   10.487077] ata5.00: configured for MWDMA2
> >>>[   10.487181] ata6: port disabled. ignoring.
> >>>[   15.985240] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> >>> 0x2 frozen
> >>>[   15.985325] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> >>> cdb 0x12 data 96 in
> >>>[   15.985327]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> >>> 0x4 (timeout)
> >>>[   15.985532] ata5: soft resetting port
> >>>[   16.465489] ata5.00: configured for MWDMA2
> >>>[   16.465571] ata5: EH complete
> >>>[   21.963643] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> >>> 0x2 frozen
> >>>[   21.963725] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> >>> cdb 0x12 data 96 in
> >>>[   21.963727]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> >>> 0x4 (timeout)
> >>>[   21.963930] ata5: soft resetting port
> >>>[   22.443900] ata5.00: configured for MWDMA2
> >>>[   22.443979] ata5: EH complete
> >>>[   27.942051] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> >>> 0x2 frozen
> >>>[   27.942134] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> >>> cdb 0x12 data 96 in
> >>>[   27.942136]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> >>> 0x4 (timeout)
> >>>[   27.942341] ata5: soft resetting port
> >>>[   28.422309] ata5.00: configured for MWDMA2
> >>>[   28.422389] ata5: EH complete
> >>>[   33.920457] ata5.00: limiting speed to MWDMA1:PIO4
> >>>[   33.920532] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 
> >>> 0x2 frozen
> >>>[   33.920614] ata5.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 
> >>> cdb 0x12 data 96 in
> >>>[   33.920616]  res 40/00:02:00:24:00/00:00:00:00:00/a0 Emask 
> >>> 0x4 (timeout)
> >>>[   33.920820] ata5: soft resetting port
> >>>[   34.400708] ata5.00: configured for MWDMA1
> >>>[   34.400790] ata5: EH complete
> >>>[   34.400869] scsi scan: 96 byte inquiry failed.  Consider 
> >>> BLIST_INQUIRY_36 for this 

Re: r8169: can't send magic packet for Wake-On-Lan

2007-09-12 Thread Francois Romieu
Xavier Bestel <[EMAIL PROTECTED]> :
[...]
> Err sorry, I mixed up everything ... I'm using *etherwake* to make the
> WOL magic packet, and ethtool to check the interface options.

Weird.

Can you capture the traffic from the receiving (live) r8169 with
both senders and specify the kernel version of the sender/receiver ?

-- 
Ueimor
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 11/11] UML - Use *_PER_* definitions

2007-09-12 Thread Jeff Dike
There are various uses of powers of 1000, plus the odd BILLION
constant in the time code.  However, there are perfectly good definitions of
*SEC_PER_*SEC in linux/time.h which can be used instead.

These are replaced directly in kernel code.  Userspace code imports
those constants as UM_*SEC_PER_*SEC and uses these.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/include/common-offsets.h |4 
 arch/um/include/os.h |2 --
 arch/um/kernel/time.c|9 +
 arch/um/os-Linux/skas/process.c  |2 +-
 arch/um/os-Linux/time.c  |   16 
 5 files changed, 18 insertions(+), 15 deletions(-)

Index: linux-2.6.22/arch/um/kernel/time.c
===
--- linux-2.6.22.orig/arch/um/kernel/time.c 2007-09-12 15:47:42.0 
-0400
+++ linux-2.6.22/arch/um/kernel/time.c  2007-09-12 15:51:25.0 -0400
@@ -17,7 +17,7 @@
  */
 unsigned long long sched_clock(void)
 {
-   return (unsigned long long)jiffies_64 * (1000000000 / HZ);
+   return (unsigned long long)jiffies_64 * (NSEC_PER_SEC / HZ);
 }
 
 void timer_handler(int sig, struct uml_pt_regs *regs)
@@ -119,8 +119,9 @@ void __init time_init(void)
long long nsecs;
 
nsecs = os_nsecs();
-   set_normalized_timespec(&wall_to_monotonic, -nsecs / BILLION,
-   -nsecs % BILLION);
-   set_normalized_timespec(&xtime, nsecs / BILLION, nsecs % BILLION);
+   set_normalized_timespec(&wall_to_monotonic, -nsecs / NSEC_PER_SEC,
+   -nsecs % NSEC_PER_SEC);
+   set_normalized_timespec(&xtime, nsecs / NSEC_PER_SEC,
+   nsecs % NSEC_PER_SEC);
late_time_init = setup_itimer;
 }
Index: linux-2.6.22/arch/um/os-Linux/time.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/time.c   2007-09-12 15:41:43.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/time.c2007-09-12 15:52:19.0 
-0400
@@ -14,7 +14,7 @@
 
 int set_interval(void)
 {
-   int usec = 1000000/UM_HZ;
+   int usec = UM_USEC_PER_SEC / UM_HZ;
struct itimerval interval = ((struct itimerval) { { 0, usec },
  { 0, usec } });
 
@@ -26,11 +26,11 @@ int set_interval(void)
 
 int timer_one_shot(int ticks)
 {
-   unsigned long usec = ticks * 1000000 / UM_HZ;
-   unsigned long sec = usec / 1000000;
+   unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ;
+   unsigned long sec = usec / UM_USEC_PER_SEC;
struct itimerval interval;
 
-   usec %= 1000000;
+   usec %= UM_USEC_PER_SEC;
interval = ((struct itimerval) { { 0, 0 }, { sec, usec } });
 
if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1)
@@ -47,8 +47,8 @@ static inline unsigned long long tv_to_n
 * return.
 */
 
-   return ((unsigned long long) tv->tv_sec) * BILLION +
-   tv->tv_usec * 1000;
+   return ((unsigned long long) tv->tv_sec) * UM_NSEC_PER_SEC +
+   tv->tv_usec * UM_NSEC_PER_USEC;
 }
 
 unsigned long long disable_timer(void)
@@ -74,8 +74,8 @@ extern void alarm_handler(int sig, struc
 
 void idle_sleep(unsigned long long nsecs)
 {
-   struct timespec ts = { .tv_sec  = nsecs / BILLION,
-  .tv_nsec = nsecs % BILLION };
+   struct timespec ts = { .tv_sec  = nsecs / UM_NSEC_PER_SEC,
+  .tv_nsec = nsecs % UM_NSEC_PER_SEC };
 
if (nanosleep(&ts, &ts) == 0)
alarm_handler(SIGVTALRM, NULL);
Index: linux-2.6.22/arch/um/os-Linux/skas/process.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/skas/process.c   2007-09-12 
15:47:42.0 -0400
+++ linux-2.6.22/arch/um/os-Linux/skas/process.c2007-09-12 
15:52:57.0 -0400
@@ -378,7 +378,7 @@ __initcall(init_thread_regs);
 
 int copy_context_skas0(unsigned long new_stack, int pid)
 {
-   struct timeval tv = { .tv_sec = 0, .tv_usec = 1000000 / UM_HZ };
+   struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ };
int err;
unsigned long current_stack = current_stub_stack();
struct stub_data *data = (struct stub_data *) current_stack;
Index: linux-2.6.22/arch/um/include/common-offsets.h
===
--- linux-2.6.22.orig/arch/um/include/common-offsets.h  2007-09-11 
13:36:15.0 -0400
+++ linux-2.6.22/arch/um/include/common-offsets.h   2007-09-12 
15:51:25.0 -0400
@@ -34,3 +34,7 @@ DEFINE(crypto_tfm_ctx_offset, offsetof(s
 DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
 
 DEFINE(UM_HZ, HZ);
+
+DEFINE(UM_USEC_PER_SEC, USEC_PER_SEC);
+DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
+DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
Index: linux-2.6.22/arch/um/include/os.h
===

[PATCH 4/11] UML - Move timer signal initialization

2007-09-12 Thread Jeff Dike
Move timer signal initialization from init_irq_signals to a new
function, timer_init.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/include/os.h  |1 +
 arch/um/kernel/time.c |2 ++
 arch/um/os-Linux/irq.c|4 
 arch/um/os-Linux/signal.c |   10 ++
 4 files changed, 13 insertions(+), 4 deletions(-)

Index: linux-2.6.22/arch/um/kernel/time.c
===
--- linux-2.6.22.orig/arch/um/kernel/time.c 2007-09-09 11:51:19.0 
-0400
+++ linux-2.6.22/arch/um/kernel/time.c  2007-09-12 14:55:39.0 -0400
@@ -97,6 +97,8 @@ static void register_timer(void)
 {
int err;
 
+   timer_init();
+
err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL);
if (err != 0)
printk(KERN_ERR "register_timer : request_irq failed - "
Index: linux-2.6.22/arch/um/os-Linux/irq.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/irq.c2007-09-09 10:23:31.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/irq.c 2007-09-12 14:50:58.0 -0400
@@ -145,10 +145,6 @@ void init_irq_signals(int on_sigstack)
 
flags = on_sigstack ? SA_ONSTACK : 0;
 
-   set_handler(SIGVTALRM, (__sighandler_t) alarm_handler,
-   flags | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
-   set_handler(SIGALRM, (__sighandler_t) alarm_handler,
-   flags | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART,
SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
signal(SIGWINCH, SIG_IGN);
Index: linux-2.6.22/arch/um/include/os.h
===
--- linux-2.6.22.orig/arch/um/include/os.h  2007-09-12 14:53:25.0 
-0400
+++ linux-2.6.22/arch/um/include/os.h   2007-09-12 14:56:15.0 -0400
@@ -227,6 +227,7 @@ extern int set_umid(char *name);
 extern char *get_umid(void);
 
 /* signal.c */
+extern void timer_init(void);
 extern void set_sigstack(void *sig_stack, int size);
 extern void remove_sigstack(void);
 extern void set_handler(int sig, void (*handler)(int), int flags, ...);
Index: linux-2.6.22/arch/um/os-Linux/signal.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/signal.c 2007-09-11 20:28:13.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/signal.c  2007-09-12 14:55:18.0 
-0400
@@ -85,6 +85,16 @@ void alarm_handler(int sig, struct sigco
set_signals(enabled);
 }
 
+void timer_init(void)
+{
+   set_handler(SIGVTALRM, (__sighandler_t) alarm_handler,
+   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH,
+   SIGALRM, -1);
+   set_handler(SIGALRM, (__sighandler_t) alarm_handler,
+   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH,
+   SIGALRM, -1);
+}
+
 void set_sigstack(void *sig_stack, int size)
 {
stack_t stack = ((stack_t) { .ss_flags  = 0,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 10/11] UML - Eliminate SIGALRM usage

2007-09-12 Thread Jeff Dike
Now that ITIMER_REAL is no longer used, there is no need for any use
of SIGALRM whatsoever.  This patch removes all mention of it.

In addition, real_alarm_handler took a signal argument which is now
always SIGVTALRM.  So, that is gone.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/os-Linux/irq.c  |2 +-
 arch/um/os-Linux/main.c |2 +-
 arch/um/os-Linux/process.c  |   10 +-
 arch/um/os-Linux/signal.c   |   28 +++-
 arch/um/os-Linux/skas/process.c |4 +---
 arch/um/os-Linux/skas/trap.c|5 ++---
 arch/um/os-Linux/trap.c |1 -
 7 files changed, 17 insertions(+), 35 deletions(-)

Index: linux-2.6.22/arch/um/os-Linux/skas/process.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/skas/process.c   2007-09-12 
15:39:37.0 -0400
+++ linux-2.6.22/arch/um/os-Linux/skas/process.c2007-09-12 
15:47:42.0 -0400
@@ -222,7 +222,6 @@ static int userspace_tramp(void *stack)
sigemptyset(_mask);
sigaddset(_mask, SIGIO);
sigaddset(_mask, SIGWINCH);
-   sigaddset(_mask, SIGALRM);
sigaddset(_mask, SIGVTALRM);
sigaddset(_mask, SIGUSR1);
sa.sa_flags = SA_ONSTACK;
@@ -522,8 +521,7 @@ int start_idle_thread(void *stack, jmp_b
int n;
 
set_handler(SIGWINCH, (__sighandler_t) sig_handler,
-   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGALRM,
-   SIGVTALRM, -1);
+   SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1);
 
/*
 * Can't use UML_SETJMP or UML_LONGJMP here because they save
Index: linux-2.6.22/arch/um/os-Linux/irq.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/irq.c2007-09-12 14:50:58.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/irq.c 2007-09-12 15:47:42.0 -0400
@@ -146,6 +146,6 @@ void init_irq_signals(int on_sigstack)
flags = on_sigstack ? SA_ONSTACK : 0;
 
set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART,
-   SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
signal(SIGWINCH, SIG_IGN);
 }
Index: linux-2.6.22/arch/um/os-Linux/main.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/main.c   2007-09-11 14:09:28.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/main.c2007-09-12 15:47:42.0 
-0400
@@ -161,7 +161,7 @@ int __init main(int argc, char **argv, c
 * some time) and cause a segfault.
 */
 
-   /* stop timers and set SIG*ALRM to be ignored */
+   /* stop timers and set SIGVTALRM to be ignored */
disable_timer();
 
/* disable SIGIO for the fds and set SIGIO to be ignored */
Index: linux-2.6.22/arch/um/os-Linux/process.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/process.c2007-09-12 
14:50:58.0 -0400
+++ linux-2.6.22/arch/um/os-Linux/process.c 2007-09-12 15:47:42.0 
-0400
@@ -238,15 +238,15 @@ out:
 void init_new_thread_signals(void)
 {
set_handler(SIGSEGV, (__sighandler_t) sig_handler, SA_ONSTACK,
-   SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
set_handler(SIGTRAP, (__sighandler_t) sig_handler, SA_ONSTACK,
-   SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
set_handler(SIGFPE, (__sighandler_t) sig_handler, SA_ONSTACK,
-   SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
set_handler(SIGILL, (__sighandler_t) sig_handler, SA_ONSTACK,
-   SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
set_handler(SIGBUS, (__sighandler_t) sig_handler, SA_ONSTACK,
-   SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+   SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
signal(SIGHUP, SIG_IGN);
 
init_irq_signals(1);
Index: linux-2.6.22/arch/um/os-Linux/signal.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/signal.c 2007-09-12 15:41:04.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/signal.c  2007-09-12 15:48:20.0 
-0400
@@ -15,8 +15,7 @@
 #include "user.h"
 
 /*
- * These are the asynchronous signals.  SIGVTALRM and SIGARLM are handled
- * together under SIGVTALRM_BIT.  SIGPROF is excluded because we want to
+ * These are the asynchronous signals.  SIGPROF is excluded because we want to
  * be able to profile 

Re: [PATCH 4/6] Have zonelist contains structs with both a zone pointer and zone_idx

2007-09-12 Thread Christoph Lameter
On Wed, 12 Sep 2007, Mel Gorman wrote:

>  /*
> + * This struct contains information about a zone in a zonelist. It is stored
> + * here to avoid dereferences into large structures and lookups of tables
> + */
> +struct zoneref {
> + struct zone *zone;  /* Pointer to actual zone */
> + int zone_idx;   /* zone_idx(zoneref->zone) */
> +};


Well, the structure is going to be 12 bytes wide. Since pointers have to be 
aligned to 8 bytes, we will effectively have to use 16 bytes anyway. There 
would be no additional memory use if we added another 4 bytes.

But let's get this merged. We can sort this out later. Too many 
oscillations already.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/11] UML - Clean up switching between virtual and real timers

2007-09-12 Thread Jeff Dike
Fix up the switching between virtual and real timers.  When the idle loop
sleeps, the timer at that point must be real time.  At all other
times, the timer must be virtual.  Even when userspace is running, and
the kernel is asleep, the virtual timer is correct because the process
timer will be running.

The timer switch used to be in the context switch and in the timer
handler code.  This is moved to the idle loop and the signal handler,
making it much more clear why it is happening.

switch_timers now returns the old timer type so that it may be
restored.  The signal handler uses this in order to restore the
previous timer type when it returns.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/include/os.h  |2 +-
 arch/um/kernel/process.c  |9 ++---
 arch/um/os-Linux/signal.c |9 +++--
 arch/um/os-Linux/time.c   |   27 ---
 4 files changed, 26 insertions(+), 21 deletions(-)

Index: linux-2.6.22/arch/um/include/os.h
===
--- linux-2.6.22.orig/arch/um/include/os.h  2007-08-30 16:27:33.0 
-0400
+++ linux-2.6.22/arch/um/include/os.h   2007-08-30 16:30:49.0 -0400
@@ -250,7 +250,7 @@ extern void os_dump_core(void);
 /* time.c */
 #define BILLION (1000 * 1000 * 1000)
 
-extern void switch_timers(int to_real);
+extern int switch_timers(int to_real);
 extern void idle_sleep(int secs);
 extern int set_interval(int is_virtual);
 extern void disable_timer(void);
Index: linux-2.6.22/arch/um/kernel/process.c
===
--- linux-2.6.22.orig/arch/um/kernel/process.c  2007-08-30 16:27:32.0 
-0400
+++ linux-2.6.22/arch/um/kernel/process.c   2007-08-30 16:30:49.0 
-0400
@@ -95,18 +95,11 @@ void *_switch_to(void *prev, void *next,
do {
current->thread.saved_task = NULL;
 
-   /* XXX need to check runqueues[cpu].idle */
-   if (current->pid == 0)
-   switch_timers(0);
-
switch_threads(>thread.switch_buf,
   >thread.switch_buf);
 
arch_switch_to(current->thread.prev_sched, current);
 
-   if (current->pid == 0)
-   switch_timers(1);
-
if (current->thread.saved_task)
show_regs(&(current->thread.regs));
next= current->thread.saved_task;
@@ -251,7 +244,9 @@ void default_idle(void)
if (need_resched())
schedule();
 
+   switch_timers(1);
idle_sleep(10);
+   switch_timers(0);
}
 }
 
Index: linux-2.6.22/arch/um/os-Linux/signal.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/signal.c 2007-08-30 16:27:32.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/signal.c  2007-08-30 16:30:49.0 
-0400
@@ -59,17 +59,11 @@ static void real_alarm_handler(int sig, 
 {
struct uml_pt_regs regs;
 
-   if (sig == SIGALRM)
-   switch_timers(0);
-
if (sc != NULL)
copy_sc(, sc);
regs.is_user = 0;
unblock_signals();
timer_handler(sig, );
-
-   if (sig == SIGALRM)
-   switch_timers(1);
 }
 
 void alarm_handler(int sig, struct sigcontext *sc)
@@ -116,6 +110,7 @@ void (*handlers[_NSIG])(int sig, struct 
 void handle_signal(int sig, struct sigcontext *sc)
 {
unsigned long pending = 0;
+   int timer = switch_timers(0);
 
do {
int nested, bail;
@@ -152,6 +147,8 @@ void handle_signal(int sig, struct sigco
if (!nested)
pending = from_irq_stack(nested);
} while (pending);
+
+   switch_timers(timer);
 }
 
 extern void hard_handler(int sig);
Index: linux-2.6.22/arch/um/os-Linux/time.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/time.c   2007-08-30 16:30:13.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/time.c2007-08-30 16:30:49.0 
-0400
@@ -12,6 +12,8 @@
 #include "os.h"
 #include "user.h"
 
+static int is_real_timer = 0;
+
 int set_interval(int is_virtual)
 {
int usec = 100/UM_HZ;
@@ -39,12 +41,14 @@ void disable_timer(void)
signal(SIGVTALRM, SIG_IGN);
 }
 
-void switch_timers(int to_real)
+int switch_timers(int to_real)
 {
struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
-   struct itimerval enable = ((struct itimerval) { { 0, 100/UM_HZ },
-   { 0, 100/UM_HZ }});
-   int old, new;
+   struct itimerval enable;
+   int old, new, old_type = is_real_timer;
+
+   if(to_real == old_type)
+   return to_real;
 
if (to_real) {
old = ITIMER_VIRTUAL;
@@ -55,10 +59,19 @@ void 

[PATCH 9/11] UML - Eliminate ticking in the idle loop

2007-09-12 Thread Jeff Dike
The idle loop no longer needs SIGALRM firing - it can just sleep
for the requisite amount of time and fake a timer interrupt when it
finishes.

Any use of ITIMER_REAL now goes away.  disable_timer only turns off
ITIMER_VIRTUAL.  switch_timers is no longer needed, so it and all
calls to it go away.

disable_timer now returns the amount of time remaining on the timer.
default_idle uses this to tell idle_sleep how long to sleep.
idle_sleep will call alarm_handler if nanosleep returns 0, which is
the case if it didn't return early due to an interrupt.  Otherwise, it
just returns.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/include/os.h  |5 +--
 arch/um/kernel/process.c  |7 +++--
 arch/um/os-Linux/signal.c |3 --
 arch/um/os-Linux/time.c   |   58 --
 4 files changed, 17 insertions(+), 56 deletions(-)

Index: linux-2.6.22/arch/um/os-Linux/time.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/time.c   2007-09-12 15:15:48.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/time.c2007-09-12 15:41:43.0 
-0400
@@ -12,8 +12,6 @@
 #include "os.h"
 #include "user.h"
 
-static int is_real_timer = 0;
-
 int set_interval(void)
 {
int usec = 100/UM_HZ;
@@ -53,51 +51,15 @@ static inline unsigned long long tv_to_n
tv->tv_usec * 1000;
 }
 
-void disable_timer(void)
+unsigned long long disable_timer(void)
 {
-   struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
+   struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } });
 
-   if ((setitimer(ITIMER_VIRTUAL, , NULL) < 0) ||
-   (setitimer(ITIMER_REAL, , NULL) < 0))
+   if(setitimer(ITIMER_VIRTUAL, , ) < 0)
printk(UM_KERN_ERR "disable_timer - setitimer failed, "
   "errno = %d\n", errno);
 
-   /* If there are signals already queued, after unblocking ignore them */
-   signal(SIGALRM, SIG_IGN);
-   signal(SIGVTALRM, SIG_IGN);
-}
-
-int switch_timers(int to_real)
-{
-   struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
-   struct itimerval enable;
-   int old, new, old_type = is_real_timer;
-
-   if(to_real == old_type)
-   return to_real;
-
-   if (to_real) {
-   old = ITIMER_VIRTUAL;
-   new = ITIMER_REAL;
-   }
-   else {
-   old = ITIMER_REAL;
-   new = ITIMER_VIRTUAL;
-   }
-
-   if (setitimer(old, , ) < 0)
-   printk(UM_KERN_ERR "switch_timers - setitimer disable failed, "
-  "errno = %d\n", errno);
-
-   if((enable.it_value.tv_sec == 0) && (enable.it_value.tv_usec == 0))
-   enable.it_value = enable.it_interval;
-
-   if (setitimer(new, , NULL))
-   printk(UM_KERN_ERR "switch_timers - setitimer enable failed, "
-  "errno = %d\n", errno);
-
-   is_real_timer = to_real;
-   return old_type;
+   return tv_to_nsec(_value);
 }
 
 unsigned long long os_nsecs(void)
@@ -108,11 +70,13 @@ unsigned long long os_nsecs(void)
return tv_to_nsec();
 }
 
-void idle_sleep(int secs)
+extern void alarm_handler(int sig, struct sigcontext *sc);
+
+void idle_sleep(unsigned long long nsecs)
 {
-   struct timespec ts;
+   struct timespec ts = { .tv_sec  = nsecs / BILLION,
+  .tv_nsec = nsecs % BILLION };
 
-   ts.tv_sec = secs;
-   ts.tv_nsec = 0;
-   nanosleep(, NULL);
+   if (nanosleep(, ) == 0)
+   alarm_handler(SIGVTALRM, NULL);
 }
Index: linux-2.6.22/arch/um/os-Linux/signal.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/signal.c 2007-09-12 14:55:18.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/signal.c  2007-09-12 15:41:04.0 
-0400
@@ -120,7 +120,6 @@ void (*handlers[_NSIG])(int sig, struct 
 void handle_signal(int sig, struct sigcontext *sc)
 {
unsigned long pending = 0;
-   int timer = switch_timers(0);
 
do {
int nested, bail;
@@ -157,8 +156,6 @@ void handle_signal(int sig, struct sigco
if (!nested)
pending = from_irq_stack(nested);
} while (pending);
-
-   switch_timers(timer);
 }
 
 extern void hard_handler(int sig);
Index: linux-2.6.22/arch/um/kernel/process.c
===
--- linux-2.6.22.orig/arch/um/kernel/process.c  2007-09-12 15:15:48.0 
-0400
+++ linux-2.6.22/arch/um/kernel/process.c   2007-09-12 15:41:04.0 
-0400
@@ -235,6 +235,8 @@ void initial_thread_cb(void (*proc)(void
 
 void default_idle(void)
 {
+   unsigned long long nsecs;
+
while(1) {
/* endless idle loop with no priority at all */
 
@@ -246,9 +248,8 @@ void default_idle(void)

[PATCH 1/11] UML - Eliminate hz()

2007-09-12 Thread Jeff Dike
Eliminate hz() since its only purpose was to provide a kernel-space
constant to userspace code.  This can be done instead by providing the
constant directly through kernel_constants.h.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/include/common-offsets.h |2 ++
 arch/um/include/kern_util.h  |1 -
 arch/um/kernel/time.c|5 -
 arch/um/os-Linux/skas/process.c  |5 +++--
 arch/um/os-Linux/time.c  |7 +++
 5 files changed, 8 insertions(+), 12 deletions(-)

Index: linux-2.6.22/arch/um/include/common-offsets.h
===
--- linux-2.6.22.orig/arch/um/include/common-offsets.h  2007-08-30 
21:27:32.0 +0100
+++ linux-2.6.22/arch/um/include/common-offsets.h   2007-08-30 
21:30:13.0 +0100
@@ -32,3 +32,5 @@ DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
 
 DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
+
+DEFINE(UM_HZ, HZ);
Index: linux-2.6.22/arch/um/include/kern_util.h
===
--- linux-2.6.22.orig/arch/um/include/kern_util.h   2007-08-30 
21:27:33.0 +0100
+++ linux-2.6.22/arch/um/include/kern_util.h2007-08-30 21:30:13.0 
+0100
@@ -64,7 +64,6 @@ extern void paging_init(void);
 extern void init_flush_vm(void);
 extern void *syscall_sp(void *t);
 extern void syscall_trace(struct uml_pt_regs *regs, int entryexit);
-extern int hz(void);
 extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
 extern void interrupt_end(void);
 extern void initial_thread_cb(void (*proc)(void *), void *arg);
Index: linux-2.6.22/arch/um/kernel/time.c
===
--- linux-2.6.22.orig/arch/um/kernel/time.c 2007-08-30 21:27:32.0 
+0100
+++ linux-2.6.22/arch/um/kernel/time.c  2007-09-02 19:01:28.0 +0100
@@ -11,11 +11,6 @@
 #include "kern_util.h"
 #include "os.h"
 
-int hz(void)
-{
-   return HZ;
-}
-
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
Index: linux-2.6.22/arch/um/os-Linux/skas/process.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/skas/process.c   2007-08-30 
21:27:32.0 +0100
+++ linux-2.6.22/arch/um/os-Linux/skas/process.c2007-09-02 
19:01:28.0 +0100
@@ -392,8 +392,9 @@ int copy_context_skas0(unsigned long new
*data = ((struct stub_data) { .offset   = MMAP_OFFSET(new_offset),
  .fd   = new_fd,
  .timer= ((struct itimerval)
-   { { 0, 100 / hz() },
- { 0, 100 / hz() }})});
+   { { 0, 100 / UM_HZ },
+ { 0, 100 / UM_HZ }})
+});
err = ptrace_setregs(pid, thread_regs);
if (err < 0)
panic("copy_context_skas0 : PTRACE_SETREGS failed, "
Index: linux-2.6.22/arch/um/os-Linux/time.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/time.c   2007-08-30 21:27:32.0 
+0100
+++ linux-2.6.22/arch/um/os-Linux/time.c2007-09-02 19:01:28.0 
+0100
@@ -8,14 +8,13 @@
 #include 
 #include 
 #include 
-#include "kern_util.h"
 #include "kern_constants.h"
 #include "os.h"
 #include "user.h"
 
 int set_interval(int is_virtual)
 {
-   int usec = 100/hz();
+   int usec = 100/UM_HZ;
int timer_type = is_virtual ? ITIMER_VIRTUAL : ITIMER_REAL;
struct itimerval interval = ((struct itimerval) { { 0, usec },
  { 0, usec } });
@@ -43,8 +42,8 @@ void disable_timer(void)
 void switch_timers(int to_real)
 {
struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
-   struct itimerval enable = ((struct itimerval) { { 0, 100/hz() },
-   { 0, 100/hz() }});
+   struct itimerval enable = ((struct itimerval) { { 0, 100/UM_HZ },
+   { 0, 100/UM_HZ }});
int old, new;
 
if (to_real) {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/11] UML - GENERIC_TIME support

2007-09-12 Thread Jeff Dike
Enable CONFIG_GENERIC_TIME.

As a side-effect of this, the UML implementations of do_gettimeofday
and do_settimeofday go away, as these are provided by generic code.
set_time also goes away since it was only used by do_settimeofday.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/Kconfig|5 +
 arch/um/kernel/ksyms.c |3 ---
 arch/um/kernel/time.c  |   40 
 3 files changed, 5 insertions(+), 43 deletions(-)

Index: linux-2.6.22/arch/um/Kconfig
===
--- linux-2.6.22.orig/arch/um/Kconfig   2007-09-12 14:53:25.0 -0400
+++ linux-2.6.22/arch/um/Kconfig2007-09-12 14:58:02.0 -0400
@@ -55,6 +55,10 @@ config GENERIC_BUG
default y
depends on BUG
 
+config GENERIC_TIME
+   bool
+   default y
+
 # Used in kernel/irq/manage.c and include/linux/irq.h
 config IRQ_RELEASE_METHOD
bool
@@ -75,6 +79,7 @@ config STATIC_LINK
 
 source "arch/um/Kconfig.arch"
 source "mm/Kconfig"
+source "kernel/time/Kconfig"
 
 config LD_SCRIPT_STATIC
bool
Index: linux-2.6.22/arch/um/kernel/ksyms.c
===
--- linux-2.6.22.orig/arch/um/kernel/ksyms.c2007-09-12 14:53:25.0 
-0400
+++ linux-2.6.22/arch/um/kernel/ksyms.c 2007-09-12 14:58:02.0 -0400
@@ -67,9 +67,6 @@ EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(dump_thread);
 
-EXPORT_SYMBOL(do_gettimeofday);
-EXPORT_SYMBOL(do_settimeofday);
-
 #ifdef CONFIG_SMP
 
 /* required for SMP */
Index: linux-2.6.22/arch/um/kernel/time.c
===
--- linux-2.6.22.orig/arch/um/kernel/time.c 2007-09-12 14:55:39.0 
-0400
+++ linux-2.6.22/arch/um/kernel/time.c  2007-09-12 14:58:02.0 -0400
@@ -123,46 +123,6 @@ void time_init(void)
late_time_init = register_timer;
 }
 
-void do_gettimeofday(struct timeval *tv)
-{
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-   unsigned long long nsecs = get_time();
-#else
-   unsigned long long nsecs = (unsigned long long) xtime.tv_sec * BILLION +
-   xtime.tv_nsec;
-#endif
-   tv->tv_sec = nsecs / NSEC_PER_SEC;
-   /*
-* Careful about calculations here - this was originally done as
-* (nsecs - tv->tv_sec * NSEC_PER_SEC) / NSEC_PER_USEC
-* which gave bogus (> 100) values.  Dunno why, suspect gcc
-* (4.0.0) miscompiled it, or there's a subtle 64/32-bit conversion
-* problem that I missed.
-*/
-   nsecs -= tv->tv_sec * NSEC_PER_SEC;
-   tv->tv_usec = (unsigned long) nsecs / NSEC_PER_USEC;
-}
-
-static inline void set_time(unsigned long long nsecs)
-{
-   unsigned long long now;
-   unsigned long flags;
-
-   spin_lock_irqsave(_spinlock, flags);
-   now = os_nsecs();
-   local_offset = nsecs - now;
-   spin_unlock_irqrestore(_spinlock, flags);
-
-   clock_was_set();
-}
-
-int do_settimeofday(struct timespec *tv)
-{
-   set_time((unsigned long long) tv->tv_sec * NSEC_PER_SEC + tv->tv_nsec);
-
-   return 0;
-}
-
 void timer_handler(int sig, struct uml_pt_regs *regs)
 {
if (current_thread->cpu == 0)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 8/11] UML - Tickless support

2007-09-12 Thread Jeff Dike
Enable tickless support.

CONFIG_TICK_ONESHOT and CONFIG_NO_HZ are enabled.

itimer_clockevent gets CLOCK_EVT_FEAT_ONESHOT and an implementation of
.set_next_event.

CONFIG_UML_REAL_TIME_CLOCK goes away because it only makes sense when
there is a clock ticking away all the time.  timer_handler now just
calls do_IRQ once without trying to figure out how many ticks to
emulate.

The idle loop now needs to turn ticking on and off.

For now, process ticks are ignored.  This breaks userspace time
accounting and this will be fixed later.

Signed-off-by: Jeff Dike <[EMAIL PROTECTED]>
--
 arch/um/Kconfig |   12 --
 arch/um/defconfig   |5 +---
 arch/um/include/os.h|1 
 arch/um/kernel/process.c|3 ++
 arch/um/kernel/time.c   |   45 
 arch/um/os-Linux/skas/process.c |   11 ++---
 arch/um/os-Linux/time.c |   29 -
 7 files changed, 51 insertions(+), 55 deletions(-)

Index: linux-2.6.22/arch/um/kernel/time.c
===
--- linux-2.6.22.orig/arch/um/kernel/time.c 2007-09-12 15:12:35.0 
-0400
+++ linux-2.6.22/arch/um/kernel/time.c  2007-09-12 15:15:48.0 -0400
@@ -20,41 +20,12 @@ unsigned long long sched_clock(void)
return (unsigned long long)jiffies_64 * (10 / HZ);
 }
 
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-static unsigned long long prev_nsecs[NR_CPUS];
-static long long delta[NR_CPUS];   /* Deviation per interval */
-#endif
-
 void timer_handler(int sig, struct uml_pt_regs *regs)
 {
-   unsigned long long ticks = 0;
unsigned long flags;
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-   int c = cpu();
-   if (prev_nsecs[c]) {
-   /* We've had 1 tick */
-   unsigned long long nsecs = os_nsecs();
-
-   delta[c] += nsecs - prev_nsecs[c];
-   prev_nsecs[c] = nsecs;
-
-   /* Protect against the host clock being set backwards */
-   if (delta[c] < 0)
-   delta[c] = 0;
-
-   ticks += (delta[c] * HZ) / BILLION;
-   delta[c] -= (ticks * BILLION) / HZ;
-   }
-   else prev_nsecs[c] = os_nsecs();
-#else
-   ticks = 1;
-#endif
 
local_irq_save(flags);
-   while (ticks > 0) {
-   do_IRQ(TIMER_IRQ, regs);
-   ticks--;
-   }
+   do_IRQ(TIMER_IRQ, regs);
local_irq_restore(flags);
 }
 
@@ -68,10 +39,8 @@ static void itimer_set_mode(enum clock_e
 
case CLOCK_EVT_MODE_SHUTDOWN:
case CLOCK_EVT_MODE_UNUSED:
-   disable_timer();
-   break;
case CLOCK_EVT_MODE_ONESHOT:
-   BUG();
+   disable_timer();
break;
 
case CLOCK_EVT_MODE_RESUME:
@@ -79,13 +48,19 @@ static void itimer_set_mode(enum clock_e
}
 }
 
+static int itimer_next_event(unsigned long delta,
+struct clock_event_device *evt)
+{
+   return timer_one_shot(delta + 1);
+}
+
 static struct clock_event_device itimer_clockevent = {
.name   = "itimer",
.rating = 250,
.cpumask= CPU_MASK_ALL,
-   .features   = CLOCK_EVT_FEAT_PERIODIC,
+   .features   = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode   = itimer_set_mode,
-   .set_next_event = NULL,
+   .set_next_event = itimer_next_event,
.shift  = 32,
.irq= 0,
 };
Index: linux-2.6.22/arch/um/os-Linux/time.c
===
--- linux-2.6.22.orig/arch/um/os-Linux/time.c   2007-09-12 14:56:38.0 
-0400
+++ linux-2.6.22/arch/um/os-Linux/time.c2007-09-12 15:15:48.0 
-0400
@@ -26,6 +26,33 @@ int set_interval(void)
return 0;
 }
 
+int timer_one_shot(int ticks)
+{
+   unsigned long usec = ticks * 100 / UM_HZ;
+   unsigned long sec = usec / 100;
+   struct itimerval interval;
+
+   usec %= 100;
+   interval = ((struct itimerval) { { 0, 0 }, { sec, usec } });
+
+   if (setitimer(ITIMER_VIRTUAL, , NULL) == -1)
+   return -errno;
+
+   return 0;
+}
+
+static inline unsigned long long tv_to_nsec(struct timeval *tv)
+{
+   /* Cast tv->tv_sec to a long long in order to force this whole
+* calculation to 64 bits.  Otherwise, we'll get a negative
+* 32-bit number getting cast to a very large 64-bit number on
+* return.
+*/
+
+   return ((unsigned long long) tv->tv_sec) * BILLION +
+   tv->tv_usec * 1000;
+}
+
 void disable_timer(void)
 {
struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
@@ -78,7 +105,7 @@ unsigned long long os_nsecs(void)
struct timeval tv;
 
gettimeofday(, NULL);
-   return (unsigned long long) tv.tv_sec * BILLION 

  1   2   3   4   5   6   7   8   >