[PATCH] i386: prevent auto select of mwait_idle for AMD CPUs

2007-04-10 Thread Andreas Herrmann
This fix is needed for AMD family 10h CPUs.

It prevents auto select of mwait_idle for AMD CPUs.
MWAIT does not enter C-states on family 10h and more
power saving is reached by entering C1 with
default_idle.

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>
---
 arch/i386/kernel/cpu/amd.c|4 
 arch/i386/kernel/process.c|3 ++-
 include/asm-i386/cpufeature.h |1 +
 3 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db4..e056271 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -275,6 +275,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
if (amd_apic_timer_broken())
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+
+   /* prevent auto select of mwait_idle */
+   if (cpu_has(c, X86_FEATURE_MWAIT))
+   set_bit(X86_FEATURE_MWAIT_NO_CSTATE, c->x86_capability);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned 
int size)
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 393a67d..64db049 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -259,7 +259,8 @@ static void mwait_idle(void)
 
 void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) &&
+   !cpu_has(c, X86_FEATURE_MWAIT_NO_CSTATE)) {
printk("monitor/mwait feature present.\n");
/*
 * Skip, if setup has overridden idle.
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index d1b8e4a..f7f633d 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -76,6 +76,7 @@
 #define X86_FEATURE_PEBS   (3*32+12)  /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS(3*32+13)  /* Branch Trace Store */
 #define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 
*/
+#define X86_FEATURE_MWAIT_NO_CSTATE (3*32+15) /* mwait does not enter c-state 
*/
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
-- 
1.5.0.6




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86_64: prevent auto select of mwait_idle for AMD CPUs

2007-04-10 Thread Andreas Herrmann
This fix is needed for AMD family 10h CPUs.

It prevents auto select of mwait_idle for AMD CPUs.
MWAIT does not enter C-states on family 10h and more
power saving is reached by entering C1 with
default_idle.

The patch also adds an idle=mwait command line option
to select mwait_idle for benchmarking.

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>
---
 arch/x86_64/kernel/process.c|9 +++--
 arch/x86_64/kernel/setup.c  |4 
 include/asm-x86_64/cpufeature.h |1 +
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc..18fe9de 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -271,7 +271,8 @@ static void mwait_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
static int printed;
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) &&
+   !cpu_has(c, X86_FEATURE_MWAIT_NO_CSTATE)) {
/*
 * Skip, if setup has overridden idle.
 * One CPU supports mwait => All CPUs supports mwait
@@ -291,7 +292,11 @@ static int __init idle_setup (char *str)
if (!strncmp(str, "poll", 4)) {
printk("using polling idle threads.\n");
pm_idle = poll_idle;
-   }
+   } else if (!strncmp(str, "mwait", 5))
+   if (boot_cpu_has(X86_FEATURE_MWAIT)) {
+   printk(KERN_INFO "using mwait in idle threads.\n");
+   pm_idle = mwait_idle;
+   }
 
boot_option_idle_override = 1;
return 1;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b69..b707025 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -612,6 +612,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
/* RDTSC can be speculated around */
clear_bit(X86_FEATURE_SYNC_RDTSC, >x86_capability);
+
+   /* prevent auto select of mwait_idle */
+   if (cpu_has(c, X86_FEATURE_MWAIT))
+   set_bit(X86_FEATURE_MWAIT_NO_CSTATE, c->x86_capability);
 }
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index 0b3c686..9331d1a 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -70,6 +70,7 @@
 #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS   (3*32+10) /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS(3*32+11) /* Branch Trace Store */
+#define X86_FEATURE_MWAIT_NO_CSTATE (3*32+12) /* mwait does not enter c-state 
*/
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
-- 
1.5.0.6




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86_64: prevent auto select of mwait_idle for AMD CPUs

2007-04-10 Thread Andreas Herrmann
This fix is needed for AMD family 10h CPUs.

It prevents auto select of mwait_idle for AMD CPUs.
MWAIT does not enter C-states on family 10h and more
power saving is reached by entering C1 with
default_idle.

The patch also adds an idle=mwait command line option
to select mwait_idle for benchmarking.

Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]
---
 arch/x86_64/kernel/process.c|9 +++--
 arch/x86_64/kernel/setup.c  |4 
 include/asm-x86_64/cpufeature.h |1 +
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc..18fe9de 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -271,7 +271,8 @@ static void mwait_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
static int printed;
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) 
+   !cpu_has(c, X86_FEATURE_MWAIT_NO_CSTATE)) {
/*
 * Skip, if setup has overridden idle.
 * One CPU supports mwait = All CPUs supports mwait
@@ -291,7 +292,11 @@ static int __init idle_setup (char *str)
if (!strncmp(str, poll, 4)) {
printk(using polling idle threads.\n);
pm_idle = poll_idle;
-   }
+   } else if (!strncmp(str, mwait, 5))
+   if (boot_cpu_has(X86_FEATURE_MWAIT)) {
+   printk(KERN_INFO using mwait in idle threads.\n);
+   pm_idle = mwait_idle;
+   }
 
boot_option_idle_override = 1;
return 1;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b69..b707025 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -612,6 +612,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
/* RDTSC can be speculated around */
clear_bit(X86_FEATURE_SYNC_RDTSC, c-x86_capability);
+
+   /* prevent auto select of mwait_idle */
+   if (cpu_has(c, X86_FEATURE_MWAIT))
+   set_bit(X86_FEATURE_MWAIT_NO_CSTATE, c-x86_capability);
 }
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index 0b3c686..9331d1a 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -70,6 +70,7 @@
 #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS   (3*32+10) /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS(3*32+11) /* Branch Trace Store */
+#define X86_FEATURE_MWAIT_NO_CSTATE (3*32+12) /* mwait does not enter c-state 
*/
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
-- 
1.5.0.6




-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] i386: prevent auto select of mwait_idle for AMD CPUs

2007-04-10 Thread Andreas Herrmann
This fix is needed for AMD family 10h CPUs.

It prevents auto select of mwait_idle for AMD CPUs.
MWAIT does not enter C-states on family 10h and more
power saving is reached by entering C1 with
default_idle.

Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]
---
 arch/i386/kernel/cpu/amd.c|4 
 arch/i386/kernel/process.c|3 ++-
 include/asm-i386/cpufeature.h |1 +
 3 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db4..e056271 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -275,6 +275,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
if (amd_apic_timer_broken())
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c-x86_capability);
+
+   /* prevent auto select of mwait_idle */
+   if (cpu_has(c, X86_FEATURE_MWAIT))
+   set_bit(X86_FEATURE_MWAIT_NO_CSTATE, c-x86_capability);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned 
int size)
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 393a67d..64db049 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -259,7 +259,8 @@ static void mwait_idle(void)
 
 void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) 
+   !cpu_has(c, X86_FEATURE_MWAIT_NO_CSTATE)) {
printk(monitor/mwait feature present.\n);
/*
 * Skip, if setup has overridden idle.
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index d1b8e4a..f7f633d 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -76,6 +76,7 @@
 #define X86_FEATURE_PEBS   (3*32+12)  /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS(3*32+13)  /* Branch Trace Store */
 #define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 
*/
+#define X86_FEATURE_MWAIT_NO_CSTATE (3*32+15) /* mwait does not enter c-state 
*/
 
 /* Intel-defined CPU features, CPUID level 0x0001 (ecx), word 4 */
 #define X86_FEATURE_XMM3   (4*32+ 0) /* Streaming SIMD Extensions-3 */
-- 
1.5.0.6




-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86_64: prevent auto select of mwait_idle for AMD CPUs

2007-04-10 Thread Andreas Herrmann
Actually I have also written patches to clear the MWAIT flag
for AMD CPUs.

But after re-reading of specs (also Intel's specs) I preferred
to keep the MWAIT flag but to introduce a MWAIT_NO_CSTATE flag.

I think this is the cleaner solution.


Regards,

Andreas



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RFC] x86: clear X86_FEATURE_MWAIT for AMD Fam10 CPU

2007-04-05 Thread Andreas Herrmann
Hi,

I send this as RFC because I won't manage it to test it
before end of Easter but want to have a consensus about
how the final patch should look like.

Andi,
what do you finally prefer?

(1) Something like the attached patch or
(2) a version which keeps to the MWAIT flag for Fam10 but
introduces an X86_FEATURE_MWAIT_DOESNT_SAVE_POWER as you
suggested.

An idle=mwait kernel parameter could (and should) be introduced
with both alternatives.

Meanwhile I think it would suffice to do (1) and issue another
cpuid if idle=mwait was used to select mwait_idle.


Regards,

Andreas

--


diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db4..4e01262 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -228,6 +228,9 @@ #define CBAR_KEY(0X00CB)
}
 
switch (c->x86) {
+   case 16:
+   clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
+   break;
case 15:
set_bit(X86_FEATURE_K8, c->x86_capability);
break;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b69..f53ee6c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -583,6 +583,10 @@ #endif
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 
0x0f58))
set_bit(X86_FEATURE_REP_GOOD, >x86_capability);
 
+   /* disable use of mwait on idle */
+   if (c->x86 == 16)
+   clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
+
/* Enable workaround for FXSAVE leak */
if (c->x86 >= 6)
set_bit(X86_FEATURE_FXSAVE_LEAK, >x86_capability);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: limit mwait_idle to Intel CPUs

2007-04-05 Thread Andreas Herrmann
On Thu, Apr 05, 2007 at 05:37:17PM +0200, Andi Kleen wrote:
> 
> > > > This patch will enable default_idle for non-Intel
> > > > CPUs even if mwait is supported.
> > > 
> > > It would be better to clear MONITOR/MWAIT in the AMD specific
> > > CPU initialize code than add workarounds everywhere else.
> > 
> > Why is that?
> > MONITOR/MWAIT is usable.
> 
> If it doesn't save power it's not usable imho.

But you can also think of monitor/mwait as a power saving means
for fast synchronization.

> > And I think this should 
> > be indicated by cpuinfo.
> > It's just inappropriate to use it in pm_idle.
> 
> There are no other users anyways and user space can't use it.
> Ok in theory you could add a X86_FEATURE_MWAIT_DOESNT_SAVE_POWER
> and check that, but just clearing it seems simpler and equivalent.

It is not equivalent. Usually users check /proc/cpuinfo for their
CPU features. Deleting that flag is kind of obfuscation.

I guess some time ago people did not care about their "svm" or "vmx"
flags. Nowadays (e.g. with kvm) some people are quite happy
if one of those strings occurs in /proc/cpuinfo (and they are quite
disappointed if this feature was disabled by BIOS).

Why not state it positively for CPUs that are able to even enter
C1 with MWAIT, e.g. X86_FEATURE_MWAIT_ENTERS_CSTATE?

If you dislike this wording than I still prefer to have the MWAIT
flag visible and but introducing an MWAIT_DOESNT_ENTER_CSTATE thing.

> What would perhaps make sense is to add a idle=mwait command
> line option for this though. So that the benchmarkers who currently
> use idle=poll could migrate to idle=mwait. That option would need
> to check the real cpuid bit of course again, but that should be
> easy enough.

That sounds good. Except that I prefer to check for
X86_FEATURE_MWAIT.



Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center
email: [EMAIL PROTECTED]
phone: +49-351-277-4919



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: limit mwait_idle to Intel CPUs

2007-04-05 Thread Andreas Herrmann
On Thu, Apr 05, 2007 at 04:24:45PM +0200, Andi Kleen wrote:
> On Thursday 05 April 2007 16:00:45 Andreas Herrmann wrote:
> > 
> > Commit 991528d7348667924176f3e29addea0675298944
> > introduced mwait_idle which is supposed to work
> > for Intel CPUs starting with Core Duo.
> > 
> > AMD Fam10 processors won't enter C1 on mwait.
> 
> Unfortunate. Will this be fixed?

No, it is not planned to change this behavior.

In fact mwait does certain power savings but the
core won't enter the C1 state. And power savings from
entering C1 are greater than power savings caused
by mwait (on AMD Fam10).

> 
> > This patch will enable default_idle for non-Intel
> > CPUs even if mwait is supported.
> 
> It would be better to clear MONITOR/MWAIT in the AMD specific
> CPU initialize code than add workarounds everywhere else.

Why is that?
MONITOR/MWAIT is usable. And I think this should
be indicated by cpuinfo.
It's just inappropriate to use it in pm_idle.


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86: limit mwait_idle to Intel CPUs

2007-04-05 Thread Andreas Herrmann

Commit 991528d7348667924176f3e29addea0675298944
introduced mwait_idle which is supposed to work
for Intel CPUs starting with Core Duo.

AMD Fam10 processors won't enter C1 on mwait.
This patch will enable default_idle for non-Intel
CPUs even if mwait is supported.

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>
---
 arch/i386/kernel/process.c   |4 +++-
 arch/x86_64/kernel/process.c |3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 393a67d..e3067e4 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -259,7 +259,9 @@ static void mwait_idle(void)
 
 void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) &&
+   (c->x86_vendor == X86_VENDOR_INTEL)) {
+
printk("monitor/mwait feature present.\n");
/*
 * Skip, if setup has overridden idle.
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc..fed830c 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -271,7 +271,8 @@ static void mwait_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
static int printed;
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) &&
+   (c->x86_vendor == X86_VENDOR_INTEL)) {
/*
 * Skip, if setup has overridden idle.
 * One CPU supports mwait => All CPUs supports mwait
-- 
1.5.0.6




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RFC] x86: clear X86_FEATURE_MWAIT for AMD Fam10 CPU

2007-04-05 Thread Andreas Herrmann
Hi,

I send this as RFC because I won't manage it to test it
before end of Easter but want to have a consensus about
how the final patch should look like.

Andi,
what do you finally prefer?

(1) Something like the attached patch or
(2) a version which keeps to the MWAIT flag for Fam10 but
introduces an X86_FEATURE_MWAIT_DOESNT_SAVE_POWER as you
suggested.

An idle=mwait kernel parameter could (and should) be introduced
with both alternatives.

Meanwhile I think it would suffice to do (1) and issue another
cpuid if idle=mwait was used to select mwait_idle.


Regards,

Andreas

--


diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db4..4e01262 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -228,6 +228,9 @@ #define CBAR_KEY(0X00CB)
}
 
switch (c-x86) {
+   case 16:
+   clear_bit(X86_FEATURE_MWAIT, c-x86_capability);
+   break;
case 15:
set_bit(X86_FEATURE_K8, c-x86_capability);
break;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 3d98b69..f53ee6c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -583,6 +583,10 @@ #endif
if (c-x86 == 15  ((level = 0x0f48  level  0x0f50) || level = 
0x0f58))
set_bit(X86_FEATURE_REP_GOOD, c-x86_capability);
 
+   /* disable use of mwait on idle */
+   if (c-x86 == 16)
+   clear_bit(X86_FEATURE_MWAIT, c-x86_capability);
+
/* Enable workaround for FXSAVE leak */
if (c-x86 = 6)
set_bit(X86_FEATURE_FXSAVE_LEAK, c-x86_capability);

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86: limit mwait_idle to Intel CPUs

2007-04-05 Thread Andreas Herrmann

Commit 991528d7348667924176f3e29addea0675298944
introduced mwait_idle which is supposed to work
for Intel CPUs starting with Core Duo.

AMD Fam10 processors won't enter C1 on mwait.
This patch will enable default_idle for non-Intel
CPUs even if mwait is supported.

Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]
---
 arch/i386/kernel/process.c   |4 +++-
 arch/x86_64/kernel/process.c |3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 393a67d..e3067e4 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -259,7 +259,9 @@ static void mwait_idle(void)
 
 void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) 
+   (c-x86_vendor == X86_VENDOR_INTEL)) {
+
printk(monitor/mwait feature present.\n);
/*
 * Skip, if setup has overridden idle.
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index d8d5ccc..fed830c 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -271,7 +271,8 @@ static void mwait_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
static int printed;
-   if (cpu_has(c, X86_FEATURE_MWAIT)) {
+   if (cpu_has(c, X86_FEATURE_MWAIT) 
+   (c-x86_vendor == X86_VENDOR_INTEL)) {
/*
 * Skip, if setup has overridden idle.
 * One CPU supports mwait = All CPUs supports mwait
-- 
1.5.0.6




-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: limit mwait_idle to Intel CPUs

2007-04-05 Thread Andreas Herrmann
On Thu, Apr 05, 2007 at 04:24:45PM +0200, Andi Kleen wrote:
 On Thursday 05 April 2007 16:00:45 Andreas Herrmann wrote:
  
  Commit 991528d7348667924176f3e29addea0675298944
  introduced mwait_idle which is supposed to work
  for Intel CPUs starting with Core Duo.
  
  AMD Fam10 processors won't enter C1 on mwait.
 
 Unfortunate. Will this be fixed?

No, it is not planned to change this behavior.

In fact mwait does certain power savings but the
core won't enter the C1 state. And power savings from
entering C1 are greater than power savings caused
by mwait (on AMD Fam10).

 
  This patch will enable default_idle for non-Intel
  CPUs even if mwait is supported.
 
 It would be better to clear MONITOR/MWAIT in the AMD specific
 CPU initialize code than add workarounds everywhere else.

Why is that?
MONITOR/MWAIT is usable. And I think this should
be indicated by cpuinfo.
It's just inappropriate to use it in pm_idle.


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86: limit mwait_idle to Intel CPUs

2007-04-05 Thread Andreas Herrmann
On Thu, Apr 05, 2007 at 05:37:17PM +0200, Andi Kleen wrote:
 
This patch will enable default_idle for non-Intel
CPUs even if mwait is supported.
   
   It would be better to clear MONITOR/MWAIT in the AMD specific
   CPU initialize code than add workarounds everywhere else.
  
  Why is that?
  MONITOR/MWAIT is usable.
 
 If it doesn't save power it's not usable imho.

But you can also think of monitor/mwait as a power saving means
for fast synchronization.

  And I think this should 
  be indicated by cpuinfo.
  It's just inappropriate to use it in pm_idle.
 
 There are no other users anyways and user space can't use it.
 Ok in theory you could add a X86_FEATURE_MWAIT_DOESNT_SAVE_POWER
 and check that, but just clearing it seems simpler and equivalent.

It is not equivalent. Usually users check /proc/cpuinfo for their
CPU features. Deleting that flag is kind of obfuscation.

I guess some time ago people did not care about their svm or vmx
flags. Nowadays (e.g. with kvm) some people are quite happy
if one of those strings occurs in /proc/cpuinfo (and they are quite
disappointed if this feature was disabled by BIOS).

Why not state it positively for CPUs that are able to even enter
C1 with MWAIT, e.g. X86_FEATURE_MWAIT_ENTERS_CSTATE?

If you dislike this wording than I still prefer to have the MWAIT
flag visible and but introducing an MWAIT_DOESNT_ENTER_CSTATE thing.

 What would perhaps make sense is to add a idle=mwait command
 line option for this though. So that the benchmarkers who currently
 use idle=poll could migrate to idle=mwait. That option would need
 to check the real cpuid bit of course again, but that should be
 easy enough.

That sounds good. Except that I prefer to check for
X86_FEATURE_MWAIT.



Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center
email: [EMAIL PROTECTED]
phone: +49-351-277-4919



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [discuss] [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Tue, Feb 06, 2007 at 10:54:23AM -0700, [EMAIL PROTECTED] wrote:
> "Andreas Herrmann" <[EMAIL PROTECTED]> writes:
> > On Mon, Feb 05, 2007 at 05:26:12PM -0700, [EMAIL PROTECTED] wrote:
> >> "Andreas Herrmann" <[EMAIL PROTECTED]> writes:
> >> >
> >> The limit is per cpu not per architecture.  So if you run a
> >> cpu that can run in 64bit mode in 32bit mode the limit
> >> is not 36 bits.  Even PAE in 32bit mode doesn't have that limit.
> >> 
> > Good point.
> >
> > I totally ignored that on 64 bit cpus in legacy mode
> > - PAE-paging means up to 52 physical address bits respectively
> > "physical address size of the underlying implementation"
> > - for non-PAE-paging with PSE enabled we have 40 bits for AMD and
> > with PSE36 36 bits for Intel
> 
> For non PAE-paging you have 32bits.

You are referring to current Linux implementation?
The AMD64 architecture increased physical address size in PSE mode to
40 bits. So at least it would be possible to use more than 32 bits.

> >> > diff --git a/arch/i386/kernel/cpu/mtrr/generic.c
> >> > b/arch/i386/kernel/cpu/mtrr/generic.c
> >> > index f77fc53..aa21d15 100644
> >> > --- a/arch/i386/kernel/cpu/mtrr/generic.c
> >> > +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> >> > @@ -172,7 +172,7 @@ int generic_get_free_region(unsigned long base, 
> >> > unsigned
> >> > long size, int replace_
> >> >  static void generic_get_mtrr(unsigned int reg, unsigned long *base,
> >> >   unsigned long *size, mtrr_type *type)
> >> >  {
> >> > -unsigned int mask_lo, mask_hi, base_lo, base_hi;
> >> > +unsigned long mask_lo, mask_hi, base_lo, base_hi;
> >> 
> >> Why?  Given the low and the high I am assuming these are all implicitly
> >> 32bit quantities.  unsigned int is fine.
> >
> > It is not, please refer to the function body, e.g.
> >
> > *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
> >
> > All leading 20 bits of "unsigned int" base_hi are snipped away. Thus
> > limiting base to use 44 bit instead of 52 bit in 64 bit mode. An
> > option would have been to use a type cast while shifting.
> >
> > (Hmm, having your first remark in mind I have to admit that my fix is
> > mainly focused on 64 bit mode not on 64 bit cpu running in 32 bit ...)
> 
> Yes.  So base needs to be come a u64. 

I was afraid you'ld say that.

> So base = ((base_hi << 32) | base_lo) >> PAGE_SHIFT.
> 
> I see where the 44bit limit comes in.  Do you actually have boxes
> with > 16TB?

No, I don't have access to such a box. Would be nice though.

> 
> Regardless it looks like base and possibly size needs to become
> a u64.  At which time the extra >> PAGE_SHIFT could be meaningless.
> Either that or because base and size need to be sized in something like
> megabytes.
> 
> I suspect making it a u64 sized in bytes will get the job done and
> result in simpler code.

Right you are!

> >> > diff --git a/arch/i386/kernel/cpu/mtrr/if.c 
> >> > b/arch/i386/kernel/cpu/mtrr/if.c
> >> > index 5ae1705..3abc3f1 100644
> >> > --- a/arch/i386/kernel/cpu/mtrr/if.c
> >> > +++ b/arch/i386/kernel/cpu/mtrr/if.c
> >> > @@ -137,6 +137,10 @@ mtrr_write(struct file *file, const char __user 
> >> > *buf,
> >> > size_t len, loff_t * ppos)
> >> >  for (i = 0; i < MTRR_NUM_TYPES; ++i) {
> >> >  if (strcmp(ptr, mtrr_strings[i]))
> >> >  continue;
> >> > +#ifndef CONFIG_X86_64
> >> > +if (base > 0xfULL)
> >> > +return -EINVAL;
> >> > +#endif
> >> 
> >> That is just silly.  If the cpu is running in long mode or should
> >> not affect this capability.



> > So I could do one of the following:
> > (1) prepare new patch omitting this silly hunk (-> old behaviour)
> > (2) check for 44 bit address size instead of 36 bit address size to
> > reflect the implicit truncation (-> avoid silent truncation)
> > (3) fix all mtrr code to be able to use up to 52 bit width physical
> > addresses instead of 44 bit ones if running in 32 bit mode on 64 bit
> > cpus.
> >
> > I prefer to do (2).
> > (IMHO those who have the need for n>44 bit width base address in an MTRR
> > should stick to 64 bit mode.)
> 
> I prefer (3).  Since the code is shared between 32 and 64bit mode it
> should behave the same in both.  I know there are people who regularly
> test 32bit kernels on boxes with 128 cpus and 128MB of ram.
> 
> People sometimes want crazy things and since it just a matter of changing
> the type it should be no real work to get the code to work in 32bit mode.

Ok, it is best to do (3).
I will come up with another patch asap.


> Eric


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [discuss] [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Tue, Feb 06, 2007 at 11:54:57AM +0100, Andi Kleen wrote:
> On Tuesday 06 February 2007 10:53, Jan Beulich wrote:
> > >> I don't think I remember a restriction here, at least not below 44 bits
> > >> (that's where pfn-s would need to become 64-bit wide).
> > >
> > >The i386 mm code only supports 4 entries in the PGD, so more than 36bit 
> > >cannot 
> > >be mapped right now.
> > 
> > That has nothing to do with the number of physical address bits.
> 
> You couldn't use the memory in any ways.
> 
> Anyways I give up -- the check is probably not needed, unless Andreas
> comes up with a good reason.

No, I haven't a good reason to restrict the base address to fewer
than 44 bits.

So the question is, should I completely remove that check or adapt it
to check for 44 bit instead of 36 bit?


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [discuss] [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Tue, Feb 06, 2007 at 09:31:45AM +, Jan Beulich wrote:
> >>> Andi Kleen <[EMAIL PROTECTED]> 06.02.07 08:53 >>>
> >On Monday 05 February 2007 23:50, Siddha, Suresh B wrote:
> >> On Mon, Feb 05, 2007 at 06:19:59PM +0100, Andreas Herrmann wrote:
> >> > o added check to restrict base address to 36 bit on i386
> >> 
> >> Why is this? It can go upto implemented physical bits, right?
> >
> >In theory it can, but Linux doesn't support it.
> 
> I don't think I remember a restriction here, at least not below 44 bits
> (that's where pfn-s would need to become 64-bit wide).
> 
> Jan
> 

Hi all,

shame on me.
Wanted to fix and interface issue where base address is truncated
at 44 bit in mtrr_write().

(And just thought 36 bit would be more than enough for that 32 bit
Linux version :)


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Mon, Feb 05, 2007 at 05:26:12PM -0700, [EMAIL PROTECTED] wrote:
> "Andreas Herrmann" <[EMAIL PROTECTED]> writes:
> > mtrr: fix issues with large addresses
> >
> > Fixes some issues with /proc/mtrr interface:
> > o If physical address size crosses the 44 bit boundary
> >   size_or_mask is evaluated wrong
> > o size_and_mask limits physical base
> >   address for an MTRR to be less than 44 bit
> > o added check to restrict base address to 36 bit on i386
> 
> The limit is per cpu not per architecture.  So if you run a
> cpu that can run in 64bit mode in 32bit mode the limit
> is not 36 bits.  Even PAE in 32bit mode doesn't have that limit.
> 

Good point.

I totally ignored that on 64 bit cpus in legacy mode
- PAE-paging means up to 52 physical address bits respectively
"physical address size of the underlying implementation"
- for non-PAE-paging with PSE enabled we have 40 bits for AMD and
with PSE36 36 bits for Intel

What a mess.
(Hope anyone knows for sure which paging methods are relevant for
Linux if compiled for i386 and w/o CONFIG_X86_64?)

(Seems that in my mind this legacy stuff is still tied to 36 and 32
bits :(

> > diff --git a/arch/i386/kernel/cpu/mtrr/generic.c
> > b/arch/i386/kernel/cpu/mtrr/generic.c
> > index f77fc53..aa21d15 100644
> > --- a/arch/i386/kernel/cpu/mtrr/generic.c
> > +++ b/arch/i386/kernel/cpu/mtrr/generic.c
> > @@ -172,7 +172,7 @@ int generic_get_free_region(unsigned long base, unsigned
> > long size, int replace_
> >  static void generic_get_mtrr(unsigned int reg, unsigned long *base,
> >  unsigned long *size, mtrr_type *type)
> >  {
> > -   unsigned int mask_lo, mask_hi, base_lo, base_hi;
> > +   unsigned long mask_lo, mask_hi, base_lo, base_hi;
> 
> Why?  Given the low and the high I am assuming these are all implicitly
> 32bit quantities.  unsigned int is fine.

It is not, please refer to the function body, e.g.

*base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;

All leading 20 bits of "unsigned int" base_hi are snipped away. Thus
limiting base to use 44 bit instead of 52 bit in 64 bit mode. An
option would have been to use a type cast while shifting.

(Hmm, having your first remark in mind I have to admit that my fix is
mainly focused on 64 bit mode not on 64 bit cpu running in 32 bit ...)

> > diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
> > index 5ae1705..3abc3f1 100644
> > --- a/arch/i386/kernel/cpu/mtrr/if.c
> > +++ b/arch/i386/kernel/cpu/mtrr/if.c
> > @@ -137,6 +137,10 @@ mtrr_write(struct file *file, const char __user *buf,
> > size_t len, loff_t * ppos)
> > for (i = 0; i < MTRR_NUM_TYPES; ++i) {
> > if (strcmp(ptr, mtrr_strings[i]))
> > continue;
> > +#ifndef CONFIG_X86_64
> > +   if (base > 0xfULL)
> > +   return -EINVAL;
> > +#endif
> 
> That is just silly.  If the cpu is running in long mode or should
> not affect this capability.

Yes, that check is wrong -- due to my wrong assumption.
Base can use 52 bits not just 36 bits in 32 bit mode on 64 bit cpu.

My intention was to avoid the silent truncation of base address
in the following lines:

base >>= PAGE_SHIFT;
size >>= PAGE_SHIFT;
err =
mtrr_add_page((unsigned long) base, ...

where base is cut at bit 44 due to the type cast.

The user doing
#> echo 0x1000 size=0x0815000 type=uncachable >/proc/mtrr
will end up with a new MTRR pair having PhysBase==0x0. (At least this
will give him some time to get a coffee when his system reboots after
the crash.)

So it seems that some more stuff needs to be fixed in the mtrr code.
All unsigned long base addresses used in this code implicitly restrict
the address to 44 bit (taking the PAGE_SHIFT into account).

So I could do one of the following:
(1) prepare new patch omitting this silly hunk (-> old behaviour)
(2) check for 44 bit address size instead of 36 bit address size to
reflect the implicit truncation (-> avoid silent truncation)
(3) fix all mtrr code to be able to use up to 52 bit width physical
addresses instead of 44 bit ones if running in 32 bit mode on 64 bit
cpus.

I prefer to do (2).
(IMHO those who have the need for n>44 bit width base address in an MTRR
should stick to 64 bit mode.)

> > diff --git a/arch/i386/kernel/cpu/mtrr/main.c 
> > b/arch/i386/kernel/cpu/mtrr/main.c
> > index 16bb7ea..0acfb6a 100644
> > --- a/arch/i386/kernel/cpu/mtrr/main.c
> > +++ b/arch/i386/kernel/cpu/mtrr/main.c
> > @@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
> >  unsigned int *usage_table

Re: [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Mon, Feb 05, 2007 at 05:26:12PM -0700, [EMAIL PROTECTED] wrote:
 Andreas Herrmann [EMAIL PROTECTED] writes:
  mtrr: fix issues with large addresses
 
  Fixes some issues with /proc/mtrr interface:
  o If physical address size crosses the 44 bit boundary
size_or_mask is evaluated wrong
  o size_and_mask limits physical base
address for an MTRR to be less than 44 bit
  o added check to restrict base address to 36 bit on i386
 
 The limit is per cpu not per architecture.  So if you run a
 cpu that can run in 64bit mode in 32bit mode the limit
 is not 36 bits.  Even PAE in 32bit mode doesn't have that limit.
 

Good point.

I totally ignored that on 64 bit cpus in legacy mode
- PAE-paging means up to 52 physical address bits respectively
physical address size of the underlying implementation
- for non-PAE-paging with PSE enabled we have 40 bits for AMD and
with PSE36 36 bits for Intel

What a mess.
(Hope anyone knows for sure which paging methods are relevant for
Linux if compiled for i386 and w/o CONFIG_X86_64?)

(Seems that in my mind this legacy stuff is still tied to 36 and 32
bits :(

  diff --git a/arch/i386/kernel/cpu/mtrr/generic.c
  b/arch/i386/kernel/cpu/mtrr/generic.c
  index f77fc53..aa21d15 100644
  --- a/arch/i386/kernel/cpu/mtrr/generic.c
  +++ b/arch/i386/kernel/cpu/mtrr/generic.c
  @@ -172,7 +172,7 @@ int generic_get_free_region(unsigned long base, unsigned
  long size, int replace_
   static void generic_get_mtrr(unsigned int reg, unsigned long *base,
   unsigned long *size, mtrr_type *type)
   {
  -   unsigned int mask_lo, mask_hi, base_lo, base_hi;
  +   unsigned long mask_lo, mask_hi, base_lo, base_hi;
 
 Why?  Given the low and the high I am assuming these are all implicitly
 32bit quantities.  unsigned int is fine.

It is not, please refer to the function body, e.g.

*base = base_hi  (32 - PAGE_SHIFT) | base_lo  PAGE_SHIFT;

All leading 20 bits of unsigned int base_hi are snipped away. Thus
limiting base to use 44 bit instead of 52 bit in 64 bit mode. An
option would have been to use a type cast while shifting.

(Hmm, having your first remark in mind I have to admit that my fix is
mainly focused on 64 bit mode not on 64 bit cpu running in 32 bit ...)

  diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
  index 5ae1705..3abc3f1 100644
  --- a/arch/i386/kernel/cpu/mtrr/if.c
  +++ b/arch/i386/kernel/cpu/mtrr/if.c
  @@ -137,6 +137,10 @@ mtrr_write(struct file *file, const char __user *buf,
  size_t len, loff_t * ppos)
  for (i = 0; i  MTRR_NUM_TYPES; ++i) {
  if (strcmp(ptr, mtrr_strings[i]))
  continue;
  +#ifndef CONFIG_X86_64
  +   if (base  0xfULL)
  +   return -EINVAL;
  +#endif
 
 That is just silly.  If the cpu is running in long mode or should
 not affect this capability.

Yes, that check is wrong -- due to my wrong assumption.
Base can use 52 bits not just 36 bits in 32 bit mode on 64 bit cpu.

My intention was to avoid the silent truncation of base address
in the following lines:

base = PAGE_SHIFT;
size = PAGE_SHIFT;
err =
mtrr_add_page((unsigned long) base, ...

where base is cut at bit 44 due to the type cast.

The user doing
# echo 0x1000 size=0x0815000 type=uncachable /proc/mtrr
will end up with a new MTRR pair having PhysBase==0x0. (At least this
will give him some time to get a coffee when his system reboots after
the crash.)

So it seems that some more stuff needs to be fixed in the mtrr code.
All unsigned long base addresses used in this code implicitly restrict
the address to 44 bit (taking the PAGE_SHIFT into account).

So I could do one of the following:
(1) prepare new patch omitting this silly hunk (- old behaviour)
(2) check for 44 bit address size instead of 36 bit address size to
reflect the implicit truncation (- avoid silent truncation)
(3) fix all mtrr code to be able to use up to 52 bit width physical
addresses instead of 44 bit ones if running in 32 bit mode on 64 bit
cpus.

I prefer to do (2).
(IMHO those who have the need for n44 bit width base address in an MTRR
should stick to 64 bit mode.)

  diff --git a/arch/i386/kernel/cpu/mtrr/main.c 
  b/arch/i386/kernel/cpu/mtrr/main.c
  index 16bb7ea..0acfb6a 100644
  --- a/arch/i386/kernel/cpu/mtrr/main.c
  +++ b/arch/i386/kernel/cpu/mtrr/main.c
  @@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
   unsigned int *usage_table;
   static DEFINE_MUTEX(mtrr_mutex);
   
  -u32 size_or_mask, size_and_mask;
  +u64 size_or_mask, size_and_mask;
   
   static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
   
  @@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
   boot_cpu_data.x86_mask == 0x4))
  phys_addr = 36;
   
  -   size_or_mask = ~((1  (phys_addr - PAGE_SHIFT)) - 1);
  -   size_and_mask = ~size_or_mask  0xfff0

Re: [discuss] [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Tue, Feb 06, 2007 at 09:31:45AM +, Jan Beulich wrote:
  Andi Kleen [EMAIL PROTECTED] 06.02.07 08:53 
 On Monday 05 February 2007 23:50, Siddha, Suresh B wrote:
  On Mon, Feb 05, 2007 at 06:19:59PM +0100, Andreas Herrmann wrote:
   o added check to restrict base address to 36 bit on i386
  
  Why is this? It can go upto implemented physical bits, right?
 
 In theory it can, but Linux doesn't support it.
 
 I don't think I remember a restriction here, at least not below 44 bits
 (that's where pfn-s would need to become 64-bit wide).
 
 Jan
 

Hi all,

shame on me.
Wanted to fix and interface issue where base address is truncated
at 44 bit in mtrr_write().

(And just thought 36 bit would be more than enough for that 32 bit
Linux version :)


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [discuss] [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Tue, Feb 06, 2007 at 11:54:57AM +0100, Andi Kleen wrote:
 On Tuesday 06 February 2007 10:53, Jan Beulich wrote:
   I don't think I remember a restriction here, at least not below 44 bits
   (that's where pfn-s would need to become 64-bit wide).
  
  The i386 mm code only supports 4 entries in the PGD, so more than 36bit 
  cannot 
  be mapped right now.
  
  That has nothing to do with the number of physical address bits.
 
 You couldn't use the memory in any ways.
 
 Anyways I give up -- the check is probably not needed, unless Andreas
 comes up with a good reason.

No, I haven't a good reason to restrict the base address to fewer
than 44 bits.

So the question is, should I completely remove that check or adapt it
to check for 44 bit instead of 36 bit?


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [discuss] [patch] mtrr: fix issues with large addresses

2007-02-06 Thread Andreas Herrmann
On Tue, Feb 06, 2007 at 10:54:23AM -0700, [EMAIL PROTECTED] wrote:
 Andreas Herrmann [EMAIL PROTECTED] writes:
  On Mon, Feb 05, 2007 at 05:26:12PM -0700, [EMAIL PROTECTED] wrote:
  Andreas Herrmann [EMAIL PROTECTED] writes:
  
  The limit is per cpu not per architecture.  So if you run a
  cpu that can run in 64bit mode in 32bit mode the limit
  is not 36 bits.  Even PAE in 32bit mode doesn't have that limit.
  
  Good point.
 
  I totally ignored that on 64 bit cpus in legacy mode
  - PAE-paging means up to 52 physical address bits respectively
  physical address size of the underlying implementation
  - for non-PAE-paging with PSE enabled we have 40 bits for AMD and
  with PSE36 36 bits for Intel
 
 For non PAE-paging you have 32bits.

You are referring to current Linux implementation?
The AMD64 architecture increased physical address size in PSE mode to
40 bits. So at least it would be possible to use more than 32 bits.

   diff --git a/arch/i386/kernel/cpu/mtrr/generic.c
   b/arch/i386/kernel/cpu/mtrr/generic.c
   index f77fc53..aa21d15 100644
   --- a/arch/i386/kernel/cpu/mtrr/generic.c
   +++ b/arch/i386/kernel/cpu/mtrr/generic.c
   @@ -172,7 +172,7 @@ int generic_get_free_region(unsigned long base, 
   unsigned
   long size, int replace_
static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 unsigned long *size, mtrr_type *type)
{
   -unsigned int mask_lo, mask_hi, base_lo, base_hi;
   +unsigned long mask_lo, mask_hi, base_lo, base_hi;
  
  Why?  Given the low and the high I am assuming these are all implicitly
  32bit quantities.  unsigned int is fine.
 
  It is not, please refer to the function body, e.g.
 
  *base = base_hi  (32 - PAGE_SHIFT) | base_lo  PAGE_SHIFT;
 
  All leading 20 bits of unsigned int base_hi are snipped away. Thus
  limiting base to use 44 bit instead of 52 bit in 64 bit mode. An
  option would have been to use a type cast while shifting.
 
  (Hmm, having your first remark in mind I have to admit that my fix is
  mainly focused on 64 bit mode not on 64 bit cpu running in 32 bit ...)
 
 Yes.  So base needs to be come a u64. 

I was afraid you'ld say that.

 So base = ((base_hi  32) | base_lo)  PAGE_SHIFT.
 
 I see where the 44bit limit comes in.  Do you actually have boxes
 with  16TB?

No, I don't have access to such a box. Would be nice though.

 
 Regardless it looks like base and possibly size needs to become
 a u64.  At which time the extra  PAGE_SHIFT could be meaningless.
 Either that or because base and size need to be sized in something like
 megabytes.
 
 I suspect making it a u64 sized in bytes will get the job done and
 result in simpler code.

Right you are!

   diff --git a/arch/i386/kernel/cpu/mtrr/if.c 
   b/arch/i386/kernel/cpu/mtrr/if.c
   index 5ae1705..3abc3f1 100644
   --- a/arch/i386/kernel/cpu/mtrr/if.c
   +++ b/arch/i386/kernel/cpu/mtrr/if.c
   @@ -137,6 +137,10 @@ mtrr_write(struct file *file, const char __user 
   *buf,
   size_t len, loff_t * ppos)
for (i = 0; i  MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
continue;
   +#ifndef CONFIG_X86_64
   +if (base  0xfULL)
   +return -EINVAL;
   +#endif
  
  That is just silly.  If the cpu is running in long mode or should
  not affect this capability.

snip

  So I could do one of the following:
  (1) prepare new patch omitting this silly hunk (- old behaviour)
  (2) check for 44 bit address size instead of 36 bit address size to
  reflect the implicit truncation (- avoid silent truncation)
  (3) fix all mtrr code to be able to use up to 52 bit width physical
  addresses instead of 44 bit ones if running in 32 bit mode on 64 bit
  cpus.
 
  I prefer to do (2).
  (IMHO those who have the need for n44 bit width base address in an MTRR
  should stick to 64 bit mode.)
 
 I prefer (3).  Since the code is shared between 32 and 64bit mode it
 should behave the same in both.  I know there are people who regularly
 test 32bit kernels on boxes with 128 cpus and 128MB of ram.
 
 People sometimes want crazy things and since it just a matter of changing
 the type it should be no real work to get the code to work in 32bit mode.

Ok, it is best to do (3).
I will come up with another patch asap.


 Eric


Regards,

Andreas

-- 
AMD Saxony, Dresden, Germany
Operating System Research Center



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] mtrr: fix issues with large addresses

2007-02-05 Thread Andreas Herrmann
Hi,

This is a repost of a mail sent to Richard Gooch and lkml some time
ago. Meanwhile I noticed that Richard has a new email address. And it
seems that he does not maintain the mtrr code anymore. (So how about
updating the MAINTAINERS file?)

Here we go again -- with new recipient and a slightly modified
version of the patch.


Regards,

Andreas


mtrr: fix issues with large addresses

Fixes some issues with /proc/mtrr interface:
o If physical address size crosses the 44 bit boundary
  size_or_mask is evaluated wrong
o size_and_mask limits physical base
  address for an MTRR to be less than 44 bit
o added check to restrict base address to 36 bit on i386

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>

--
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c 
b/arch/i386/kernel/cpu/mtrr/generic.c
index f77fc53..aa21d15 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -172,7 +172,7 @@ int generic_get_free_region(unsigned long base, unsigned 
long size, int replace_
 static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 unsigned long *size, mtrr_type *type)
 {
-   unsigned int mask_lo, mask_hi, base_lo, base_hi;
+   unsigned long mask_lo, mask_hi, base_lo, base_hi;
 
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index 5ae1705..3abc3f1 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -137,6 +137,10 @@ mtrr_write(struct file *file, const char __user *buf, 
size_t len, loff_t * ppos)
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
continue;
+#ifndef CONFIG_X86_64
+   if (base > 0xfULL)
+   return -EINVAL;
+#endif
base >>= PAGE_SHIFT;
size >>= PAGE_SHIFT;
err =
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 16bb7ea..0acfb6a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
 unsigned int *usage_table;
 static DEFINE_MUTEX(mtrr_mutex);
 
-u32 size_or_mask, size_and_mask;
+u64 size_or_mask, size_and_mask;
 
 static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
 
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
 boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
 
-   size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
-   size_and_mask = ~size_or_mask & 0xfff0;
+   size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 
1);
+   size_and_mask = ~size_or_mask & 0xf0ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
   boot_cpu_data.x86 == 6) {
/* VIA C* family have Intel style MTRRs, but
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index d61ea9d..289dfe6 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -84,7 +84,7 @@ void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
 
-extern u32 size_or_mask, size_and_mask;
+extern u64 size_or_mask, size_and_mask;
 extern struct mtrr_ops * mtrr_if;
 
 #define is_cpu(vnd)(mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] mtrr: fix issues with large addresses

2007-02-05 Thread Andreas Herrmann
Hi,

This is a repost of a mail sent to Richard Gooch and lkml some time
ago. Meanwhile I noticed that Richard has a new email address. And it
seems that he does not maintain the mtrr code anymore. (So how about
updating the MAINTAINERS file?)

Here we go again -- with new recipient and a slightly modified
version of the patch.


Regards,

Andreas


mtrr: fix issues with large addresses

Fixes some issues with /proc/mtrr interface:
o If physical address size crosses the 44 bit boundary
  size_or_mask is evaluated wrong
o size_and_mask limits physical base
  address for an MTRR to be less than 44 bit
o added check to restrict base address to 36 bit on i386

Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]

--
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c 
b/arch/i386/kernel/cpu/mtrr/generic.c
index f77fc53..aa21d15 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -172,7 +172,7 @@ int generic_get_free_region(unsigned long base, unsigned 
long size, int replace_
 static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 unsigned long *size, mtrr_type *type)
 {
-   unsigned int mask_lo, mask_hi, base_lo, base_hi;
+   unsigned long mask_lo, mask_hi, base_lo, base_hi;
 
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo  0x800) == 0) {
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index 5ae1705..3abc3f1 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -137,6 +137,10 @@ mtrr_write(struct file *file, const char __user *buf, 
size_t len, loff_t * ppos)
for (i = 0; i  MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
continue;
+#ifndef CONFIG_X86_64
+   if (base  0xfULL)
+   return -EINVAL;
+#endif
base = PAGE_SHIFT;
size = PAGE_SHIFT;
err =
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 16bb7ea..0acfb6a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
 unsigned int *usage_table;
 static DEFINE_MUTEX(mtrr_mutex);
 
-u32 size_or_mask, size_and_mask;
+u64 size_or_mask, size_and_mask;
 
 static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
 
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
 boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
 
-   size_or_mask = ~((1  (phys_addr - PAGE_SHIFT)) - 1);
-   size_and_mask = ~size_or_mask  0xfff0;
+   size_or_mask = ~((1ULL  (phys_addr - PAGE_SHIFT)) - 
1);
+   size_and_mask = ~size_or_mask  0xf0ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR 
   boot_cpu_data.x86 == 6) {
/* VIA C* family have Intel style MTRRs, but
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index d61ea9d..289dfe6 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -84,7 +84,7 @@ void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
 
-extern u32 size_or_mask, size_and_mask;
+extern u64 size_or_mask, size_and_mask;
 extern struct mtrr_ops * mtrr_if;
 
 #define is_cpu(vnd)(mtrr_if  mtrr_if-vendor == X86_VENDOR_##vnd)



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] mtrr: fix size_or_mask and size_and_mask

2007-01-22 Thread Andreas Herrmann
mtrr: fix size_or_mask and size_and_mask

This fixes two bugs in /proc/mtrr interface:
o If physical address size crosses the 44 bit boundary
  size_or_mask is evaluated wrong.
o size_and_mask limits width of physical base
  address for an MTRR to be less than 44 bits.

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>
---
 arch/i386/kernel/cpu/mtrr/main.c |6 +++---
 arch/i386/kernel/cpu/mtrr/mtrr.h |2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 16bb7ea..0acfb6a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
 unsigned int *usage_table;
 static DEFINE_MUTEX(mtrr_mutex);
 
-u32 size_or_mask, size_and_mask;
+u64 size_or_mask, size_and_mask;
 
 static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
 
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
 boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
 
-   size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
-   size_and_mask = ~size_or_mask & 0xfff0;
+   size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 
1);
+   size_and_mask = ~size_or_mask & 0xf0ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
   boot_cpu_data.x86 == 6) {
/* VIA C* family have Intel style MTRRs, but
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index d61ea9d..289dfe6 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -84,7 +84,7 @@ void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
 
-extern u32 size_or_mask, size_and_mask;
+extern u64 size_or_mask, size_and_mask;
 extern struct mtrr_ops * mtrr_if;
 
 #define is_cpu(vnd)(mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
-- 
1.4.2.4




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] mtrr: fix size_or_mask and size_and_mask

2007-01-22 Thread Andreas Herrmann
mtrr: fix size_or_mask and size_and_mask

This fixes two bugs in /proc/mtrr interface:
o If physical address size crosses the 44 bit boundary
  size_or_mask is evaluated wrong.
o size_and_mask limits width of physical base
  address for an MTRR to be less than 44 bits.

Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]
---
 arch/i386/kernel/cpu/mtrr/main.c |6 +++---
 arch/i386/kernel/cpu/mtrr/mtrr.h |2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 16bb7ea..0acfb6a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
 unsigned int *usage_table;
 static DEFINE_MUTEX(mtrr_mutex);
 
-u32 size_or_mask, size_and_mask;
+u64 size_or_mask, size_and_mask;
 
 static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
 
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
 boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
 
-   size_or_mask = ~((1  (phys_addr - PAGE_SHIFT)) - 1);
-   size_and_mask = ~size_or_mask  0xfff0;
+   size_or_mask = ~((1ULL  (phys_addr - PAGE_SHIFT)) - 
1);
+   size_and_mask = ~size_or_mask  0xf0ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR 
   boot_cpu_data.x86 == 6) {
/* VIA C* family have Intel style MTRRs, but
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index d61ea9d..289dfe6 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -84,7 +84,7 @@ void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
 
-extern u32 size_or_mask, size_and_mask;
+extern u64 size_or_mask, size_and_mask;
 extern struct mtrr_ops * mtrr_if;
 
 #define is_cpu(vnd)(mtrr_if  mtrr_if-vendor == X86_VENDOR_##vnd)
-- 
1.4.2.4




-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] zfcp: add rports to enable scsi_add_device to work again

2005-08-29 Thread Andreas Herrmann
On 27.08.2005 19:38 James Bottomley <[EMAIL PROTECTED]> wrote:
 
  > > this patch fixes a severe problem with 2.6.13-rc7.
  > > 
  > > Due to recent SCSI changes it is not possible to add any
  > > LUNs to the zfcp device driver anymore. With registration
  > > of remote ports this is fixed.
  > > 
  > > Please integrate the patch in the 2.6.13 kernel or if it
  > > is already too late for this release then please integrate it
  > > in 2.6.13.1
  > > 
  > > Thanks a lot.

  > Well, OK, but your usage isn't quite optimal.  The fibre channel
  > transport class retains a list of ports per host, so your maintenance 
of
  > an identical list in zfcp_adapter duplicates this.

I know what you mean. It would be better to store all "private" zfcp
data at dd_data in fc_rport.  Unfortunately it won't fit to zfcp's
current behaviour.  The rport depends on a specific host_id. Even the
name of the rport in sys/class/fc_remote_port inherits this id. This
means if the host is deregistered and registered again the old rport
structure is useless because new host_ids are assigned. Or do I miss
something here?

The zfcp_port structure is thought to be persistent if once configured
by the user, i.e. even if the host is deregistered and registered
again the port structure is kept.

I am not sure at the moment how this can be solved with the current
fc_transport. I think it would have been better to use the WWPN of an
rport as the name in sys/class/fc_remote_port. This would be a start
to keep rport structures independent of the host_id. (Why should the
transport depend on OS assigned ids anyway? The transport has already
unique identifiers like WWPNs.)

  > However, we can put this in for now and worry about removing all of 
the
  > fc transport class duplication from zfcp later.

  > James

In any case attributes provided by an rport should be removed from the
zfcp_port structure. This is what I will do next.


Regards,

Andreas
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] zfcp: add rports to enable scsi_add_device to work again

2005-08-29 Thread Andreas Herrmann
On 27.08.2005 19:38 James Bottomley [EMAIL PROTECTED] wrote:
 
this patch fixes a severe problem with 2.6.13-rc7.

Due to recent SCSI changes it is not possible to add any
LUNs to the zfcp device driver anymore. With registration
of remote ports this is fixed.

Please integrate the patch in the 2.6.13 kernel or if it
is already too late for this release then please integrate it
in 2.6.13.1

Thanks a lot.

   Well, OK, but your usage isn't quite optimal.  The fibre channel
   transport class retains a list of ports per host, so your maintenance 
of
   an identical list in zfcp_adapter duplicates this.

I know what you mean. It would be better to store all private zfcp
data at dd_data in fc_rport.  Unfortunately it won't fit to zfcp's
current behaviour.  The rport depends on a specific host_id. Even the
name of the rport in sys/class/fc_remote_port inherits this id. This
means if the host is deregistered and registered again the old rport
structure is useless because new host_ids are assigned. Or do I miss
something here?

The zfcp_port structure is thought to be persistent if once configured
by the user, i.e. even if the host is deregistered and registered
again the port structure is kept.

I am not sure at the moment how this can be solved with the current
fc_transport. I think it would have been better to use the WWPN of an
rport as the name in sys/class/fc_remote_port. This would be a start
to keep rport structures independent of the host_id. (Why should the
transport depend on OS assigned ids anyway? The transport has already
unique identifiers like WWPNs.)

   However, we can put this in for now and worry about removing all of 
the
   fc transport class duplication from zfcp later.

   James

In any case attributes provided by an rport should be removed from the
zfcp_port structure. This is what I will do next.


Regards,

Andreas
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] zfcp: add rports to enable scsi_add_device to work again

2005-08-27 Thread Andreas Herrmann
Hi,

this patch fixes a severe problem with 2.6.13-rc7.

Due to recent SCSI changes it is not possible to add any
LUNs to the zfcp device driver anymore. With registration
of remote ports this is fixed.

Please integrate the patch in the 2.6.13 kernel or if it
is already too late for this release then please integrate it
in 2.6.13.1

Thanks a lot.


Regards,

Andreas


Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>

diffstat:
 zfcp_aux.c  |   29 +++--
 zfcp_ccw.c  |   10 ++
 zfcp_def.h  |2 +-
 zfcp_erp.c  |   25 ++---
 zfcp_ext.h  |2 ++
 zfcp_fsf.c  |1 +
 zfcp_scsi.c |   25 -
 7 files changed, 63 insertions(+), 31 deletions(-)

diff -urN linux-2.6.13-rcx/drivers/s390/scsi/zfcp_aux.c 
linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_aux.c
--- linux-2.6.13-rcx/drivers/s390/scsi/zfcp_aux.c   2005-08-25 
10:53:15.0 +0200
+++ linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_aux.c  2005-08-27 
13:05:17.0 +0200
@@ -1299,13 +1299,10 @@
 zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status,
  u32 d_id)
 {
-   struct zfcp_port *port, *tmp_port;
+   struct zfcp_port *port;
int check_wwpn;
-   scsi_id_t scsi_id;
-   int found;
 
check_wwpn = !(status & ZFCP_STATUS_PORT_NO_WWPN);
-
/*
 * check that there is no port with this WWPN already in list
 */
@@ -1368,7 +1365,7 @@
} else {
snprintf(port->sysfs_device.bus_id,
 BUS_ID_SIZE, "0x%016llx", wwpn);
-   port->sysfs_device.parent = >ccw_device->dev;
+   port->sysfs_device.parent = >ccw_device->dev;
}
port->sysfs_device.release = zfcp_sysfs_port_release;
dev_set_drvdata(>sysfs_device, port);
@@ -1388,24 +1385,8 @@
 
zfcp_port_get(port);
 
-   scsi_id = 1;
-   found = 0;
write_lock_irq(_data.config_lock);
-   list_for_each_entry(tmp_port, >port_list_head, list) {
-   if (atomic_test_mask(ZFCP_STATUS_PORT_NO_SCSI_ID,
-_port->status))
-   continue;
-   if (tmp_port->scsi_id != scsi_id) {
-   found = 1;
-   break;
-   }
-   scsi_id++;
-   }
-   port->scsi_id = scsi_id;
-   if (found)
-   list_add_tail(>list, _port->list);
-   else
-   list_add_tail(>list, >port_list_head);
+   list_add_tail(>list, >port_list_head);
atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, >status);
atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, >status);
if (d_id == ZFCP_DID_DIRECTORY_SERVICE)
@@ -1422,11 +1403,15 @@
 void
 zfcp_port_dequeue(struct zfcp_port *port)
 {
+   struct fc_port *rport;
+
zfcp_port_wait(port);
write_lock_irq(_data.config_lock);
list_del(>list);
port->adapter->ports--;
write_unlock_irq(_data.config_lock);
+   if (port->rport)
+   fc_remote_port_delete(rport);
zfcp_adapter_put(port->adapter);
zfcp_sysfs_port_remove_files(>sysfs_device,
 atomic_read(>status));
diff -urN linux-2.6.13-rcx/drivers/s390/scsi/zfcp_ccw.c 
linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_ccw.c
--- linux-2.6.13-rcx/drivers/s390/scsi/zfcp_ccw.c   2005-03-02 
08:37:50.0 +0100
+++ linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_ccw.c  2005-08-27 
13:28:35.0 +0200
@@ -202,9 +202,19 @@
 zfcp_ccw_set_offline(struct ccw_device *ccw_device)
 {
struct zfcp_adapter *adapter;
+   struct zfcp_port *port;
+   struct fc_port *rport;
 
down(_data.config_sema);
adapter = dev_get_drvdata(_device->dev);
+   /* might be racy, but we cannot take config_lock due to the fact that
+  fc_remote_port_delete might sleep */
+   list_for_each_entry(port, >port_list_head, list)
+   if (port->rport) {
+   rport = port->rport;
+   port->rport = NULL;
+   fc_remote_port_delete(rport);
+   }
zfcp_erp_adapter_shutdown(adapter, 0);
zfcp_erp_wait(adapter);
zfcp_adapter_scsi_unregister(adapter);
diff -urN linux-2.6.13-rcx/drivers/s390/scsi/zfcp_def.h 
linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_def.h
--- linux-2.6.13-rcx/drivers/s390/scsi/zfcp_def.h   2005-08-25 
10:53:15.0 +0200
+++ linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_def.h  2005-08-26 
19:00:18.0 +0200
@@ -906,6 +906,7 @@
  */
 struct zfcp_port {
struct device  sysfs_device;   /* sysfs device */
+   struct fc_rport*rport; /* rport of fc transport class */
struct list_head   list;   /* list of remote

[PATCH] zfcp: add rports to enable scsi_add_device to work again

2005-08-27 Thread Andreas Herrmann
Hi,

this patch fixes a severe problem with 2.6.13-rc7.

Due to recent SCSI changes it is not possible to add any
LUNs to the zfcp device driver anymore. With registration
of remote ports this is fixed.

Please integrate the patch in the 2.6.13 kernel or if it
is already too late for this release then please integrate it
in 2.6.13.1

Thanks a lot.


Regards,

Andreas


Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]

diffstat:
 zfcp_aux.c  |   29 +++--
 zfcp_ccw.c  |   10 ++
 zfcp_def.h  |2 +-
 zfcp_erp.c  |   25 ++---
 zfcp_ext.h  |2 ++
 zfcp_fsf.c  |1 +
 zfcp_scsi.c |   25 -
 7 files changed, 63 insertions(+), 31 deletions(-)

diff -urN linux-2.6.13-rcx/drivers/s390/scsi/zfcp_aux.c 
linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_aux.c
--- linux-2.6.13-rcx/drivers/s390/scsi/zfcp_aux.c   2005-08-25 
10:53:15.0 +0200
+++ linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_aux.c  2005-08-27 
13:05:17.0 +0200
@@ -1299,13 +1299,10 @@
 zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn, u32 status,
  u32 d_id)
 {
-   struct zfcp_port *port, *tmp_port;
+   struct zfcp_port *port;
int check_wwpn;
-   scsi_id_t scsi_id;
-   int found;
 
check_wwpn = !(status  ZFCP_STATUS_PORT_NO_WWPN);
-
/*
 * check that there is no port with this WWPN already in list
 */
@@ -1368,7 +1365,7 @@
} else {
snprintf(port-sysfs_device.bus_id,
 BUS_ID_SIZE, 0x%016llx, wwpn);
-   port-sysfs_device.parent = adapter-ccw_device-dev;
+   port-sysfs_device.parent = adapter-ccw_device-dev;
}
port-sysfs_device.release = zfcp_sysfs_port_release;
dev_set_drvdata(port-sysfs_device, port);
@@ -1388,24 +1385,8 @@
 
zfcp_port_get(port);
 
-   scsi_id = 1;
-   found = 0;
write_lock_irq(zfcp_data.config_lock);
-   list_for_each_entry(tmp_port, adapter-port_list_head, list) {
-   if (atomic_test_mask(ZFCP_STATUS_PORT_NO_SCSI_ID,
-tmp_port-status))
-   continue;
-   if (tmp_port-scsi_id != scsi_id) {
-   found = 1;
-   break;
-   }
-   scsi_id++;
-   }
-   port-scsi_id = scsi_id;
-   if (found)
-   list_add_tail(port-list, tmp_port-list);
-   else
-   list_add_tail(port-list, adapter-port_list_head);
+   list_add_tail(port-list, adapter-port_list_head);
atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, port-status);
atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, port-status);
if (d_id == ZFCP_DID_DIRECTORY_SERVICE)
@@ -1422,11 +1403,15 @@
 void
 zfcp_port_dequeue(struct zfcp_port *port)
 {
+   struct fc_port *rport;
+
zfcp_port_wait(port);
write_lock_irq(zfcp_data.config_lock);
list_del(port-list);
port-adapter-ports--;
write_unlock_irq(zfcp_data.config_lock);
+   if (port-rport)
+   fc_remote_port_delete(rport);
zfcp_adapter_put(port-adapter);
zfcp_sysfs_port_remove_files(port-sysfs_device,
 atomic_read(port-status));
diff -urN linux-2.6.13-rcx/drivers/s390/scsi/zfcp_ccw.c 
linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_ccw.c
--- linux-2.6.13-rcx/drivers/s390/scsi/zfcp_ccw.c   2005-03-02 
08:37:50.0 +0100
+++ linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_ccw.c  2005-08-27 
13:28:35.0 +0200
@@ -202,9 +202,19 @@
 zfcp_ccw_set_offline(struct ccw_device *ccw_device)
 {
struct zfcp_adapter *adapter;
+   struct zfcp_port *port;
+   struct fc_port *rport;
 
down(zfcp_data.config_sema);
adapter = dev_get_drvdata(ccw_device-dev);
+   /* might be racy, but we cannot take config_lock due to the fact that
+  fc_remote_port_delete might sleep */
+   list_for_each_entry(port, adapter-port_list_head, list)
+   if (port-rport) {
+   rport = port-rport;
+   port-rport = NULL;
+   fc_remote_port_delete(rport);
+   }
zfcp_erp_adapter_shutdown(adapter, 0);
zfcp_erp_wait(adapter);
zfcp_adapter_scsi_unregister(adapter);
diff -urN linux-2.6.13-rcx/drivers/s390/scsi/zfcp_def.h 
linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_def.h
--- linux-2.6.13-rcx/drivers/s390/scsi/zfcp_def.h   2005-08-25 
10:53:15.0 +0200
+++ linux-2.6.13-zfcpfctc/drivers/s390/scsi/zfcp_def.h  2005-08-26 
19:00:18.0 +0200
@@ -906,6 +906,7 @@
  */
 struct zfcp_port {
struct device  sysfs_device;   /* sysfs device */
+   struct fc_rport*rport; /* rport of fc transport class */
struct list_head   list;   /* list of remote ports */
atomic_t

Re: [PATCH] remove name length check in a workqueue

2005-08-11 Thread Andreas Herrmann
Simon Derr <[EMAIL PROTECTED]> wrote:

  > It is sufficient to have a few HBAs and to insmod/rmmod the driver a 
few 
  > times.

  > Since the host_no is choosen with a mere counter increment 
  > in scsi_host_alloc():

  >   shost->host_no = scsi_host_next_hn++; /* XXX(hch): still racy */

  > Unused `host_no's are not reused and the 100 limit is reached even on 
  > smaller systems.

  > I have no idea of why someone would do repeated insmod/rmmods, though.
  > (But someone did).

You even don't have to use insmod/rmmod.  On s390 (using zfcp) it
suffices to take adapters offline and online (triggered via VM,
hardware, or within Linux). Just do so about 100 times ... You
know the result.


Regards,

Andreas
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] remove name length check in a workqueue

2005-08-11 Thread Andreas Herrmann
Simon Derr [EMAIL PROTECTED] wrote:

   It is sufficient to have a few HBAs and to insmod/rmmod the driver a 
few 
   times.

   Since the host_no is choosen with a mere counter increment 
   in scsi_host_alloc():

 shost-host_no = scsi_host_next_hn++; /* XXX(hch): still racy */

   Unused `host_no's are not reused and the 100 limit is reached even on 
   smaller systems.

   I have no idea of why someone would do repeated insmod/rmmods, though.
   (But someone did).

You even don't have to use insmod/rmmod.  On s390 (using zfcp) it
suffices to take adapters offline and online (triggered via VM,
hardware, or within Linux). Just do so about 100 times ... You
know the result.


Regards,

Andreas
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] zfcp: fix compile error

2005-04-21 Thread Andreas Herrmann
Jan Dittmer <[EMAIL PROTECTED]> wrote:
21.04.2005 10:49

> > [EMAIL PROTECTED]:
> > [PATCH] zfcp: convert to compat_ioctl

> This does not seem to compile anymore with defconfig:

>   CC  drivers/s390/scsi/zfcp_aux.o
> /usr/src/ctest/rc/kernel/drivers/s390/scsi/zfcp_aux.c:63: warning: 
> initialization from incompatible pointer type
> /usr/src/ctest/rc/kernel/drivers/s390/scsi/zfcp_aux.c:366: error: conflicting 
> types for `zfcp_cfdc_dev_ioctl'


Oops. Submitted patch was incorrect.
Attached patch (against 2.6.12-rc3) will fix the problem.
Sorry, for any inconvenience.


Regards,

Andreas


zfcp: fix compile error

Signed-off-by: Andreas Herrmann <[EMAIL PROTECTED]>


diff -bBrauN linux-2.6.x/drivers/s390/scsi-orig/zfcp_aux.c 
linux-2.6.x/drivers/s390/scsi/zfcp_aux.c
--- linux-2.6.x/drivers/s390/scsi-orig/zfcp_aux.c   2005-04-21 
12:36:44.0 +0200
+++ linux-2.6.x/drivers/s390/scsi/zfcp_aux.c2005-04-21 12:40:48.0 
+0200
@@ -52,7 +52,7 @@
 static inline int zfcp_sg_list_copy_to_user(void __user *,
struct zfcp_sg_list *, size_t);
 
-static int zfcp_cfdc_dev_ioctl(struct file *, unsigned int, unsigned long);
+static long zfcp_cfdc_dev_ioctl(struct file *, unsigned int, unsigned long);
 
 #define ZFCP_CFDC_IOC_MAGIC 0xDD
 #define ZFCP_CFDC_IOC \
diff -bBrauN linux-2.6.x/drivers/s390/scsi-orig/zfcp_def.h 
linux-2.6.x/drivers/s390/scsi/zfcp_def.h
--- linux-2.6.x/drivers/s390/scsi-orig/zfcp_def.h   2005-04-21 
12:36:44.0 +0200
+++ linux-2.6.x/drivers/s390/scsi/zfcp_def.h2005-04-21 12:41:56.0 
+0200
@@ -61,7 +61,6 @@
 #include 
 #include 
 #include 
-#include 
 
 / DEBUG FLAGS 
*/
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] zfcp: fix compile error

2005-04-21 Thread Andreas Herrmann
Jan Dittmer [EMAIL PROTECTED] wrote:
21.04.2005 10:49

  [EMAIL PROTECTED]:
  [PATCH] zfcp: convert to compat_ioctl

 This does not seem to compile anymore with defconfig:

   CC  drivers/s390/scsi/zfcp_aux.o
 /usr/src/ctest/rc/kernel/drivers/s390/scsi/zfcp_aux.c:63: warning: 
 initialization from incompatible pointer type
 /usr/src/ctest/rc/kernel/drivers/s390/scsi/zfcp_aux.c:366: error: conflicting 
 types for `zfcp_cfdc_dev_ioctl'


Oops. Submitted patch was incorrect.
Attached patch (against 2.6.12-rc3) will fix the problem.
Sorry, for any inconvenience.


Regards,

Andreas


zfcp: fix compile error

Signed-off-by: Andreas Herrmann [EMAIL PROTECTED]


diff -bBrauN linux-2.6.x/drivers/s390/scsi-orig/zfcp_aux.c 
linux-2.6.x/drivers/s390/scsi/zfcp_aux.c
--- linux-2.6.x/drivers/s390/scsi-orig/zfcp_aux.c   2005-04-21 
12:36:44.0 +0200
+++ linux-2.6.x/drivers/s390/scsi/zfcp_aux.c2005-04-21 12:40:48.0 
+0200
@@ -52,7 +52,7 @@
 static inline int zfcp_sg_list_copy_to_user(void __user *,
struct zfcp_sg_list *, size_t);
 
-static int zfcp_cfdc_dev_ioctl(struct file *, unsigned int, unsigned long);
+static long zfcp_cfdc_dev_ioctl(struct file *, unsigned int, unsigned long);
 
 #define ZFCP_CFDC_IOC_MAGIC 0xDD
 #define ZFCP_CFDC_IOC \
diff -bBrauN linux-2.6.x/drivers/s390/scsi-orig/zfcp_def.h 
linux-2.6.x/drivers/s390/scsi/zfcp_def.h
--- linux-2.6.x/drivers/s390/scsi-orig/zfcp_def.h   2005-04-21 
12:36:44.0 +0200
+++ linux-2.6.x/drivers/s390/scsi/zfcp_def.h2005-04-21 12:41:56.0 
+0200
@@ -61,7 +61,6 @@
 #include linux/mempool.h
 #include linux/syscalls.h
 #include linux/ioctl.h
-#include linux/ioctl32.h
 
 / DEBUG FLAGS 
*/
 

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


<    1   2   3   4