Hello.
Linus Torvalds wrote:
I think Pavel missed the fact that you have to check the VM86 bit in
Ah, thanks, I must have forgotten the
essentials of the own patch :(
That said, the "ldt_ss" case should be moved _after_ the conditional
tests, since most CPU's out there will do static prediction based on<>
forward/backwards direction if the branch predictor isn't primed.
OK, I moved the "ldt_ss" below the check.
The new patch is attached.
Alan, can you please apply that to an -ac
tree?
Summary:
Attached patch works around the corruption
of the high word of the ESP register, which
is the official bug of x86 CPUs. The bug
triggers only when the one is using the
16bit stack segment, and is described here:
http://www.intel.com/design/intarch/specupdt/27287402.PDF
Acked-by: Linus Torvalds <[EMAIL PROTECTED]>
Acked-by: Petr Vandrovec <[EMAIL PROTECTED]>
Acked-by: Pavel Machek <[EMAIL PROTECTED]>
Signed-off-by: Stas Sergeev <[EMAIL PROTECTED]>
diff -ur linux-2.6.11-ac2/arch/i386/kernel/cpu/common.c linux-2.6.11-ac2-stk/arch/i386/kernel/cpu/common.c
--- linux-2.6.11-ac2/arch/i386/kernel/cpu/common.c 2005-03-13 17:59:45.0 +0300
+++ linux-2.6.11-ac2-stk/arch/i386/kernel/cpu/common.c 2005-03-13 20:36:12.0 +0300
@@ -21,6 +21,10 @@
DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
+unsigned char cpu_16bit_stack[CPU_16BIT_STACK_SIZE];
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
static int cachesize_override __initdata = -1;
static int disable_x86_fxsr __initdata = 0;
static int disable_x86_serial_nr __initdata = 1;
@@ -557,6 +561,7 @@
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = ¤t->thread;
+ __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
if (cpu_test_and_set(cpu, cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -579,6 +584,13 @@
*/
memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
GDT_SIZE);
+
+ /* Set up GDT entry for 16bit stack */
+ *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
+ __u64)stk16_off) << 16) & 0x00ffULL) |
+ __u64)stk16_off) << 32) & 0xff00ULL) |
+ (CPU_16BIT_STACK_SIZE - 1);
+
cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address =
(unsigned long)&per_cpu(cpu_gdt_table, cpu);
diff -ur linux-2.6.11-ac2/arch/i386/kernel/entry.S linux-2.6.11-ac2-stk/arch/i386/kernel/entry.S
--- linux-2.6.11-ac2/arch/i386/kernel/entry.S 2005-03-13 17:59:45.0 +0300
+++ linux-2.6.11-ac2-stk/arch/i386/kernel/entry.S 2005-03-14 00:45:16.0 +0300
@@ -47,6 +47,7 @@
#include
#include
#include
+#include
#include "irq_vectors.h"
#define nr_syscalls ((syscall_table_size)/4)
@@ -78,7 +79,7 @@
#define preempt_stop cli
#else
#define preempt_stop
-#define resume_kernel restore_all
+#define resume_kernel restore_nocheck
#endif
#define SAVE_ALL \
@@ -122,24 +123,6 @@
.previous
-#define RESTORE_ALL \
- RESTORE_REGS \
- addl $4, %esp; \
-1: iret; \
-.section .fixup,"ax"; \
-2: sti; \
- movl $(__USER_DS), %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
- movl $11,%eax; \
- call do_exit; \
-.previous; \
-.section __ex_table,"a";\
- .align 4; \
- .long 1b,2b; \
-.previous
-
-
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
@@ -163,7 +146,7 @@
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
movb CS(%esp), %al
testl $(VM_MASK | 3), %eax
- jz resume_kernel # returning to kernel or vm86-space
+ jz resume_kernel
ENTRY(resume_userspace)
cli# make sure we don't miss an interrupt
# setting need_resched or sigpending
@@ -178,7 +161,7 @@
ENTRY(resume_kernel)
cli
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_all
+ jnz restore_nocheck
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
@@ -257,8 +240,56 @@
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
+
restore_all:
- RESTORE_ALL
+ movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
+ movb OLDSS(%esp), %ah
+ movb CS(%esp), %al
+ andl $(VM_MASK | (4 << 8) | 3), %eax
+ cmpl $((4 << 8) | 3), %eax
+ je ldt_ss # returning to user-space with LDT SS
+restore_nocheck:
+ RESTORE_REGS
+ addl $4, %esp
+1: iret
+.section .fixup,"ax"
+iret_exc:
+ sti
+ movl $__USER_DS, %edx
+ movl %edx, %ds
+ movl %edx, %es
+ movl $11,%eax
+ call do_exit
+.previous
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+
+ldt_ss:
+ larl OLDSS(%esp), %eax
+ jnz restore_nocheck
+ testl $0x0040, %eax # returning to 32bit stack?
+ jnz restore_nocheck # allright, normal return
+ /* If returning to userspace with 16bit stack,
+ * try to fix the higher word of ESP, as the CPU
+ * won't restore it.
+ * This is an "official" bug of all the x86-compatible
+ * CPUs, which