On Fri, 27 Feb 2015, Philip Guenther wrote:
> I don't have a system to test this, but I suspect this diff will fix the
> problem: the i386_space_copy() macro can set the direction flag but
> doesn't clear it afterwards, so functions using this macro can violate
> the ABI by returning with the direction flag set.
The insane^Wdaring will also try this diff, which deletes 44 cld's to
match the one added. Not for 5.7--not enough time to exercise all the
cases before then--but afterwards we should go the rest of the way.
amd64 will get a similar diff after the tree unlocks...
Philip
Index: i386/bus_space.c
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/bus_space.c,v
retrieving revision 1.8
diff -u -p -r1.8 bus_space.c
--- i386/bus_space.c 17 Oct 2014 20:37:57 -0000 1.8
+++ i386/bus_space.c 27 Feb 2015 09:44:25 -0000
@@ -303,7 +303,7 @@ i386_bus_space_io_read_region_1(bus_spac
void *_addr = a;
int _port = h + o;
- __asm volatile("cld ;"
+ __asm volatile(
"1: inb %w2,%%al ;"
" stosb ;"
" incl %2 ;"
@@ -320,7 +320,7 @@ i386_bus_space_io_read_region_2(bus_spac
void *_addr = a;
int _port = h + o;
- __asm volatile("cld ;"
+ __asm volatile(
"1: inw %w2,%%ax ;"
" stosw ;"
" addl $2,%2 ;"
@@ -337,7 +337,7 @@ i386_bus_space_io_read_region_4(bus_spac
void *_addr = a;
int _port = h + o;
- __asm volatile("cld ;"
+ __asm volatile(
"1: inl %w2,%%eax ;"
" stosl ;"
" addl $4,%2 ;"
@@ -393,7 +393,7 @@ i386_bus_space_io_write_region_1(bus_spa
const void *_addr = a;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: lodsb ;"
" outb %%al,%w0 ;"
" incl %0 ;"
@@ -410,7 +410,7 @@ i386_bus_space_io_write_region_2(bus_spa
const void *_addr = a;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: lodsw ;"
" outw %%ax,%w0 ;"
" addl $2,%0 ;"
@@ -427,7 +427,7 @@ i386_bus_space_io_write_region_4(bus_spa
const void *_addr = a;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: lodsl ;"
" outl %%eax,%w0 ;"
" addl $4,%0 ;"
@@ -442,7 +442,7 @@ i386_bus_space_io_set_multi_1(bus_space_
{
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: outb %b2, %w1 ;"
" loop 1b" :
"+c" (_cnt) : "d" (h + o), "a" (v) :
@@ -455,7 +455,7 @@ i386_bus_space_io_set_multi_2(bus_space_
{
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: outw %w2, %w1 ;"
" loop 1b" :
"+c" (_cnt) : "d" (h + o), "a" (v) :
@@ -468,7 +468,7 @@ i386_bus_space_io_set_multi_4(bus_space_
{
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: outl %2,%w1 ;"
" loop 1b" :
"+c" (_cnt) : "d" (h + o), "a" (v) :
@@ -617,7 +617,7 @@ i386_bus_space_mem_read_multi_1(bus_spac
{
void *_addr=a;
int _cnt=cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: movb (%2),%%al ;"
" stosb ;"
" loop 1b" :
@@ -631,7 +631,7 @@ i386_bus_space_mem_read_multi_2(bus_spac
{
void *_addr=a;
int _cnt=cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: movw (%2),%%ax ;"
" stosw ;"
" loop 1b" :
@@ -645,7 +645,7 @@ i386_bus_space_mem_read_multi_4(bus_spac
{
void *_addr=a;
int _cnt=cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: movl (%2),%%eax ;"
" stosl ;"
" loop 1b" :
@@ -711,7 +711,7 @@ i386_bus_space_mem_write_multi_1(bus_spa
const void *_addr=a;
int _cnt=cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: lodsb ;"
" movb %%al,(%2) ;"
" loop 1b" :
@@ -726,7 +726,7 @@ i386_bus_space_mem_write_multi_2(bus_spa
const void *_addr = a;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: lodsw ;"
" movw %%ax,(%2) ;"
" loop 1b" :
@@ -741,7 +741,7 @@ i386_bus_space_mem_write_multi_4(bus_spa
const void *_addr=a;
int _cnt=cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: lodsl ;"
" movl %%eax,(%2) ;"
" loop 1b" :
@@ -788,7 +788,7 @@ i386_bus_space_mem_set_multi_1(bus_space
{
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: movb %b2, (%1) ;"
" loop 1b" :
"+c" (_cnt) : "D" (h + o), "a" (v) :
@@ -801,7 +801,7 @@ i386_bus_space_mem_set_multi_2(bus_space
{
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: movw %w2, (%1) ;"
" loop 1b" :
"+c" (_cnt) : "D" (h + o), "a" (v) :
@@ -814,7 +814,7 @@ i386_bus_space_mem_set_multi_4(bus_space
{
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
"1: movl %2,(%1) ;"
" loop 1b" :
"+c" (_cnt) : "D" (h + o), "a" (v) :
@@ -828,7 +828,7 @@ i386_bus_space_mem_set_region_1(bus_spac
int _port = h + o;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
" repne ;"
" stosb" :
"+D" (_port), "+c" (_cnt) : "a" (v) :
@@ -842,7 +842,7 @@ i386_bus_space_mem_set_region_2(bus_spac
int _port = h + o;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
" repne ;"
" stosw" :
"+D" (_port), "+c" (_cnt) : "a" (v) :
@@ -856,7 +856,7 @@ i386_bus_space_mem_set_region_4(bus_spac
int _port = h + o;
int _cnt = cnt;
- __asm volatile("cld ;"
+ __asm volatile(
" repne ;"
" stosl" :
"+D" (_port), "+c" (_cnt) : "a" (v) :
Index: i386/kvm86call.S
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/kvm86call.S,v
retrieving revision 1.6
diff -u -p -r1.6 kvm86call.S
--- i386/kvm86call.S 5 Apr 2010 22:42:43 -0000 1.6
+++ i386/kvm86call.S 27 Feb 2015 09:49:21 -0000
@@ -74,7 +74,6 @@ ENTRY(kvm86_call)
movl vm86frame,%edi /* target frame location */
movl SCRARGFRAME,%esi /* source (set on entry) */
movl $FRAMESIZE/4,%ecx /* sizeof(struct trapframe)/4 */
- cld
rep
movsl /* copy frame to new stack */
@@ -163,7 +162,6 @@ ENTRY(kvm86_ret)
movl 8(%ebp),%esi /* source */
movl SCRARGFRAME,%edi /* destination */
movl $FRAMESIZE/4,%ecx /* size */
- cld
rep
movsl /* copy frame to original frame */
Index: i386/locore.s
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/locore.s,v
retrieving revision 1.150
diff -u -p -r1.150 locore.s
--- i386/locore.s 11 Feb 2015 00:16:07 -0000 1.150
+++ i386/locore.s 27 Feb 2015 09:48:58 -0000
@@ -525,7 +525,6 @@ try586: /* Use the `cpuid' instruction.
subl %edi,%ecx # size of tables
shrl $2,%ecx
xorl %eax, %eax
- cld
rep
stosl
@@ -731,8 +730,7 @@ ENTRY(kcopy)
subl %esi,%eax
cmpl %ecx,%eax # overlapping?
jb 1f
- cld # nope, copy forward
- shrl $2,%ecx # copy by 32-bit words
+ shrl $2,%ecx # nope, copy forward by 32-bit words
rep
movsl
movl 24+FPADD(%esp),%ecx
@@ -821,7 +819,6 @@ _C_LABEL(_copyout_stac):
SMAP_NOP
/* bcopy(%esi, %edi, %eax); */
- cld
movl %eax,%ecx
shrl $2,%ecx
rep
@@ -876,7 +873,6 @@ _C_LABEL(_copyin_stac):
ja _C_LABEL(copy_fault)
/* bcopy(%esi, %edi, %eax); */
- cld
movl %eax,%ecx
shrl $2,%ecx
rep
@@ -951,7 +947,6 @@ _C_LABEL(_copyoutstr_stac):
movl %eax,20+FPADD(%esp)
1: incl %edx
- cld
1: decl %edx
jz 2f
@@ -1009,7 +1004,6 @@ _C_LABEL(_copyinstr_stac):
movl %eax,20+FPADD(%esp)
1: incl %edx
- cld
1: decl %edx
jz 2f
@@ -1071,7 +1065,6 @@ ENTRY(copystr)
movl 16+FPADD(%esp),%edi # edi = to
movl 20+FPADD(%esp),%edx # edx = maxlen
incl %edx
- cld
1: decl %edx
jz 4f
@@ -1519,7 +1512,6 @@ ENTRY(bzero)
movl 8(%esp),%edi
movl 12(%esp),%edx
- cld /* set fill direction forward */
xorl %eax,%eax /* set fill data to 0 */
/*
@@ -1599,7 +1591,6 @@ ENTRY(i686_pagezero)
movl 12(%esp), %edi
movl $1024, %ecx
- cld
ALIGN_TEXT
1:
Index: i386/vector.s
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/vector.s,v
retrieving revision 1.18
diff -u -p -r1.18 vector.s
--- i386/vector.s 28 Nov 2013 19:30:46 -0000 1.18
+++ i386/vector.s 27 Feb 2015 09:49:11 -0000
@@ -49,10 +49,6 @@
* If the interrupt frame is made more flexible, INTR can push %eax first and
* decide the ipending case with less overhead, e.g., by avoiding loading the
* segment registers.
- *
- * XXX
- * Should we do a cld on every system entry to avoid the requirement for
- * scattered cld's?
*/
.globl _C_LABEL(isa_strayintr)
Index: include/bus.h
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/include/bus.h,v
retrieving revision 1.65
diff -u -p -r1.65 bus.h
--- include/bus.h 24 Jan 2015 15:13:55 -0000 1.65
+++ include/bus.h 27 Feb 2015 09:42:56 -0000
@@ -446,23 +446,26 @@ struct i386_bus_space_ops {
!!! bus_space_copy_8 unimplemented !!!
#endif
-#define i386_space_copy1(a1, a2, cnt, movs, df) \
- __asm volatile(df "\n\trep\n\t" movs : \
+#define i386_space_copy1up(a1, a2, cnt, movs) \
+ __asm volatile("rep\n\t" movs : \
+ "+S" (a1), "+D" (a2), "+c" (cnt) :: "memory", "cc");
+#define i386_space_copy1down(a1, a2, cnt, movs) \
+ __asm volatile("std\n\trep\n\t" movs "\n\tcld": \
"+S" (a1), "+D" (a2), "+c" (cnt) :: "memory", "cc");
#define i386_space_copy(a1, a2, sz, cnt) do { \
if ((void *)(a1) < (void *)(a2)) { \
a1 += ((cnt) - 1) * (sz); a2 += ((cnt) - 1) * (sz); \
switch (sz) { \
- case 1: i386_space_copy1(a1,a2,cnt,"movsb","std");break;\
- case 2: i386_space_copy1(a1,a2,cnt,"movsw","std");break;\
- case 4: i386_space_copy1(a1,a2,cnt,"movsl","std");break;\
+ case 1: i386_space_copy1down(a1,a2,cnt,"movsb"); break; \
+ case 2: i386_space_copy1down(a1,a2,cnt,"movsw"); break; \
+ case 4: i386_space_copy1down(a1,a2,cnt,"movsl"); break; \
} \
} else \
switch (sz) { \
- case 1: i386_space_copy1(a1,a2,cnt,"movsb","cld");break;\
- case 2: i386_space_copy1(a1,a2,cnt,"movsw","cld");break;\
- case 4: i386_space_copy1(a1,a2,cnt,"movsl","cld");break;\
+ case 1: i386_space_copy1up(a1,a2,cnt,"movsb"); break; \
+ case 2: i386_space_copy1up(a1,a2,cnt,"movsw"); break; \
+ case 4: i386_space_copy1up(a1,a2,cnt,"movsl"); break; \
} \
} while (0)
Index: include/pio.h
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/include/pio.h,v
retrieving revision 1.11
diff -u -p -r1.11 pio.h
--- include/pio.h 29 Mar 2014 18:09:29 -0000 1.11
+++ include/pio.h 27 Feb 2015 09:49:56 -0000
@@ -76,7 +76,7 @@ __inb(int port)
static __inline void
insb(int port, void *addr, int cnt)
{
- __asm volatile("cld\n\trepne\n\tinsb"
+ __asm volatile("repne\n\tinsb"
: "+D" (addr), "+c" (cnt) : "d" (port) : "memory", "cc");
}
@@ -102,7 +102,7 @@ __inw(int port)
static __inline void
insw(int port, void *addr, int cnt)
{
- __asm volatile("cld\n\trepne\n\tinsw"
+ __asm volatile("repne\n\tinsw"
: "+D" (addr), "+c" (cnt) : "d" (port) : "memory", "cc");
}
@@ -128,7 +128,7 @@ __inl(int port)
static __inline void
insl(int port, void *addr, int cnt)
{
- __asm volatile("cld\n\trepne\n\tinsl"
+ __asm volatile("repne\n\tinsl"
: "+D" (addr), "+c" (cnt) : "d" (port) : "memory", "cc");
}
@@ -150,7 +150,7 @@ __outb(int port, u_int8_t data)
static __inline void
outsb(int port, const void *addr, int cnt)
{
- __asm volatile("cld\n\trepne\n\toutsb"
+ __asm volatile("repne\n\toutsb"
: "+S" (addr), "+c" (cnt) : "d" (port) : "cc");
}
@@ -172,7 +172,7 @@ __outw(int port, u_int16_t data)
static __inline void
outsw(int port, const void *addr, int cnt)
{
- __asm volatile("cld\n\trepne\n\toutsw"
+ __asm volatile("repne\n\toutsw"
: "+S" (addr), "+c" (cnt) : "d" (port) : "cc");
}
@@ -194,7 +194,7 @@ __outl(int port, u_int32_t data)
static __inline void
outsl(int port, const void *addr, int cnt)
{
- __asm volatile("cld\n\trepne\n\toutsl"
+ __asm volatile("repne\n\toutsl"
: "+S" (addr), "+c" (cnt) : "d" (port) : "cc");
}