i386 assembly timing issue

2001-04-20 Thread David Howells

I've attached two slightly different bits of i386 assembly that achieve the
same end, but in slightly different ways. Can someone tell me why Case 1 is
faster than Case 2? Case 1 involves an extra CALL instruction.

* Case 1 has a little wrapper function that saves ECX and EDX before
  calling rwsem_wake().

* Case 2 merges the contents of the wrapper with the caller.

Case 1 is what's generated by the rw-semaphore inline assembly code as of
2.4.4-pre5. Case 2 looks like it ought to be a faster version of the same
thing.

David

###
#
# CASE 1: registers saved in the rwsem_wake register saving stub
#
.text
.align 16

#
# void test_up_read(struct rw_semaphore *sem)
# {
#   up_read(sem);
# }
#
# i386, GNU as (AT&T syntax: "op src, dst").
#   In:    4(%esp) = sem (first stack argument)
#   Uses:  %eax = sem, %edx = value of (%eax) before the xadd
#   Exit:  always returns through test_up_read_done; the contention
#          handler jumps back to that label as well.
#
.globl test_up_read
.type   test_up_read,@function
test_up_read:
        movl    4(%esp), %eax                   # eax = sem
        movl    $-1, %edx                       # addend
        # NOTE(review): no lock prefix on the xadd as written, so it is not
        # atomic with respect to other CPUs -- presumably a UP test build; confirm.
        xadd    %edx,(%eax)                     # (%eax) += -1; edx = previous (%eax)
        js      test_up_read_contention         # new value negative -> slow path
test_up_read_done:
        ret

#
# Register saving stub for rwsem_wake
#
# Saves %edx and %ecx on the stack, calls rwsem_wake, then restores both,
# so a caller of this stub sees %edx and %ecx unchanged on return.
# All other registers are passed through to rwsem_wake and back untouched
# by the stub itself.
#
.globl __rwsem_wake
__rwsem_wake:
        pushl   %edx
        pushl   %ecx
        call    rwsem_wake
        popl    %ecx                            # restore in reverse push order
        popl    %edx
        ret

#
# Contention handler stub for up_read
#
# Reached from the js in test_up_read, with %eax = sem and %edx = the value
# of (%eax) before the xadd.  Lives in its own section (.text.lock) and
# always finishes by jumping back to test_up_read_done.
#
.section .text.lock,"ax"
test_up_read_contention:
        decl    %edx                            # edx = old value - 1 (what the xadd stored)
        # presumably the low 16 bits are the active part of the count --
        # confirm against the rw_semaphore definition
        testl   $65535,%edx                     # any of the low 16 bits still set?
        jnz     test_up_read_done               # yes: nothing to wake
        call    __rwsem_wake                    # stub preserves %edx/%ecx around rwsem_wake
        jmp     test_up_read_done

###
#
# CASE 2: registers saved in the contention handler stub
#
.text
.align 16

#
# void test_up_read(struct rw_semaphore *sem)
# {
#   up_read(sem);
# }
#
# i386, GNU as (AT&T syntax: "op src, dst").
#   In:    4(%esp) = sem (first stack argument)
#   Uses:  %eax = sem, %edx = value of (%eax) before the xadd
#   Exit:  always returns through test_up_read_done; the contention
#          handler jumps back to that label as well.
#
.globl test_up_read
.type   test_up_read,@function
test_up_read:
        movl    4(%esp), %eax                   # eax = sem
        movl    $-1, %edx                       # addend
        # NOTE(review): no lock prefix on the xadd as written, so it is not
        # atomic with respect to other CPUs -- presumably a UP test build; confirm.
        xadd    %edx,(%eax)                     # (%eax) += -1; edx = previous (%eax)
        js      test_up_read_contention         # new value negative -> slow path
test_up_read_done:
        ret

#
# Contention handler stub for up_read
#
# Reached from the js in test_up_read, with %eax = sem and %edx = the value
# of (%eax) before the xadd.  In this variant %edx and %ecx are saved and
# restored here, around the call, rather than in a separate stub.  Lives in
# its own section (.text.lock) and always finishes by jumping back to
# test_up_read_done.
#
.section .text.lock,"ax"
test_up_read_contention:
        decl    %edx                            # edx = old value - 1 (what the xadd stored)
        # presumably the low 16 bits are the active part of the count --
        # confirm against the rw_semaphore definition
        testl   $65535,%edx                     # any of the low 16 bits still set?
        jnz     test_up_read_done               # yes: nothing to wake
        pushl   %edx
        pushl   %ecx
        # NOTE(review): the target is __rwsem_wake as posted.  If that is the
        # register-saving stub, %edx/%ecx get saved twice; a fully merged
        # version would presumably call rwsem_wake directly -- confirm intent.
        call    __rwsem_wake
        popl    %ecx                            # restore in reverse push order
        popl    %edx
        jmp     test_up_read_done
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



i386 assembly timing issue

2001-04-20 Thread David Howells

I've attached two slightly different bits of i386 assembly that achieve the
same end, but in slightly different ways. Can someone tell me why Case 1 is
faster than Case 2? Case 1 involves an extra CALL instruction.

* Case 1 has a little wrapper function that saves ECX and EDX before
  calling rwsem_wake().

* Case 2 merges the contents of the wrapper with the caller.

Case 1 is what's generated by the rw-semaphore inline assembly code as of
2.4.4-pre5. Case 2 looks like it ought to be a faster version of the same
thing.

David

###
#
# CASE 1: registers saved in the rwsem_wake register saving stub
#
.text
.align 16

#
# void test_up_read(struct rw_semaphore *sem)
# {
#   up_read(sem);
# }
#
# i386, GNU as (AT&T syntax: "op src, dst").
#   In:    4(%esp) = sem (first stack argument)
#   Uses:  %eax = sem, %edx = value of (%eax) before the xadd
#   Exit:  always returns through test_up_read_done; the contention
#          handler jumps back to that label as well.
#
.globl test_up_read
.type   test_up_read,@function
test_up_read:
        movl    4(%esp), %eax                   # eax = sem
        movl    $-1, %edx                       # addend
        # NOTE(review): no lock prefix on the xadd as written, so it is not
        # atomic with respect to other CPUs -- presumably a UP test build; confirm.
        xadd    %edx,(%eax)                     # (%eax) += -1; edx = previous (%eax)
        js      test_up_read_contention         # new value negative -> slow path
test_up_read_done:
        ret

#
# Register saving stub for rwsem_wake
#
# Saves %edx and %ecx on the stack, calls rwsem_wake, then restores both,
# so a caller of this stub sees %edx and %ecx unchanged on return.
# All other registers are passed through to rwsem_wake and back untouched
# by the stub itself.
#
.globl __rwsem_wake
__rwsem_wake:
        pushl   %edx
        pushl   %ecx
        call    rwsem_wake
        popl    %ecx                            # restore in reverse push order
        popl    %edx
        ret

#
# Contention handler stub for up_read
#
# Reached from the js in test_up_read, with %eax = sem and %edx = the value
# of (%eax) before the xadd.  Lives in its own section (.text.lock) and
# always finishes by jumping back to test_up_read_done.
#
.section .text.lock,"ax"
test_up_read_contention:
        decl    %edx                            # edx = old value - 1 (what the xadd stored)
        # presumably the low 16 bits are the active part of the count --
        # confirm against the rw_semaphore definition
        testl   $65535,%edx                     # any of the low 16 bits still set?
        jnz     test_up_read_done               # yes: nothing to wake
        call    __rwsem_wake                    # stub preserves %edx/%ecx around rwsem_wake
        jmp     test_up_read_done

###
#
# CASE 2: registers saved in the contention handler stub
#
.text
.align 16

#
# void test_up_read(struct rw_semaphore *sem)
# {
#   up_read(sem);
# }
#
# i386, GNU as (AT&T syntax: "op src, dst").
#   In:    4(%esp) = sem (first stack argument)
#   Uses:  %eax = sem, %edx = value of (%eax) before the xadd
#   Exit:  always returns through test_up_read_done; the contention
#          handler jumps back to that label as well.
#
.globl test_up_read
.type   test_up_read,@function
test_up_read:
        movl    4(%esp), %eax                   # eax = sem
        movl    $-1, %edx                       # addend
        # NOTE(review): no lock prefix on the xadd as written, so it is not
        # atomic with respect to other CPUs -- presumably a UP test build; confirm.
        xadd    %edx,(%eax)                     # (%eax) += -1; edx = previous (%eax)
        js      test_up_read_contention         # new value negative -> slow path
test_up_read_done:
        ret

#
# Contention handler stub for up_read
#
# Reached from the js in test_up_read, with %eax = sem and %edx = the value
# of (%eax) before the xadd.  In this variant %edx and %ecx are saved and
# restored here, around the call, rather than in a separate stub.  Lives in
# its own section (.text.lock) and always finishes by jumping back to
# test_up_read_done.
#
.section .text.lock,"ax"
test_up_read_contention:
        decl    %edx                            # edx = old value - 1 (what the xadd stored)
        # presumably the low 16 bits are the active part of the count --
        # confirm against the rw_semaphore definition
        testl   $65535,%edx                     # any of the low 16 bits still set?
        jnz     test_up_read_done               # yes: nothing to wake
        pushl   %edx
        pushl   %ecx
        # NOTE(review): the target is __rwsem_wake as posted.  If that is the
        # register-saving stub, %edx/%ecx get saved twice; a fully merged
        # version would presumably call rwsem_wake directly -- confirm intent.
        call    __rwsem_wake
        popl    %ecx                            # restore in reverse push order
        popl    %edx
        jmp     test_up_read_done
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/