--- Begin Message ---
diff -ruNp old/kernel/ludivmul.inc new/kernel/ludivmul.inc
--- old/kernel/ludivmul.inc 2004-07-17 11:26:04.000000000 +0000
+++ new/kernel/ludivmul.inc 2004-07-17 12:23:14.000000000 +0000
@@ -1,21 +1,26 @@
-; this one adapted from elks, http://elks.sourceforge.net
-; multiply cx:bx * dx:ax, result in dx:ax
+; this one adapted from elks, http://elks.sourceforge.net
+; multiply cx:bx * dx:ax, result in dx:ax
+; optimized by Arkady Belousov:
+; dx:ax * cx:bx
+; = xh:xl * yh:yl
+; = xh:xl*yh*w + xh:xl*yl
+; = [xh*yh*w*w +] (xl*yh + xh*yl)*w + xl*yl
%macro LMULU 0
- push si
- push cx
- mov si, ax ; save _ax in si
- mov ax, bx ; cx:ax = _cx:_bx
- mul dx ; dx:ax = _bx*_dx (forget dx)
- xchg cx, ax ; cx = low(_dx*_bx)
- mul si ; dx:ax = _cx*_ax (forget dx)
- add cx, ax ; cx = low(_cx*_ax + _dx*_bx)
- mov ax, si ; restore _ax
- mul bx ; dx:ax = _bx*_ax
- add dx, cx ; dx = high(_bx*_ax)+low(_cx*_ax + _dx*_bx)
- pop cx
- pop si
+ push cx ; preserve cx (yh); it is used as scratch below
+ push si ; preserve si; it holds xl below
+ xchg si,ax ; si=xl, ax=junk (XCHG instead of MOV)
+ xchg ax,dx ; ax=xh, dx=junk (XCHG instead of MOV)
+ mul bx ; dx:ax=xh*yl (forget dx: only the low word contributes)
+ xchg cx,ax ; cx=low(xh*yl), ax=yh
+ mul si ; dx:ax=xl*yh (forget dx: only the low word contributes)
+ add cx,ax ; cx=low(xl*yh+xh*yl), the cross term of the high word
+ xchg ax,si ; ax=xl, si=junk (XCHG instead of MOV)
+ mul bx ; dx:ax=xl*yl
+ add dx,cx ; dx:ax=low 32 bits of x*y (carry out is discarded)
+ pop si ; restore the caller's si
+ pop cx ; restore the caller's cx
ret
%endmacro
@@ -25,117 +30,120 @@
%macro LDIVMODU 0
; this one is adapted from an assembly gem:
; gem writer: Norbert Juffa, [EMAIL PROTECTED]
-
; Dividing 64-bit unsigned integers Assembler / 80386
-
-; Here is a division routine for dividing two 64-bit unsigned integers.
-; I derived it by modifying some old
-; 16-bit code for dividing 32-bit integers that I did several years ago for a
-; Turbo-Pascal replacement library.
-; If a 64-bit signed integer division is needed, appropriate shell code for
-; this routine can easily be written.
-;
; (adapted back to 32-bit by Bart Oldeman ;-))
-;
-; __U4D divides two unsigned long numbers, the dividend and the divisor
+; ...bugfixed and optimized by Arkady Belousov.
+
+; This macro divides two unsigned long numbers, the dividend and the divisor
; resulting in a quotient and a remainder.
;
; input:
-; dx:ax = dividend
-; cx:bx = divisor
-;
+; dx:ax = dividend (x=xh:xl)
+; cx:bx = divisor (y=yh:yl)
; output:
-; dx:ax = quotient of division of dividend by divisor
-; cx:bx = remainder of division of dividend by divisor
-;
+; dx:ax = quotient of division of dividend by divisor (q=x/y)
+; cx:bx = remainder of division of dividend by divisor (r=x%y)
; destroys:
; flags
;
%if XCPU < 386
- test cx, cx ; divisor > 2^32-1 ?
- jnz %%big_divisor ; yes, divisor > 32^32-1
- cmp dx, bx ; only one division needed ? (ecx = 0)
- jb %%one_div ; yes, one division sufficient
-
-
- xchg cx, ax ; save dividend-lo in cx, ax=0
- xchg ax, dx ; get dividend-hi in ax, dx=0
- div bx ; quotient-hi in eax
- xchg ax, cx ; cx = quotient-hi, ax =dividend-lo
-
-%%one_div:
- div bx ; ax = quotient-lo
- mov bx, dx ; bx = remainder-lo
- mov dx, cx ; dx = quotient-hi(quotient in dx:ax)
- xor cx, cx ; cx = remainder-hi (rem. in cx:bx)
- ret
+ jcxz %%div3216 ; cx=0 -> divisor < 2^16
+
+ push si ; save temp
+ push di ; variables
+
+ push dx ; save
+ push ax ; dividend x (xl is on top of the stack, xh below it)
+ mov si,bx ; si=yl
+ mov di,cx ; di:si=cx:bx=y
-%%big_divisor:
- push si ; save temp
- push di ; variables
- push dx ; save
- push ax ; dividend
- mov si, bx ; divisor now in
- mov di, cx ; di:bx and cx:si
%%shift_loop:
- shr dx, 1 ; shift both
- rcr ax, 1 ; divisor and
- shr di, 1 ; and dividend
- rcr bx, 1 ; right by 1 bit
- jnz %%shift_loop ; loop if di non-zero (rcr does not touch ZF)
- mov di, cx ; restore original divisor (di:si)
- div bx ; compute quotient
- pop bx ; get dividend lo-word
- mov cx, ax ; save quotient
- mul di ; quotient * divisor hi-word (low only)
- xchg ax, di ; save in di
- mov ax, cx ; ax=quotient
- mul si ; quotient * divisor lo-word
- add dx, di ; dx:ax = quotient * divisor
- sub bx, ax ; dividend-lo - (quot.*divisor)-lo
- mov ax, cx ; get quotient
- pop cx ; restore dividend hi-word
- sbb cx, dx ; subtract divisor * quot. from dividend
- sbb dx, dx ; 0 if remainder > 0, else FFFFFFFFh
- and si, dx ; nothing to add
- and di, dx ; back if remainder positive
- add bx, si ; correct remaider
- adc cx, di ; and quotient if
- add ax, dx ; necessary
- xor dx, dx ; clear hi-word of quot (ax<=FFFFFFFFh)
- pop di ; restore temp
- pop si ; variables
+ shr dx,1 ; shift both
+ rcr ax,1 ; dividend (dx:ax)
+ shr cx,1 ; and divisor (cx:bx)
+ rcr bx,1 ; right by 1 bit (rcr preserves ZF)
+ jnz %%shift_loop ; until zero in cx (divisor < 2^16)
+ div bx ; ax=quotient estimate q, di:si=y
+
+; The estimate q is either exact or 1 too big. When it is too big, q*y can
+; exceed 2^32-1, so the product is checked for overflow before it is used.
+ mov cx,ax ; cx=q
+%%mul_back:
+ mul di ; dx:ax=q*yh, CF=1 if it needs more than 16 bits
+ jc %%q_too_big ; then q*y >= 2^32 > x: q is 1 too big
+ xchg bx,ax ; bx=low(q*yh) (XCHG instead of MOV)
+ mov ax,cx ; ax=q
+ mul si ; dx:ax=q*yl
+ add dx,bx ; dx:ax=q*y, cx=q, CF=1 if q*y >= 2^32
+ jnc %%mul_done ; product fits in 32 bits
+%%q_too_big:
+ dec cx ; q-=1: q is exact now, so q*y <= x < 2^32
+ mov ax,cx ; ax=q
+ jmp short %%mul_back ; redo the product (happens at most once)
+%%mul_done:
+
+ pop bx ; bx=xl
+ sub bx,ax ; bx=xl-low(q*y)
+ xchg ax,cx ; ax=q (XCHG instead of MOV)
+ pop cx ; cx=xh
+ sbb cx,dx ; cx:bx=x-q*y=remainder r, ax=q
+
+ jae %%div_done ; no borrow: r >= 0, q is exact
+ add bx,si ; borrow: r < 0, q was 1 too big
+ adc cx,di ; correct remainder (r+=y)
+ dec ax ; and quotient (q-=1)
+%%div_done:
+ xor dx,dx ; dx:ax=0:q=q
+
+ pop di ; restore temp
+ pop si ; variables
+ ret
+
+; dx:ax=x, bx=y, cx=0 (divisor fits in 16 bits)
+; x=xh:xl=xh*w+xl=[xh/y]*y*w+xh%y*w+xl=[xh/y]*y*w+xt
+; w=2^16, xh=x/w, xl=x%w, xt=xh%y*w+xl
+; remainder = x%y = xt%y
+; quotient = [x/y] = [xh/y]*w+xt/y
+
+%%div3216:
+ cmp dx,bx ; xh < y ?
+ jb %%one_div ; yes, one division sufficient (cx=0=xh/y)
+
+ xchg cx,ax ; ax=0, cx=xl
+ xchg ax,dx ; dx:ax=0:xh, cx=xl
+ div bx ; ax=xh/y, dx=xh%y, cx=xl
+ xchg ax,cx ; dx:ax=xh%y*w+xl=xt, cx=xh/y
+
+%%one_div:
+ div bx ; ax=xt/y, dx=xt%y=x%y, cx=xh/y (dx < y, so no overflow)
+ mov bx,dx ; bx=x%y
+ mov dx,cx ; dx:ax=xh/y*w+xt/y=x/y
+ xor cx,cx ; cx:bx=x%y (remainder < y < 2^16, high word is 0)
ret
%else ; XCPU >= 386 (Svilen Stoianov and Luchezar Georgiev, Varna, Bulgaria)
- push eax
- pop ax
- push edx
- pop dx
- push ecx
-; pop cx
-; push cx
- push bx
- pop ecx
- push dx
- push ax
- pop eax
+ push eax ; save eax.high
+ pop ax ; drop the low word again: only eax.high stays on the stack
+ push edx ; save edx.high (the low word pushed here is xh)
+ push ax ; push xl right below xh
+ pop eax ; eax=xh:xl=x, edx.high stays on the stack
+ push ecx ; save ecx.high (the low word pushed here is yh)
+ push bx ; push yl right below yh
+ pop ecx ; ecx=yh:yl=y, ecx.high stays on the stack
+
xor edx,edx
- div ecx
+ div ecx ; eax=q, edx=r (dividend is edx:eax=0:x)
+
push edx
- pop bx
- pop cx
+ pop bx ; bx=low word of r, high word of r stays on the stack
+ pop ecx ; restore ecx.high, cx=high word of r, so cx:bx=r
push eax
- pop ax
- pop dx
- push cx
- pop ecx
- push dx
- pop edx
+ pop ax ; ax=low word of q, high word of q stays on the stack
+ pop edx ; restore edx.high, dx=high word of q, so dx:ax=q
push ax
- pop eax
+ pop eax ; restore eax.high, ax=low word of q
ret
%endif
--- End Message ---