Hi!

- bugfix: division of long numbers in the LDIVMODU macro (used for Watcom)
  was wrong. Example: 100000h%8000Fh gave 0FFFF1h instead of 7FFF1h.
- shorter code.

--- Begin Message ---
diff -ruNp old/kernel/ludivmul.inc new/kernel/ludivmul.inc
--- old/kernel/ludivmul.inc     2004-07-17 11:26:04.000000000 +0000
+++ new/kernel/ludivmul.inc     2004-07-17 12:23:14.000000000 +0000
@@ -1,21 +1,26 @@
-;  this one adapted from elks, http://elks.sourceforge.net
-;  multiply cx:bx * dx:ax, result in dx:ax        
+; this one adapted from elks, http://elks.sourceforge.net
+; multiply cx:bx * dx:ax, result in dx:ax
+; optimized by Arkady Belousov:
+;   dx:ax * cx:bx
+; = xh:xl * yh:yl
+; = xh:xl*yh*w + xh:xl*yl
+; = [xh*yh*w*w +] (xl*yh + xh*yl)*w + xl*yl
 
 %macro LMULU 0
 
-        push    si
-        push    cx
-       mov     si, ax   ; save _ax in si
-        mov     ax, bx   ; cx:ax = _cx:_bx
-       mul     dx       ; dx:ax = _bx*_dx (forget dx)
-        xchg    cx, ax   ; cx = low(_dx*_bx)
-       mul     si       ; dx:ax = _cx*_ax (forget dx)
-       add     cx, ax   ; cx = low(_cx*_ax + _dx*_bx)
-       mov     ax, si   ; restore _ax
-       mul     bx       ; dx:ax = _bx*_ax
-        add     dx, cx   ; dx = high(_bx*_ax)+low(_cx*_ax + _dx*_bx)
-        pop     cx
-        pop     si
+       push    cx
+       push    si
+       xchg    si,ax           ; si=xl                 (XCHG instead MOV)
+       xchg    ax,dx           ; ax=xh                 (XCHG instead MOV)
+       mul     bx              ; dx:ax=xh*yl (forget dx)
+       xchg    cx,ax           ; cx=low(xh*yl), ax=yh
+       mul     si              ; dx:ax=xl*yh (forget dx)
+       add     cx,ax           ; cx=low(xl*yh+xh*yl)
+       xchg    ax,si           ; ax=xl                 (XCHG instead MOV)
+       mul     bx              ; dx:ax=xl*yl
+       add     dx,cx
+       pop     si
+       pop     cx
        ret
 
 %endmacro
@@ -25,117 +30,110 @@
 %macro LDIVMODU 0
 ; this one is adapted from an assembly gem:
 ; gem writer: Norbert Juffa, [EMAIL PROTECTED]
-
 ; Dividing 64-bit unsigned integers Assembler / 80386
-
-;   Here is a division routine for dividing two 64-bit unsigned integers. 
-;   I derived it by modifying some old
-;   16-bit code for dividing 32-bit integers that I did several years ago for a 
-;   Turbo-Pascal replacement library.
-;   If a 64-bit signed integer division is needed, appropriate shell code for 
-;   this routine can easily be written.
-;
 ;   (adapted back to 32-bit by Bart Oldeman ;-))
-;
-; __U4D divides two unsigned long numbers, the dividend and the divisor
+;   ...bugfixed and optimized by Arkady Belousov.
+
+; This macro divides two unsigned long numbers, the dividend and the divisor
 ; resulting in a quotient and a remainder.
 ;
 ; input:
-;   dx:ax = dividend
-;   cx:bx = divisor
-;
+;   dx:ax = dividend (x=xh:xl)
+;   cx:bx = divisor  (y=yh:yl)
 ; output:
-;   dx:ax = quotient of division of dividend by divisor
-;   cx:bx = remainder of division of dividend by divisor
-;
+;   dx:ax = quotient of division of dividend by divisor (q=x/y)
+;   cx:bx = remainder of division of dividend by divisor (r=x%y)
 ; destroys:
 ;   flags
 ;
 %if XCPU < 386
 
-       test    cx, cx             ; divisor > 2^32-1 ?
-       jnz     %%big_divisor      ; yes, divisor > 32^32-1
-       cmp     dx, bx             ; only one division needed ? (ecx = 0)
-       jb      %%one_div          ; yes, one division sufficient
-
-
-       xchg    cx, ax             ; save dividend-lo in cx, ax=0
-       xchg    ax, dx             ; get dividend-hi in ax, dx=0
-       div     bx                 ; quotient-hi in eax
-       xchg    ax, cx             ; cx = quotient-hi, ax =dividend-lo
-
-%%one_div:    
-       div     bx                 ; ax = quotient-lo
-       mov     bx, dx             ; bx = remainder-lo
-       mov     dx, cx             ; dx = quotient-hi(quotient in dx:ax)
-       xor     cx, cx             ; cx = remainder-hi (rem. in cx:bx)
-       ret
+       jcxz    %%div3216       ; cx=0 -> divisor < 2^16
+
+       push    si              ; save temp
+        push   di              ;  variables
+
+       push    dx              ; save
+        push   ax              ;  dividend x
+       mov     si,bx           ; si=yl
+        mov    di,cx           ; di:si=cx:bx=y
 
-%%big_divisor:
-       push    si                 ; save temp
-       push    di                 ;  variables
-       push    dx                 ; save
-       push    ax                 ;  dividend
-       mov     si, bx             ; divisor now in
-       mov     di, cx             ;  di:bx and cx:si
 %%shift_loop:
-       shr     dx, 1              ; shift both
-       rcr     ax, 1              ;  divisor and
-       shr     di, 1              ;   and dividend
-       rcr     bx, 1              ;    right by 1 bit
-        jnz     %%shift_loop       ;     loop if di non-zero (rcr does not touch ZF)
-       mov     di, cx             ; restore original divisor (di:si)
-       div     bx                 ; compute quotient
-       pop     bx                 ; get dividend lo-word
-       mov     cx, ax             ; save quotient
-        mul     di                 ; quotient * divisor hi-word (low only)
-        xchg    ax, di             ; save in di
-        mov     ax, cx             ; ax=quotient
-       mul     si                 ; quotient * divisor lo-word
-       add     dx, di             ; dx:ax = quotient * divisor
-       sub     bx, ax             ; dividend-lo - (quot.*divisor)-lo
-       mov     ax, cx             ; get quotient
-       pop     cx                 ; restore dividend hi-word
-       sbb     cx, dx             ; subtract divisor * quot. from dividend
-       sbb     dx, dx             ; 0 if remainder > 0, else FFFFFFFFh
-       and     si, dx             ; nothing to add
-       and     di, dx             ;  back if remainder positive
-       add     bx, si             ; correct remaider
-       adc     cx, di             ;  and quotient if
-       add     ax, dx             ;   necessary
-       xor     dx, dx             ; clear hi-word of quot (ax<=FFFFFFFFh)
-       pop     di                 ; restore temp  
-       pop     si                 ;  variables
+       shr     dx,1            ; shift both
+        rcr    ax,1            ;  divisor and
+       shr     cx,1            ;   and dividend
+        rcr    bx,1            ;    right by 1 bit (rcr preserves ZF)
+       jnz     %%shift_loop    ;     until zero in cx (divisor < 2^16)
+       div     bx              ; ax=quotient q, di:si=y
+
+       mov     cx,ax           ; cx=q
+       mul     di              ; dx:ax=q*yh (forget dx)
+       xchg    bx,ax           ; bx=low(q*yh)  (XCHG instead MOV)
+       mov     ax,cx           ; ax=q
+       mul     si              ; dx:ax=q*yl
+       add     dx,bx           ; dx:ax=q*y, cx=q
+
+       pop     bx              ; bx=xl
+       sub     bx,ax           ; bx=xl-low(q*y)
+       xchg    ax,cx           ; ax=q          (XCHG instead MOV)
+       pop     cx              ; cx=xh
+       sbb     cx,dx           ; cx:bx=x-q*y=remainder r, ax=q
+
+       jae     %%div_done      ; if remainder < 0
+       add     bx,si
+        adc    cx,di           ; correct remainder (r+=y)
+       dec     ax              ;  and quotient (q-=1)
+%%div_done:
+       xor     dx,dx           ; dx:ax=0:q=q
+
+       pop     di              ; restore temp
+       pop     si              ;  variables
+       ret
+
+; dx:ax=x, bx=y, cx=0
+; x=xh:xl=xh*w+xl=[xh/y]*y*w+xh%y*w+xl=[xh/y]*y*w+xt
+; w=2^16, xh=x/w, xl=x%w, xt=xh%y*w+xl
+; remainder =  x%y  =          xt%y
+; quotient  = [x/y] = [xh/y]*w+xt/y
+
+%%div3216:
+       cmp     dx,bx           ; xh < y ?
+       jb      %%one_div       ; yes, one division sufficient
+
+       xchg    cx,ax           ; ax=0, cx=xl
+       xchg    ax,dx           ; dx:ax=0:xh, cx=xl
+       div     bx              ; ax=xh/y, dx=xh%y, cx=xl
+       xchg    ax,cx           ; dx:ax=xh%y*w+xl=xt, cx=xh/y
+
+%%one_div:
+       div     bx              ; ax=xt/y, dx=xt%d=x%d, cx=xh/y
+       mov     bx,dx           ; bx=x%d
+       mov     dx,cx           ; dx:ax=xh/y*w+xt/y=x/y
+       xor     cx,cx           ; cx:bx=x%d
        ret
 
 %else  ; XCPU >= 386 (Svilen Stoianov and Luchezar Georgiev, Varna, Bulgaria)
 
-       push    eax
-       pop     ax
-       push    edx
-       pop     dx
-       push    ecx
-;      pop     cx
-;      push    cx
-       push    bx
-       pop     ecx
-       push    dx
-       push    ax
-       pop     eax
+       push    eax             ; save eax.high
+        pop    ax
+       push    edx             ; save edx.high
+        push   ax
+        pop    eax             ; eax=x
+       push    ecx             ; save ecx.high
+        push   bx
+        pop    ecx             ; ecx=y
+
        xor     edx,edx
-       div     ecx
+       div     ecx             ; eax=q, edx=r
+
        push    edx
-       pop     bx
-       pop     cx
+        pop    bx
+        pop    ecx             ; restore ecx.high, cx:bx=r
        push    eax
-       pop     ax
-       pop     dx
-       push    cx
-       pop     ecx
-       push    dx
-       pop     edx
+        pop    ax
+        pop    edx             ; restore edx.high, dx:ax=q
        push    ax
-       pop     eax
+        pop    eax             ; restore eax.high
        ret
 
 %endif

--- End Message ---

Reply via email to