Having prototyped my new polygon filler to my satisfaction in C, today
I've been converting it to assembler. With the iPhone stuff and an
Acorn Electron project I've been working on, I haven't done any z80 in
far too long and am not particularly optimistic that I'm writing good
stuff. Actually, it strikes me that I've never really shown any z80
code to anyone, so maybe I'm just not great in general.

Below is most of my new polygon filler. It's incomplete, but only in
relatively minor ways — the scan converter only handles edges where x
increases (obviously edges where x decreases will be the same code with
subs and decs rather than adds and incs, but I thought I'd leave that
until I'm more confident in the code overall), and it just chucks pixels
on the screen to show the scanline ends rather than drawing an actual
scanline of pixels (for which I'll be subverting SP in the usual way).

When calculating y intercepts it uses either traditional Bresenham for
lines that change in y more than in x, or run-slice Bresenham for lines
that change in x more than in y. Part of the reasoning is that this lets
me compare the speeds of the two approaches. If, as I suspect, run-slice
turns out to be faster than the standard algorithm for lines above a
certain length (probably 9 or 10 pixels?), then obviously I'll use it
for both.

Anyway, if some of you z80 experts could have a quick look and tell me
if I'm making any obvious style errors or otherwise missing obvious
optimisations — even if only on a peephole level — I'd be infinitely
grateful. Sorry if the comments are occasionally a bit opaque; some of
them just document which registers are holding which variables from
the original C.

Thanks in advance!




;
;       DrawPoly - draws a filled polygon using A vertices, in two arrays
;       with x positions starting at (H:0) and y positions at (H+1:0)
;
;       clobbers: af, bc, de, hl, af', bc', de', hl'
;

; Working storage for DrawPoly / PushToArray.
;
; The two edge tables hold one x value per scanline. They must start on a
; 256-byte boundary and sit back to back: the code selects an entry by
; writing the scanline number straight into the low byte of the pointer,
; and steps from LEFTTAB to RIGHTTTAB with a single "inc h".
DS ALIGN 256

LEFTTAB:
        ds 256
RIGHTTTAB:
        ds 256

; number of vertices (the value of a on entry to DrawPoly)
NUMVERTS:
        db 0
; address of the x-coordinate array (the value of hl on entry to DrawPoly);
; the y-coordinate array is read from one page (256 bytes) above it
VERTEXPOINTER:
        dw 0
        
; largest vertex y found by DrawPoly (the plot loop stops just before it)
STARTY:
        db 0
; smallest vertex y found by DrawPoly (the plot loop starts here)
ENDY:
        db 0
DrawPoly:
        ; in:   a = number of vertices
        ;       h = page of the x-coordinate array; the y coordinates are
        ;           read from page h+1 (both arrays are indexed from 0)
        ; out:  one edge table is filled by walking the vertex list backwards
        ;       and the other by walking it forwards, then the two ends of
        ;       every scanline are marked on the screen
        ;
        ; Vertex indices are wrapped with a signed test (jp p), so indices
        ; above 128 are not supported.

        ; remember the arguments; PushToArray and the wrap tests need them
        ld (VERTEXPOINTER), hl
        ld (NUMVERTS), a
        inc h                   ; hl addresses the y array from here on
        ld e, a                 ; e = countdown for the second scan
        ld d, a                 ; d = countdown for the first scan

        ; first scan: find the vertex with the smallest y
        ; b = index of that vertex, c = its y value
        ld l, 0
        ld b, 0
        ld c, (hl)

@highloop:
        inc l                   ; look at next y value

        ; check if the loop is over yet, exit if so
        dec d
        jr z, @+highloopdone

        ld a, (hl)              ; load new y value
        cp c                    ; compare to the smallest seen so far
        jr nc, @-highloop       ; not smaller: nothing to record

        ld b, l
        ld c, a
        jr @-highloop

@highloopdone:

        ; smallest y is now in c
        ld a, c
        ld (ENDY), a

        ; second scan: find the vertex with the largest y
        ; c = index of that vertex, d = its y value
        ld l, 0
        ld c, 0
        ld d, (hl)

@lowloop:
        inc l                   ; look at next y value

        ; check if the loop is over yet, exit if so
        dec e
        jr z, @+lowloopdone

        ld a, (hl)
        cp d
        jr c, @-lowloop         ; smaller than the current largest: skip

        ld c, l                 ; equal values also land here, so the last
        ld d, a                 ; vertex sharing the largest y wins
        jr @-lowloop

@lowloopdone:

        ; largest y is now in d
        ld a, d
        ld (STARTY), a

        push bc                 ; b = current vertex, c = target vertex

        ; walk from the smallest-y vertex to the largest-y vertex by
        ; decreasing index, wrapping from 0 to NUMVERTS-1
        ld hl, RIGHTTTAB

@leftloop:
        ld a, b
        cp c
        jr z, @+leftloopdone

        dec a
        jp p, @+noreload        ; still >= 0: no wrap needed

        ld a, (NUMVERTS)
        dec a

@noreload:
        call @+PushToArray      ; edge from vertex b to vertex a
        ld b, a
        jr @-leftloop

@leftloopdone:

        ; same walk in the other direction, wrapping from NUMVERTS-1 to 0
        pop bc
        ld hl, LEFTTAB
        ld a, (NUMVERTS)        ; the Z80 has no "ld d, (nn)", so the count
        ld d, a                 ; goes through a (a is reloaded just below)

@rightloop:
        ld a, b
        cp c
        jr z, @+rightloopdone

        inc a
        cp d
        jr nz, @+noreload       ; not past the last vertex: no wrap needed

        xor a

@noreload:
        call @+PushToArray      ; edge from vertex b to vertex a
        ld b, a
        jr @-rightloop

@rightloopdone:

        ;
        ; page in the screen, for drawing
        ; (HMPR and rampage are defined outside this listing)
        ;

        ld c, HMPR
        in a, (c)
        push af                 ; previous paging value, restored on exit
        ld a, (rampage)
        out (c), a

        ld h, LEFTTAB >> 8
        ld a, (ENDY)
        ld l, a                 ; hl = LEFTTAB entry for the first scanline

        ld a, (STARTY)
        sub l                   ; a = number of scanlines to mark
        jr z, @+plotdone        ; zero height: djnz with b = 0 would run 256 times
        ld b, a

@plotloop:
        ; fetch both table entries for scanline l
        ld a, (hl)              ; entry from LEFTTAB
        inc h
        ld c, (hl)              ; entry from RIGHTTTAB
        dec h

        ; de = l*128 + x/2, carry = x & 1; bit 7 of d is then set, which
        ; puts the write at 0x8000 + that offset
        ld d, l
        ld e, a
        srl d
        rr e
        jr nc, @+rpx

        ld a, 0x0f              ; odd x
        jr @+pxd

@rpx:
        ld a, 0xf0              ; even x

@pxd:
        set 7, d
        ld (de), a

        ; same again for the RIGHTTTAB entry, with a different marker value
        ld d, l
        ld e, c
        srl d
        rr e
        jr nc, @+rpx

        ld a, 0x0e              ; odd x
        jr @+pxd

@rpx:
        ld a, 0xe0              ; even x

@pxd:
        set 7, d
        ld (de), a

        ; step to the next scanline only after both markers are written, so
        ; each table entry is drawn on the row it belongs to
        inc l
        djnz @-plotloop

@plotdone:

        ;
        ; page the program RAM back in
        ;

        pop af
        ld c, HMPR              ; c was reused inside the plot loop
        out (c), a

        ret


;
;       PushToArray - writes one x value per scanline for the edge running
;       from vertex b to vertex a into the table whose page is given by h
;
;       in:     b = index of the first vertex, a = index of the second vertex
;               h = page of the destination table (l is replaced by y1)
;               (VERTEXPOINTER) = address of the x array; the y array is
;               read from one page above it
;       out:    af, bc, de, hl are preserved
;               bc', de', hl' are overwritten on the run-slice path
;
;       Only edges whose x does not decrease are handled so far; an edge with
;       x2 < x1 is skipped, as is an edge with no height.
;
@PushToArray:

        push de
        push af
        push bc
        push hl

        ; b = x1, c = y1 (first vertex)
        ld hl, (VERTEXPOINTER)
        ld l, b

        ld b, (hl)
        inc h
        ld c, (hl)

        ; d = x2, a = y2 (second vertex)
        ld l, a
        ld a, (hl)

        dec h
        ld d, (hl)

        ; e = yDelta = y2 - y1. The caller is expected to hand edges over
        ; from the smaller y to the larger y, so this is assumed not to go
        ; negative; that is not checked here.
        sub c
        jp z, @+endOfPushToArray        ; zero height lines contribute nothing
        ld e, a

        ; get table write address into hl: page from the saved hl, offset y1
        pop hl
        push hl

        ld l, c

        ; calculate x positive or x negative, branch appropriately
        ld a, d
        sub b
        jp c, @+xnegative

@xpositive:                     ; unused label, just for reading
        ld d, a                 ; d = xDelta
        ; positive xDelta is in a, compare to positive yDelta from e
        cp e

        jr z, @+diagonalxpos
        jr nc, @+xdeltagreaterxpos

        ; y delta is greater, x delta is positive - traditional Bresenham
        ; writes yDelta entries, for scanlines y1 .. y2-1

        ld a, e
        srl a                   ; a = error term, starting at yDelta/2

        ld c, e                 ; c = scanlines left to write

@ymajorloop:
        ld (hl), b

        sub d
        jr nc, @+noxinc

        inc b                   ; error ran out: step x and top the error up
        add a, e

@noxinc:
        inc l
        dec c
        jr nz, @-ymajorloop

        jp @+endOfPushToArray

        ; x delta equals y delta: x advances by one on every scanline
        ; writes yDelta entries, for scanlines y1 .. y2-1
@diagonalxpos:
        ld c, b                 ; c = x
        ld b, d                 ; b = count (xDelta = yDelta, never 0 here)

@diagonalloop:
        ld (hl), c
        inc c
        inc l                   ; next scanline's table entry
        djnz @-diagonalloop     ; loop on the body, not on the setup above

        jp @+endOfPushToArray   ; do not fall into the run-slice code

@xdeltagreaterxpos:

        ; x delta is greater, is positive - run-slice Bresenham
        xor a
        push de                 ; keep yDelta (in e) for the alternate set

        call DIV88              ; now d = xDelta / yDelta, a = xDelta % yDelta

        ; aiming for:
        ; errorTerm = HL', adjustUp = BC', adjustDown = DE'
        ;

        exx
        pop de
        ld d, 0
        sla e
        rl d                    ; de = adjustDown = yDelta * 2; rl (not rlc)
                                ; so the bit shifted out of e reaches d

        ld b, 0
        ld c, a                 ; bc = adjustUp >> 1 (the remainder)

        ld h, b
        ld l, c
        and a                   ; clears carry, leaves a unchanged
        sbc hl, de              ; hl = errorTerm = remainder - adjustDown

        sla c
        rl b                    ; bc = adjustUp = remainder * 2

        exx

        ld a, d
        srl a
        inc a                   ; a = wholeStep / 2 + 1, d = wholeStep

        push af                 ; store for finalPixelCount

        ld a, d
        sra a                   ; carry = wholeStep & 1
        exx                     ; alternate set is active until @lowbitdone
        jr nc, @+nolowbit

        ; wholeStep is odd:
        ; errorTerm += yDelta (double errorTerm, add adjustDown, halve it)
        add hl, hl
        add hl, de
        sra h
        rr l

        jr @+lowbitdone

@nolowbit:
        ; wholeStep is even: if !adjustUp then initialPixelCount--
        ;
        ; a is parked in the operand of the "ld a" below while it is used
        ; to test bc; ld leaves the flags from "or c" intact
        ld (@+astorepos+1), a

        ld a, b
        or c

@astorepos:
        ld a, 23                ; operand is patched by the store above

        jr nz, @+lowbitdone

        ; Stay in the alternate set here: @lowbitdone does the switch back.
        ; NOTE(review): a holds wholeStep >> 1 at this point and is reloaded
        ; from the stack right after @lowbitdone, so this decrement currently
        ; has no effect on the values written - confirm the intended handling
        ; of initialPixelCount.
        dec a

@lowbitdone:

        dec de                  ; pre-decrement so "sbc hl, de" with carry
                                ; set subtracts exactly adjustDown
        exx

        ; To here:
        ;
        ; e = yDelta, used as the scanline countdown
        ; b = x1, c = y1
        ; d = wholeStep
        ; hl' = errorTerm
        ; bc' = adjustUp
        ; de' = adjustDown - 1
        ; hl = address of table
        ; top of stack = af pair with a = finalPixelCount

        pop af
        push af

        srl a
        add a, b
        ld (hl), a              ; first entry: x1 + finalPixelCount / 2

        ; will progress with a = x
polyt:
        inc l
        dec e
        jr z, @+noloop

@storeloop:

        add a, d                ; x += wholeStep

        exx

        adc hl, bc              ; adc so the flags get set; carry is
                                ; expected to be clear from the add above

        jr nc, @+noextra        ; no carry = negative or zero?

        inc a                   ; one extra pixel in this run
        sbc hl, de              ; carry is still set, hence the
                                ; predecremented de

@noextra:

        exx
        ld (hl), a

        inc l
        dec e
        jr nz, @-storeloop

@noloop:
        ; last entry: current x plus finalPixelCount
        ld b, a
        pop af
        add a, b
        ld (hl), a

@xnegative:                     ; x-decreasing edges are not implemented yet

@endOfPushToArray:
        pop hl
        pop bc
        pop af
        pop de

        ret


;
;       DIV88 - 8 bit divide with remainder; adapted from slightly broken
;       version at http://map.grauw.nl/sources/external/z80bits.html
;
; input: d = dividend, e = divisor, a = 0
; output: d = quotient, a = remainder
;
; clobbered: f
;
; The divisor must not be zero (the compare never borrows in that case, so
; the result is meaningless).
;
; How it works: the routine is an unrolled shift-and-subtract divide with
; two parallel chains. Every step shifts the next dividend bit out of d into
; a and compares a with e. The quotient bit decided by a step is fed into
; bit 0 of d by the NEXT step's shift: the @NCn chain (subtract happened)
; shifts with sl1, which inserts a 1, and the @Cn chain (no subtract) shifts
; with sla, which inserts a 0. sl1 (also written sll) is an undocumented Z80
; opcode. d is therefore shifted nine times in total: eight to feed the
; dividend out and one final shift that only inserts the last quotient bit.
;
; author's timing estimate: between 243 and 351 cycles (not re-measured
; after the change to the @C8 exit)
;

DIV88:

        sla d                   ; first dividend bit; the 0 inserted here is
        rla                     ; shifted back out by the ninth shift
        cp e
        jr c, @+C1

@NC0:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C2

@NC1:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C3

@NC2:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C4

@NC3:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C5

@NC4:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C6

@NC5:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C7

@NC6:
        sub e
        sl1 d
        rla
        cp e
        jr c, @+C8

@NC7:
        sub e                   ; a = final remainder
        sl1 d                   ; ninth shift: last quotient bit = 1
        ret

@C1:
        sla d
        rla
        cp e
        jr nc, @-NC1

@C2:
        sla d
        rla
        cp e
        jr nc, @-NC2

@C3:
        sla d
        rla
        cp e
        jr nc, @-NC3

@C4:
        sla d
        rla
        cp e
        jr nc, @-NC4

@C5:
        sla d
        rla
        cp e
        jr nc, @-NC5

@C6:
        sla d
        rla
        cp e
        jr nc, @-NC6

@C7:
        sla d
        rla
        cp e
        jr nc, @-NC7

@C8:
        sla d                   ; ninth shift: last quotient bit = 0
        ret                     ; a already holds the remainder; rotating it
                                ; again here would return it doubled

Reply via email to