Having prototyped my new polygon filler to my satisfaction in C, today
I've been converting it to assembler. With the iPhone stuff and an
Acorn Electron project I've been working on, I haven't done any z80 in
far too long and am not particularly optimistic that I'm writing good
stuff. Actually, it strikes me that I've never really shown any z80
code to anyone, so maybe I'm just not great in general.
Below is most of my new polygon filler. It's incomplete, but only in
relatively minor ways — the scan converter handles edges where x
increases only (obviously x decreases will be the same code with subs
and decs rather than adds and incs, thought I'd leave that until I'm
more confident in the stuff overall) and chucks pixels on the screen
to show scanline ends rather than drawing an actual scanline of pixels
(for which I'll be subverting SP per the usual sort of stuff).
When calculating y intercepts it uses either traditional Bresenham, for
lines that change more in y than in x, or run-slice Bresenham, for lines
that change more in x than in y. Part of the reason for doing both is
that it lets me compare the speeds of the two approaches. If run-slice
does turn out to be faster than standard Bresenham for lines above a
certain length (probably 9 or 10 pixels?), as I suspect, then obviously
I'll use it for both.
Anyway, if some of you z80 experts could have a quick look and tell me
if I'm making any obvious style errors or otherwise missing obvious
optimisations — even if only on a peephole level — I'd be infinitely
grateful. Sorry if the comments are occasionally a bit opaque; some of
them just document which registers are holding which variables from
the original C.
Thanks in advance!
;
; DrawPoly - draws a filled polygon using A vertices, in two arrays
; with x positions starting at (H:0) and y positions at (H+1:0)
;
; clobbers: af, bc, de, hl, af', bc', de', hl'
;
; Edge tables and working variables for DrawPoly.
;
; Both tables are indexed by y: PushToArray keeps the table's page in h and
; puts the scanline number in l. LEFTTAB must therefore start on a 256-byte
; boundary, and RIGHTTTAB must follow it immediately so that the plot loop
; can step from one table to the other with inc h / dec h.
DS ALIGN 256
; Filled by DrawPoly's forward (index-increasing) walk round the vertices.
LEFTTAB:
ds 256
; Filled by DrawPoly's backward (index-decreasing) walk round the vertices.
RIGHTTTAB:
ds 256
; Vertex count, as passed to DrawPoly in A.
NUMVERTS:
db 0
; Address of the vertex arrays, as passed to DrawPoly in HL.
VERTEXPOINTER:
dw 0
; Largest y value found among the vertices.
STARTY:
db 0
; Smallest y value found among the vertices; the plot loop starts here and
; runs for STARTY - ENDY scanlines.
ENDY:
db 0
DrawPoly:
    ; In:  a  = number of vertices
    ;      hl = vertex arrays: x values in page h, y values in page h+1,
    ;           both indexed from offset 0
    ; Steps: find the vertices with the smallest and largest y, walk the
    ; polygon from the first to the second in both directions filling the
    ; two edge tables, then mark both ends of every scanline on screen.

    ; remember the caller's arguments
    ld (VERTEXPOINTER), hl
    ld (NUMVERTS), a
    inc h                       ; hl -> y array
    ld e, a                     ; e = countdown for the second scan
    ld d, a                     ; d = countdown for the first scan

    ; first scan: b = index of the smallest y, c = that y value
    ld l, 0
    ld b, 0
    ld c, (hl)
@highloop:
    inc l                       ; look at the next y value
    dec d                       ; all vertices examined?
    jr z, @+highloopdone
    ld a, (hl)
    cp c
    jr nc, @-highloop           ; not smaller than the best so far: skip
    ld b, l
    ld c, a
    jr @-highloop
@highloopdone:
    ; smallest y is now in c
    ld a, c
    ld (ENDY), a

    ; second scan: c = index of the largest y, d = that y value
    ld l, 0
    ld c, 0
    ld d, (hl)
@lowloop:
    inc l                       ; look at the next y value
    dec e                       ; all vertices examined?
    jr z, @+lowloopdone
    ld a, (hl)
    cp d
    jr c, @-lowloop             ; smaller than the best so far: skip
    ld c, l                     ; ties move on to the later vertex
    ld d, a
    jr @-lowloop
@lowloopdone:
    ; largest y is now in d
    ld a, d
    ld (STARTY), a

    ; walk backwards through the vertex list from b to c, filling RIGHTTTAB
    push bc                     ; b = current vertex, c = target
    ld hl, RIGHTTTAB
@leftloop:
    ld a, b
    cp c
    jr z, @+leftloopdone
    dec a
    jp p, @+noreload            ; still >= 0: no wrap needed
    ld a, (NUMVERTS)            ; stepped below vertex 0: wrap to the last
    dec a
@noreload:
    call @+PushToArray          ; edge from vertex b to vertex a
    ld b, a
    jr @-leftloop
@leftloopdone:

    ; walk forwards through the vertex list from b to c, filling LEFTTAB
    pop bc
    ld hl, LEFTTAB
    ; Only A can be loaded from an absolute address on the Z80, so the
    ; count goes via A; A is reloaded from b at the top of the loop.
    ld a, (NUMVERTS)
    ld d, a
@rightloop:
    ld a, b
    cp c
    jr z, @+rightloopdone
    inc a
    cp d
    jr nz, @+noreload
    xor a                       ; stepped past the last vertex: wrap to 0
@noreload:
    call @+PushToArray          ; edge from vertex b to vertex a
    ld b, a
    jr @-rightloop
@rightloopdone:

    ;
    ; page in the screen, for drawing
    ;
    ld c, HMPR
    in a, (c)
    push af                     ; keep the old paging value for the exit
    ld a, (rampage)
    out (c), a

    ld h, LEFTTAB >> 8
    ld a, (ENDY)
    ld l, a                     ; hl -> LEFTTAB entry for the first scanline
    ld a, (STARTY)
    sub l                       ; a = number of scanlines to mark
    ; A polygon with no height would otherwise give djnz a count of 0,
    ; i.e. 256 passes over stale table contents.
    jr z, @+plotdone
    ld b, a
@plotloop:
    ld a, (hl)                  ; x from LEFTTAB
    inc h
    ld c, (hl)                  ; x from RIGHTTTAB
    inc l
    dec h
    ; NOTE(review): l has already been advanced here, so the address built
    ; below uses the scanline number + 1 - confirm that this is intended.

    ; de = 0x8000 + l * 128 + x / 2, carry = low bit of x
    ld d, l
    ld e, a
    srl d
    rr e
    jr nc, @+evenpx
    ld a, 0x0f
    jr @+pxready
@evenpx:
    ld a, 0xf0
@pxready:
    set 7, d
    ld (de), a

    ; same again for the x taken from RIGHTTTAB, with different byte values
    ld d, l
    ld e, c
    srl d
    rr e
    jr nc, @+evenpx
    ld a, 0x0e
    jr @+pxready
@evenpx:
    ld a, 0xe0
@pxready:
    set 7, d
    ld (de), a
    djnz @-plotloop
@plotdone:

    ;
    ; page the program RAM back in
    ;
    pop af
    ld c, HMPR
    out (c), a
    ret
;
; PushToArray - adds the x value at each scanline crossed by the edge from
; vertex b to vertex a to the table whose page is given by hl. The low byte
; of the write address is the scanline number, starting at the first
; vertex's y.
;
; input:     b = index of the first vertex, a = index of the second,
;            hl = table (only h is used)
; preserves: af, bc, de, hl
; clobbers:  bc', de', hl' (run-slice path only)
;
; Only edges whose x does not decrease are converted so far; an edge whose
; x decreases, or which has no height, returns without writing anything.
;
@PushToArray:
    push de
    push af
    push bc
    push hl

    ; get x1, y1 (the higher one, if either is higher) into bc
    ld hl, (VERTEXPOINTER)
    ld l, b
    ld b, (hl)                  ; b = x1
    inc h
    ld c, (hl)                  ; c = y1

    ; get x2, y2 (the lower one, if either is lower) into de
    ld l, a
    ld a, (hl)
    dec h
    ld d, (hl)                  ; d = x2
    ld e, a                     ; e = y2

    ; turn e into yDelta - this is expected to be positive because the
    ; points were initially clockwise and are passed to this function from
    ; highest to lowest
    ld a, e
    sub c
    jp z, @+endOfPushToArray    ; zero height lines contribute nothing
    ld e, a

    ; get table write address into hl
    pop hl
    push hl
    ld l, c

    ; calculate x positive or x negative, branch appropriately
    ld a, d
    sub b
    jp c, @+xnegative
@xpositive:                     ; unused label, just for reading
    ld d, a
    ; positive xDelta is in a and d, compare to positive yDelta from e
    cp e
    jr z, @+diagonalxpos
    jr nc, @+xdeltagreaterxpos

    ; y delta is greater, x delta is positive - traditional Bresenham
    ; a = error term, c = scanlines left, b = current x
    ld a, e
    srl a
    ld c, e
@xdgxploop:
    ld (hl), b
    sub d
    jr nc, @+noxinc
    inc b
    add e
@noxinc:
    inc l
    dec c
    jr nz, @-xdgxploop
    jp @+endOfPushToArray

    ; x delta equals y delta: x goes up by one on every scanline
@diagonalxpos:
    ld c, b                     ; c = current x
    ld b, d                     ; b = scanlines left
@diagonalloop:
    ld (hl), c
    inc c
    inc l                       ; move on to the next scanline's entry
    djnz @-diagonalloop         ; loop on the body, not on the setup above
    jp @+endOfPushToArray       ; must not run on into the run-slice code

@xdeltagreaterxpos:
    ; x delta is greater, is positive - run-slice Bresenham
    xor a
    push de                     ; xDelta:yDelta, collected by the other bank
    call DIV88                  ; now d = xDelta / yDelta, a = xDelta % yDelta
    ; aiming for:
    ; errorTerm = HL', adjustUp = BC', adjustDown = DE'
    ;
    exx
    pop de
    ld d, 0
    sla e
    rl d                        ; de = adjustDown; rl takes the carry from sla
    ld b, 0
    ld c, a                     ; bc = adjustUp >> 1
    ld h, b
    ld l, c
    and a                       ; clear carry for the sbc
    sbc hl, de                  ; hl = errorTerm
    sla c
    rl b                        ; bc = adjustUp
    exx

    ld a, d
    srl a                       ; carry = wholeStep & 1
    inc a                       ; a = initialPixelCount = finalPixelCount
                                ; (inc leaves the carry alone), d = wholeStep
    push af                     ; store for finalPixelCount
    exx                         ; push and exx leave the carry alone too
    jr nc, @+nolowbit
    ; errorTerm += yDelta (double errorTerm, add adjustDown, halve it)
    add hl, hl
    add hl, de
    sra h
    rr l
    jr @+lowbitdone
@nolowbit:
    ; if !adjustUp then initialPixelCount--
    ; bc is tested with inc/dec so that a keeps the count and the routine
    ; stays in this register bank on every path
    inc b
    dec b
    jr nz, @+lowbitdone
    inc c
    dec c
    jr nz, @+lowbitdone
    dec a
@lowbitdone:
    dec de                      ; see the sbc in the store loop
    exx
    ; To here:
    ;
    ; a = initialPixelCount
    ; b = x1, c = y1
    ; d = wholeStep
    ; e = yDelta, used as the scanline counter
    ; hl' = errorTerm
    ; bc' = adjustUp
    ; de' = adjustDown - 1
    ; hl = address of table
    ; top of stack = af pair with a = finalPixelCount
    srl a
    add b
    ld (hl), a
    ; will progress with a = x
polyt:
    inc l
    dec e
    jr z, @+noloop
@storeloop:
    add d
    exx
    adc hl, bc                  ; to ensure flags set; carry is clear from
                                ; the add d
    jr nc, @+noextra            ; no carry = error term still negative
    inc a
    sbc hl, de                  ; carry will be set, but predecremented de
@noextra:
    exx
    ld (hl), a
    inc l
    dec e
    jr nz, @-storeloop
@noloop:
    ld b, a
    pop af                      ; a = finalPixelCount
    add b
    ld (hl), a
@xnegative:                     ; x-decreasing edges: not converted yet
@endOfPushToArray:
    pop hl
    pop bc
    pop af
    pop de
    ret
;
; DIV88 - 8 bit divide with remainder; adapted from slightly broken version
; at http://map.grauw.nl/sources/external/z80bits.html
;
; input: d = dividend, e = divisor, a = 0
; output: d = quotient, a = remainder
;
; clobbered: f
;
; The original author quotes between 243 and 351 cycles; not re-measured.
;
; Each step shifts the next dividend bit into a and compares a with e.
; The result of a compare is recorded by the NEXT shift of d: the @NCn
; blocks subtract and shift a 1 into d (sl1), the @Cn blocks shift a 0 in
; (sla). The first sla only makes room and its 0 is shifted out again by
; the end.
;
DIV88:
    sla d
    rla
    cp e
    jr c, @+C1
@NC0:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C2
@NC1:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C3
@NC2:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C4
@NC3:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C5
@NC4:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C6
@NC5:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C7
@NC6:
    sub e
    sl1 d
    rla
    cp e
    jr c, @+C8
@NC7:
    sub e
    sl1 d                       ; last quotient bit = 1
    ret
@C1:
    sla d
    rla
    cp e
    jr nc, @-NC1
@C2:
    sla d
    rla
    cp e
    jr nc, @-NC2
@C3:
    sla d
    rla
    cp e
    jr nc, @-NC3
@C4:
    sla d
    rla
    cp e
    jr nc, @-NC4
@C5:
    sla d
    rla
    cp e
    jr nc, @-NC5
@C6:
    sla d
    rla
    cp e
    jr nc, @-NC6
@C7:
    sla d
    rla
    cp e
    jr nc, @-NC7
@C8:
    ; last quotient bit = 0. All eight dividend bits are already in the
    ; remainder, so a must not be shifted again here: an rla at this point
    ; would return twice the remainder (1 / 2 would give a = 2).
    sla d
    ret