Hi,

Could you try the attached patch? It replaces memcpy with 32-bit-wise
copies, at least for 8-bpp framebuffers with 8-, 12-, or 16-pixel-wide fonts.

Thanks,
rin

On 2019/08/03 10:05, Michael wrote:
Hello,

On Sat, 3 Aug 2019 08:58:02 +0900
Rin Okuyama <rokuyama...@gmail.com> wrote:

Hi Michael,

I'm so sorry for the breakage. I'll investigate it.
Font width is 8?

It doesn't seem to matter: the G5 used Gallant 12x22, the others used an
8-pixel-wide font. Colour depth is always 8, though.

PS
I decided to buy my own macppc machine. Could anyone
recommend a model that is

- serial console capable
- as new and as small as possible

G4 and models don't really have serial ports anymore, unfortunately,
although many can be fitted with a serial port instead of a modem as a
3rd party option.

have fun
Michael

Index: sys/dev/rasops/rasops.c
===================================================================
RCS file: /cvsroot/src/sys/dev/rasops/rasops.c,v
retrieving revision 1.108
diff -p -u -r1.108 rasops.c
--- sys/dev/rasops/rasops.c     2 Aug 2019 23:24:37 -0000       1.108
+++ sys/dev/rasops/rasops.c     3 Aug 2019 14:40:45 -0000
@@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1
 
 #define        _RASOPS_PRIVATE
 #include <dev/rasops/rasops.h>
+#include <dev/rasops/rasops_masks.h>   /* XXX for MBE */
 
 #ifndef _KERNEL
 #include <errno.h>
@@ -69,6 +70,23 @@ struct rasops_matchdata {
        int ident;
 };     
 
+const uint32_t rasops_lmask32[4 + 1] = {
+       MBE(0x00000000), MBE(0x00ffffff), MBE(0x0000ffff), MBE(0x000000ff),
+       MBE(0x00000000),
+};
+
+const uint32_t rasops_rmask32[4 + 1] = {
+       MBE(0x00000000), MBE(0xff000000), MBE(0xffff0000), MBE(0xffffff00),
+       MBE(0xffffffff),
+};
+
+const uint32_t rasops_pmask32[4][4] = {
+      { MBE(0xffffffff), MBE(0xff000000), MBE(0xffff0000), MBE(0xffffffff), },
+      { MBE(0x00000000), MBE(0x00ff0000), MBE(0x00ffff00), MBE(0x00ffffff), },
+      { MBE(0x00000000), MBE(0x0000ff00), MBE(0x0000ffff), MBE(0x00000000), },
+      { MBE(0x00000000), MBE(0x000000ff), MBE(0x00000000), MBE(0x00000000), },
+};
+
 /* ANSI colormap (R,G,B). Upper 8 are high-intensity */
 const uint8_t rasops_cmap[256 * 3] = {
        0x00, 0x00, 0x00, /* black */
@@ -429,7 +447,8 @@ rasops_reconfig(struct rasops_info *ri, 
 
        /* Clear the entire display */
        if ((ri->ri_flg & RI_CLEAR) != 0)
-               memset(ri->ri_bits, 0, ri->ri_stride * ri->ri_height);
+               rasops_memset32((uint32_t *)ri->ri_bits, 0,
+                   ri->ri_stride * ri->ri_height);
 
        /* Now centre our window if needs be */
        if ((ri->ri_flg & RI_CENTER) != 0) {
@@ -690,7 +709,7 @@ static void
 rasops_copyrows(void *cookie, int src, int dst, int num)
 {
        struct rasops_info *ri = (struct rasops_info *)cookie;
-       uint8_t *sp, *dp, *hp;
+       uint32_t *sp, *dp, *hp;
        int n, stride;
 
        hp = NULL;      /* XXX GCC */
@@ -723,19 +742,19 @@ rasops_copyrows(void *cookie, int src, i
        n = ri->ri_emustride;
        stride = ri->ri_stride;
 
-       sp = ri->ri_bits + src * ri->ri_yscale;
-       dp = ri->ri_bits + dst * ri->ri_yscale;
+       sp = (uint32_t *)(ri->ri_bits + src * ri->ri_yscale);
+       dp = (uint32_t *)(ri->ri_bits + dst * ri->ri_yscale);
        if (ri->ri_hwbits)
-               hp = ri->ri_hwbits + dst * ri->ri_yscale;
+               hp = (uint32_t *)(ri->ri_hwbits + dst * ri->ri_yscale);
 
        while (num--) {
-               memcpy(dp, sp, n);
-               dp += stride;
+               rasops_memcpy32(dp, sp, n);
+               DELTA(dp, stride, uint32_t *);
                if (ri->ri_hwbits) {
-                       memcpy(hp, sp, n);
-                       hp += stride;
+                       rasops_memcpy32(hp, sp, n);
+                       DELTA(hp, stride, uint32_t *);
                }
-               sp += stride;
+               DELTA(sp, stride, uint32_t *);
        }
 }
 
@@ -792,9 +811,9 @@ rasops_copycols(void *cookie, int row, i
                hp = ri->ri_hwbits + row + dst * ri->ri_xscale;
 
        while (height--) {
-               memmove(dp, sp, num);
+               memmove(dp, sp, num);           /* XXXRO not 32-bit operation */
                if (ri->ri_hwbits) {
-                       memcpy(hp, dp, num);
+                       memcpy(hp, dp, num);    /* XXXRO */
                        hp += ri->ri_stride;
                }
                dp += ri->ri_stride;
@@ -980,9 +999,8 @@ void
 rasops_eraserows(void *cookie, int row, int num, long attr)
 {
        struct rasops_info *ri = (struct rasops_info *)cookie;
-       uint32_t *buf = (uint32_t *)ri->ri_buf;
        uint32_t *rp, *hp, clr;
-       int stride, cnt;
+       int n, stride;
 
        hp = NULL;      /* XXX GCC */
 
@@ -1008,29 +1026,28 @@ rasops_eraserows(void *cookie, int row, 
         * the RI_FULLCLEAR flag is set, clear the entire display.
         */
        if (num == ri->ri_rows && (ri->ri_flg & RI_FULLCLEAR) != 0) {
-               stride = ri->ri_stride;
+               n = ri->ri_stride;
                num = ri->ri_height;
                rp = (uint32_t *)ri->ri_origbits;
                if (ri->ri_hwbits)
                        hp = (uint32_t *)ri->ri_hworigbits;
        } else {
-               stride = ri->ri_emustride;
+               n = ri->ri_emustride;
                num *= ri->ri_font->fontheight;
                rp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale);
                if (ri->ri_hwbits)
                        hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale);
        }
 
-       for (cnt = 0; cnt < stride >> 2; cnt++)
-               buf[cnt] = clr;
+       stride = ri->ri_stride;
 
        while (num--) {
-               memcpy(rp, buf, stride);
+               rasops_memset32(rp, clr, n);
+               DELTA(rp, stride, uint32_t *);
                if (ri->ri_hwbits) {
-                       memcpy(hp, buf, stride);
-                       DELTA(hp, ri->ri_stride, uint32_t *);
+                       rasops_memset32(hp, clr, n);
+                       DELTA(hp, stride, uint32_t *);
                }
-               DELTA(rp, ri->ri_stride, uint32_t *);
        }
 }
 
@@ -1042,9 +1059,8 @@ static void
 rasops_do_cursor(struct rasops_info *ri)
 {
        int full, height, cnt, slop1, slop2, row, col;
-       uint32_t tmp32, msk1, msk2;
-       uint8_t tmp8;
-       uint8_t *dp, *rp, *hp;
+       uint32_t mask, mask1, mask2, tmp32;
+       uint32_t *dp, *rp, *hp;
 
        hp = NULL;      /* XXX GCC */
 
@@ -1069,9 +1085,11 @@ rasops_do_cursor(struct rasops_info *ri)
                col = ri->ri_ccol;
        }
 
-       rp = ri->ri_bits + row * ri->ri_yscale + col * ri->ri_xscale;
+       col *= ri->ri_xscale;
+       rp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale + (col & ~3));
        if (ri->ri_hwbits)
-               hp = ri->ri_hwbits + row * ri->ri_yscale + col * ri->ri_xscale;
+               hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale +
+                   (col & ~3));
        height = ri->ri_font->fontheight;
 
        /*
@@ -1081,17 +1099,18 @@ rasops_do_cursor(struct rasops_info *ri)
         * fontwidth = 8 and bpp = 1. So we take care of it.
         */
        if (ri->ri_xscale == 1) {
-               while (height--) {
-                       tmp8 = ~*rp;
-
-                       *rp = tmp8;
-                       rp += ri->ri_stride;
+               mask = rasops_pmask32[col & 3][1];
 
+               while (height--) {
+                       tmp32 = *rp ^ mask;
+                       *rp = tmp32;
+                       DELTA(rp, ri->ri_stride, uint32_t *);
                        if (ri->ri_hwbits) {
-                               *hp = tmp8;
-                               hp += ri->ri_stride;
+                               *hp = tmp32;
+                               DELTA(hp, ri->ri_stride, uint32_t *);
                        }
                }
+
                return;
        }
 
@@ -1101,42 +1120,35 @@ rasops_do_cursor(struct rasops_info *ri)
         * Note that siop1 <= ri_xscale even for ri_xscale = 2,
         * since rp % 3 = 0 or 2 (ri_stride % 4 = 0).
         */
-       slop1 = (4 - ((uintptr_t)rp & 3)) & 3;
+       slop1 = (4 - (col & 3)) & 3;
        slop2 = (ri->ri_xscale - slop1) & 3;
        full = (ri->ri_xscale - slop1 /* - slop2 */) >> 2;
 
-       rp = (uint8_t *)((uintptr_t)rp & ~3);
-       hp = (uint8_t *)((uintptr_t)hp & ~3);
-
-       msk1 = !slop1 ? 0 : be32toh(0xffffffffU >> (32 - (8 * slop1)));
-       msk2 = !slop2 ? 0 : be32toh(0xffffffffU << (32 - (8 * slop2)));
+       mask1 = rasops_lmask32[4 - slop1];
+       mask2 = rasops_rmask32[slop2];
 
        while (height--) {
                dp = rp;
 
                if (slop1) {
-                       tmp32 = *(uint32_t *)dp ^ msk1;
-                       *(uint32_t *)dp = tmp32;
-                       dp += 4;
+                       *dp = *dp ^ mask1;
+                       dp++;
                }
 
                for (cnt = full; cnt; cnt--) {
-                       tmp32 = ~*(uint32_t *)dp;
-                       *(uint32_t *)dp = tmp32;
-                       dp += 4;
+                       *dp = ~*dp;
+                       dp++;
                }
 
-               if (slop2) {
-                       tmp32 = *(uint32_t *)dp ^ msk2;
-                       *(uint32_t *)dp = tmp32;
-               }
+               if (slop2)
+                       *dp = *dp ^ mask2;
 
                if (ri->ri_hwbits) {
-                       memcpy(hp, rp, ((slop1 != 0) + full +
+                       rasops_memcpy32(hp, rp, ((slop1 != 0) + full +
                            (slop2 != 0)) << 2);
-                       hp += ri->ri_stride;
+                       DELTA(hp, ri->ri_stride, uint32_t *);
                }
-               rp += ri->ri_stride;
+               DELTA(rp, ri->ri_stride, uint32_t *);
        }
 }
 
@@ -1147,9 +1159,8 @@ void
 rasops_erasecols(void *cookie, int row, int col, int num, long attr)
 {
        struct rasops_info *ri = (struct rasops_info *)cookie;
-       uint32_t *buf = ri->ri_buf;
-       int height, cnt, clr;
-       uint32_t *dp, *rp, *hp;
+       uint32_t height, clr, *hp;
+       uint8_t *rp;
 
        hp = NULL;      /* XXX GCC */
 
@@ -1170,33 +1181,33 @@ rasops_erasecols(void *cookie, int row, 
 #endif
 
        num *= ri->ri_xscale;
-       rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
+       col *= ri->ri_xscale;
+       rp = ri->ri_bits + row * ri->ri_yscale + col;
        if (ri->ri_hwbits)
-               hp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
-                   col*ri->ri_xscale);
+               hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale +
+                   (col & ~3));
        height = ri->ri_font->fontheight;
        clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
 
-       dp = buf;
-
-       /* Write 4 bytes per loop */
-       for (cnt = num >> 2; cnt; cnt--)
-               *dp++ = clr;
-
-       /* Write unaligned trailing slop */
-       for (cnt = num & 3; cnt; cnt--) {
-               *(uint8_t *)dp = clr;
-               DELTA(dp, 1, uint32_t *);
-       }
-
-       while (height--) {
-               memcpy(rp, buf, num);
-               DELTA(rp, ri->ri_stride, uint32_t *);
-               if (ri->ri_hwbits) {
-                       memcpy(hp, buf, num);
-                       DELTA(hp, ri->ri_stride, uint32_t *);
+       if (ri->ri_xscale & 3)
+               while (height--) {
+                       int changed = rasops_memset32ua(rp, clr, num);
+                       if (ri->ri_hwbits) {
+                               rasops_memcpy32(hp,
+                                   (uint32_t *)((uintptr_t)rp & ~3), changed);
+                               DELTA(hp, ri->ri_stride, uint32_t *);
+                       }
+                       rp += ri->ri_stride;
+               }
+       else
+               while (height--) {
+                       rasops_memset32((uint32_t *)rp, clr, num);
+                       rp += ri->ri_stride;
+                       if (ri->ri_hwbits) {
+                               rasops_memset32(hp, clr, num);
+                               DELTA(hp, ri->ri_stride, uint32_t *);
+                       }
                }
-       }
 }
 
 #if NRASOPS_ROTATION > 0
Index: sys/dev/rasops/rasops.h
===================================================================
RCS file: /cvsroot/src/sys/dev/rasops/rasops.h,v
retrieving revision 1.43
diff -p -u -r1.43 rasops.h
--- sys/dev/rasops/rasops.h     3 Aug 2019 06:29:52 -0000       1.43
+++ sys/dev/rasops/rasops.h     3 Aug 2019 14:40:45 -0000
@@ -193,12 +193,70 @@ void      rasops32_init(struct rasops_info *)
 
 void   rasops_allocstamp(struct rasops_info *, size_t);
 
+extern const uint32_t rasops_lmask32[4 + 1];
+extern const uint32_t rasops_rmask32[4 + 1];
+extern const uint32_t rasops_pmask32[4][4];
+
 #define        DELTA(p, d, cast) ((p) = (cast)((uint8_t *)(p) + (d)))
 
 #define        FONT_GLYPH(uc, font, ri)                                        \
        ((uint8_t *)(font)->data + ((uc) - ((font)->firstchar)) *       \
            (ri)->ri_fontscale)
 
+static __inline void
+rasops_memcpy32(uint32_t * restrict dst, const uint32_t * restrict src,
+    size_t bytes)
+{
+       size_t cnt;
+
+       for (cnt = bytes >> 2; cnt; cnt--)
+               *dst++ = *src++;
+}
+
+static __inline void
+rasops_memset32(uint32_t *p, uint32_t val, size_t bytes)
+{
+       size_t cnt;
+
+       for (cnt = bytes >> 2; cnt; cnt--)
+               *p++ = val;
+}
+
+static __inline int
+rasops_memset32ua(void *p, uint32_t val, size_t bytes)
+{
+       int slop1, slop2, full, cnt;
+       uint32_t mask, mask1, mask2, *dp;
+
+       dp = (uint32_t *)((uintptr_t)p & ~3);
+
+       if (bytes == 1) {
+               mask = rasops_pmask32[(uintptr_t)p & 3][1];
+               *dp = (*dp & ~mask) | (val & mask);
+               return 4;
+       }
+
+       slop1 = (4 - ((uintptr_t)p & 3)) & 3;
+       slop2 = (bytes - slop1) & 3;
+       full = (bytes - slop1 /* - slop2 */) >> 2;
+
+       mask1 = rasops_lmask32[4 - slop1];
+       mask2 = rasops_rmask32[slop2];
+
+       if (slop1) {
+               *dp = (*dp & ~mask1) | (val & mask1);
+               dp++;
+       }
+
+       for (cnt = full; cnt; cnt--)
+               *dp++ = val;
+
+       if (slop2)
+               *dp = (*dp & ~mask2) | (val & mask2);
+
+       return ((slop1 != 0) + full + (slop2 != 0)) << 2;
+}
+
 static __inline uint32_t
 be32uatoh(uint8_t *p)
 {

Reply via email to