Module Name:    src
Committed By:   rin
Date:           Wed Aug  7 11:47:33 UTC 2019

Modified Files:
        src/sys/dev/rasops: rasops.c rasops.h rasops15.c rasops2.c rasops24.c
            rasops32.c rasops4.c rasops8.c rasops_putchar_width.h

Log Message:
Stop allocating ri_buf and ri_stamp dynamically. As commented in
rasops.h, it is not safe to use kmem_alloc(9) in rasops_init();
rasops routines can be used for early putchar, i.e. before UVM
is fully initialized.

Should fix a problem reported by macallan:
http://mail-index.netbsd.org/tech-kern/2019/08/02/msg025327.html

Instead of using ri_buf, inline function rasops_memset32() is
introduced to fill 32bit data efficiently.

Instead of using ri_stamp (per device stamp), stamp_ri is
introduced to record the device for which the stamp was calculated.


To generate a diff of this commit:
cvs rdiff -u -r1.114 -r1.115 src/sys/dev/rasops/rasops.c
cvs rdiff -u -r1.43 -r1.44 src/sys/dev/rasops/rasops.h
cvs rdiff -u -r1.34 -r1.35 src/sys/dev/rasops/rasops15.c
cvs rdiff -u -r1.29 -r1.30 src/sys/dev/rasops/rasops2.c
cvs rdiff -u -r1.46 -r1.47 src/sys/dev/rasops/rasops24.c
cvs rdiff -u -r1.42 -r1.43 src/sys/dev/rasops/rasops32.c
cvs rdiff -u -r1.24 -r1.25 src/sys/dev/rasops/rasops4.c
cvs rdiff -u -r1.47 -r1.48 src/sys/dev/rasops/rasops8.c
cvs rdiff -u -r1.10 -r1.11 src/sys/dev/rasops/rasops_putchar_width.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/rasops/rasops.c
diff -u src/sys/dev/rasops/rasops.c:1.114 src/sys/dev/rasops/rasops.c:1.115
--- src/sys/dev/rasops/rasops.c:1.114	Wed Aug  7 11:08:44 2019
+++ src/sys/dev/rasops/rasops.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/*	 $NetBSD: rasops.c,v 1.114 2019/08/07 11:08:44 rin Exp $	*/
+/*	 $NetBSD: rasops.c,v 1.115 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.114 2019/08/07 11:08:44 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.115 2019/08/07 11:47:33 rin Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_rasops.h"
@@ -509,21 +509,6 @@ rasops_reconfig(struct rasops_info *ri, 
 		    WSSCREEN_WSCOLORS | WSSCREEN_REVERSE;
 	}
 
-	if (ri->ri_buf != NULL) {
-		kmem_free(ri->ri_buf, ri->ri_buflen);
-		ri->ri_buf = NULL;
-	}
-	len = (ri->ri_flg & RI_FULLCLEAR) ? ri->ri_stride : ri->ri_emustride;
-	ri->ri_buflen = len;
-	ri->ri_buf = kmem_alloc(len, KM_SLEEP);
-
-#ifndef RASOPS_SMALL
-	if (ri->ri_stamp != NULL) {
-		kmem_free(ri->ri_stamp, ri->ri_stamp_len);
-		ri->ri_stamp = NULL;
-	}
-#endif
-
 	switch (ri->ri_depth) {
 #if NRASOPS1 > 0
 	case 1:
@@ -1004,9 +989,8 @@ void
 rasops_eraserows(void *cookie, int row, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint32_t *buf = (uint32_t *)ri->ri_buf;
 	uint32_t *rp, *hp, clr;
-	int stride, cnt;
+	int stride;
 
 	hp = NULL;	/* XXX GCC */
 
@@ -1045,13 +1029,10 @@ rasops_eraserows(void *cookie, int row, 
 			hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale);
 	}
 
-	for (cnt = 0; cnt < stride >> 2; cnt++)
-		buf[cnt] = clr;
-
 	while (num--) {
-		memcpy(rp, buf, stride);
+		rasops_memset32(rp, clr, stride);
 		if (ri->ri_hwbits) {
-			memcpy(hp, buf, stride);
+			memcpy(hp, rp, stride);
 			DELTA(hp, ri->ri_stride, uint32_t *);
 		}
 		DELTA(rp, ri->ri_stride, uint32_t *);
@@ -1166,9 +1147,8 @@ void
 rasops_erasecols(void *cookie, int row, int col, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint32_t *buf = ri->ri_buf;
-	int height, cnt, clr;
-	uint32_t *dp, *rp, *hp;
+	int height, clr;
+	uint32_t *rp, *hp;
 
 	hp = NULL;	/* XXX GCC */
 
@@ -1196,25 +1176,13 @@ rasops_erasecols(void *cookie, int row, 
 	height = ri->ri_font->fontheight;
 	clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
 
-	dp = buf;
-
-	/* Write 4 bytes per loop */
-	for (cnt = num >> 2; cnt; cnt--)
-		*dp++ = clr;
-
-	/* Write unaligned trailing slop */
-	for (cnt = num & 3; cnt; cnt--) {
-		*(uint8_t *)dp = clr;
-		DELTA(dp, 1, uint32_t *);
-	}
-
 	while (height--) {
-		memcpy(rp, buf, num);
-		DELTA(rp, ri->ri_stride, uint32_t *);
+		rasops_memset32(rp, clr, num);
 		if (ri->ri_hwbits) {
-			memcpy(hp, buf, num);
+			memcpy(hp, rp, num);
 			DELTA(hp, ri->ri_stride, uint32_t *);
 		}
+		DELTA(rp, ri->ri_stride, uint32_t *);
 	}
 }
 
@@ -1689,15 +1657,3 @@ rasops_get_cmap(struct rasops_info *ri, 
 		memcpy(palette, rasops_cmap, uimin(bytes, sizeof(rasops_cmap)));
 	return 0;
 }
-
-#ifndef RASOPS_SMALL
-void
-rasops_allocstamp(struct rasops_info *ri, size_t len)
-{
-
-	KASSERT(ri->ri_stamp == NULL);
-	ri->ri_stamp_len = len;
-	ri->ri_stamp = kmem_zalloc(len, KM_SLEEP);
-	ri->ri_stamp_attr = 0;
-}
-#endif

Index: src/sys/dev/rasops/rasops.h
diff -u src/sys/dev/rasops/rasops.h:1.43 src/sys/dev/rasops/rasops.h:1.44
--- src/sys/dev/rasops/rasops.h:1.43	Sat Aug  3 06:29:52 2019
+++ src/sys/dev/rasops/rasops.h	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops.h,v 1.43 2019/08/03 06:29:52 rin Exp $ */
+/* 	$NetBSD: rasops.h,v 1.44 2019/08/07 11:47:33 rin Exp $ */
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -191,14 +191,51 @@ void	rasops15_init(struct rasops_info *)
 void	rasops24_init(struct rasops_info *);
 void	rasops32_init(struct rasops_info *);
 
-void	rasops_allocstamp(struct rasops_info *, size_t);
-
 #define	DELTA(p, d, cast) ((p) = (cast)((uint8_t *)(p) + (d)))
 
 #define	FONT_GLYPH(uc, font, ri)					\
 	((uint8_t *)(font)->data + ((uc) - ((font)->firstchar)) *	\
 	    (ri)->ri_fontscale)
 
+static __inline void
+rasops_memset32(void *p, uint32_t val, size_t bytes)
+{
+	int slop1, slop2, full;
+	uint8_t *dp = (uint8_t *)p;
+
+	if (bytes == 1) {
+		*dp = val;
+		return;
+	}
+
+	slop1 = (4 - ((uintptr_t)dp & 3)) & 3;
+	slop2 = (bytes - slop1) & 3;
+	full = (bytes - slop1 /* - slop2 */) >> 2;
+
+	if (slop1 & 1)
+		*dp++ = val;
+
+	if (slop1 & 2) {
+		*(uint16_t *)dp = val;
+		dp += 2;
+	}
+
+	for (; full; full--) {
+		*(uint32_t *)dp = val;
+		dp += 4;
+	}
+
+	if (slop2 & 2) {
+		*(uint16_t *)dp = val;
+		dp += 2;
+	}
+
+	if (slop2 & 1)
+		*dp = val;
+
+	return;
+}
+
 static __inline uint32_t
 be32uatoh(uint8_t *p)
 {

Index: src/sys/dev/rasops/rasops15.c
diff -u src/sys/dev/rasops/rasops15.c:1.34 src/sys/dev/rasops/rasops15.c:1.35
--- src/sys/dev/rasops/rasops15.c:1.34	Fri Aug  2 04:40:53 2019
+++ src/sys/dev/rasops/rasops15.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops15.c,v 1.34 2019/08/02 04:40:53 rin Exp $	*/
+/* 	$NetBSD: rasops15.c,v 1.35 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops15.c,v 1.34 2019/08/02 04:40:53 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops15.c,v 1.35 2019/08/07 11:47:33 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -55,6 +55,11 @@ static void	rasops15_makestamp(struct ra
 #endif
 
 #ifndef RASOPS_SMALL
+/* 4x1 stamp for optimized character blitting */
+static uint32_t			stamp[32];
+static long			stamp_attr;
+static struct rasops_info	*stamp_ri;
+
 /*
  * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
  * destination uint32_t[0] = STAMP_READ(offset)
@@ -104,12 +109,17 @@ rasops15_init(struct rasops_info *ri)
 	}
 
 #ifndef RASOPS_SMALL
-	rasops_allocstamp(ri, sizeof(uint32_t) * 32);
+	stamp_attr = 0;
+	stamp_ri = NULL;
 #endif
 }
 
+#undef	RASOPS_AA
+#include "rasops_putchar.h"
+
+#define	RASOPS_AA
 #include "rasops_putchar.h"
-#include "rasops_putchar_aa.h"
+#undef	RASOPS_AA
 
 #ifndef RASOPS_SMALL
 /*
@@ -118,13 +128,14 @@ rasops15_init(struct rasops_info *ri)
 static void
 rasops15_makestamp(struct rasops_info *ri, long attr)
 {
-	uint32_t *stamp = (uint32_t *)ri->ri_stamp;
 	uint32_t fg, bg;
 	int i;
 
+	stamp_attr = attr;
+	stamp_ri = ri;
+
 	fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf] & 0xffff;
 	bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 0xffff;
-	ri->ri_stamp_attr = attr;
 
 	for (i = 0; i < 32; i += 2) {
 #if BYTE_ORDER == LITTLE_ENDIAN

Index: src/sys/dev/rasops/rasops2.c
diff -u src/sys/dev/rasops/rasops2.c:1.29 src/sys/dev/rasops/rasops2.c:1.30
--- src/sys/dev/rasops/rasops2.c:1.29	Fri Aug  2 04:39:09 2019
+++ src/sys/dev/rasops/rasops2.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops2.c,v 1.29 2019/08/02 04:39:09 rin Exp $	*/
+/* 	$NetBSD: rasops2.c,v 1.30 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops2.c,v 1.29 2019/08/02 04:39:09 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops2.c,v 1.30 2019/08/07 11:47:33 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -58,6 +58,12 @@ static void	rasops2_putchar16(void *, in
 static void	rasops2_makestamp(struct rasops_info *, long);
 #endif
 
+#ifndef RASOPS_SMALL
+/* 4x1 stamp for optimized character blitting */
+static uint8_t			stamp[16];
+static long			stamp_attr;
+static struct rasops_info	*stamp_ri;
+
 /*
  * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
  * destination = STAMP_READ(offset)
@@ -65,6 +71,7 @@ static void	rasops2_makestamp(struct ras
 #define	STAMP_SHIFT(fb, n)	((n) ? (fb) >> 4 : (fb))
 #define	STAMP_MASK		0xf
 #define	STAMP_READ(o)		stamp[o]
+#endif
 
 /*
  * Initialize rasops_info struct for this colordepth.
@@ -90,14 +97,15 @@ rasops2_init(struct rasops_info *ri)
 	case 16:
 		ri->ri_ops.putchar = rasops2_putchar16;
 		break;
-#endif	/* !RASOPS_SMALL */
+#endif
 	default:
 		ri->ri_ops.putchar = rasops2_putchar;
 		return;
 	}
 
 #ifndef RASOPS_SMALL
-	rasops_allocstamp(ri, sizeof(uint8_t) * 16);
+	stamp_attr = 0;
+	stamp_ri = NULL;
 #endif
 }
 
@@ -108,12 +116,13 @@ rasops2_init(struct rasops_info *ri)
 static void
 rasops2_makestamp(struct rasops_info *ri, long attr)
 {
-	uint8_t *stamp = (uint8_t *)ri->ri_stamp;
 	int i, fg, bg;
 
+	stamp_attr = attr;
+	stamp_ri = ri;
+
 	fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf] & 3;
 	bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 3;
-	ri->ri_stamp_attr = attr;
 
 	for (i = 0; i < 16; i++) {
 #if BYTE_ORDER == BIG_ENDIAN

Index: src/sys/dev/rasops/rasops24.c
diff -u src/sys/dev/rasops/rasops24.c:1.46 src/sys/dev/rasops/rasops24.c:1.47
--- src/sys/dev/rasops/rasops24.c:1.46	Fri Aug  2 23:24:37 2019
+++ src/sys/dev/rasops/rasops24.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops24.c,v 1.46 2019/08/02 23:24:37 rin Exp $	*/
+/* 	$NetBSD: rasops24.c,v 1.47 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops24.c,v 1.46 2019/08/02 23:24:37 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops24.c,v 1.47 2019/08/07 11:47:33 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -62,6 +62,12 @@ static void 	rasops24_putchar16(void *, 
 static void	rasops24_makestamp(struct rasops_info *, long);
 #endif
 
+#ifndef RASOPS_SMALL
+/* 4x1 stamp for optimized character blitting */
+static uint32_t			stamp[64];
+static long			stamp_attr;
+static struct rasops_info	*stamp_ri;
+
 /*
  * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
  * destination uint32_t[0] = STAMP_READ(offset)
@@ -71,6 +77,7 @@ static void	rasops24_makestamp(struct ra
 #define	STAMP_SHIFT(fb, n)	((n) ? (fb) : (fb) << 4)
 #define	STAMP_MASK		(0xf << 4)
 #define	STAMP_READ(o)		(*(uint32_t *)((uint8_t *)stamp + (o)))
+#endif
 
 /*
  * Initialize rasops_info struct for this colordepth.
@@ -113,7 +120,8 @@ rasops24_init(struct rasops_info *ri)
 	}
 
 #ifndef RASOPS_SMALL
-	rasops_allocstamp(ri, sizeof(uint32_t) * 64);
+	stamp_attr = 0;
+	stamp_ri = NULL;
 #endif
 }
 
@@ -121,13 +129,13 @@ rasops24_init(struct rasops_info *ri)
 #include "rasops_putchar_aa.h"
 
 static __inline void
-rasops24_makestamp1(struct rasops_info *ri, uint32_t *stamp,
+rasops24_makestamp1(struct rasops_info *ri, uint32_t *xstamp,
     uint32_t c1, uint32_t c2, uint32_t c3, uint32_t c4)
 {
 
-	stamp[0] = (c1 <<  8) | (c2 >> 16);
-	stamp[1] = (c2 << 16) | (c3 >>  8);
-	stamp[2] = (c3 << 24) |  c4;
+	xstamp[0] = (c1 <<  8) | (c2 >> 16);
+	xstamp[1] = (c2 << 16) | (c3 >>  8);
+	xstamp[2] = (c3 << 24) |  c4;
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 	if ((ri->ri_flg & RI_BSWAP) == 0)
@@ -135,9 +143,9 @@ rasops24_makestamp1(struct rasops_info *
 	if ((ri->ri_flg & RI_BSWAP) != 0)
 #endif
 	{
-		stamp[0] = bswap32(stamp[0]);
-		stamp[1] = bswap32(stamp[1]);
-		stamp[2] = bswap32(stamp[2]);
+		xstamp[0] = bswap32(xstamp[0]);
+		xstamp[1] = bswap32(xstamp[1]);
+		xstamp[2] = bswap32(xstamp[2]);
 	}
 }
 
@@ -148,13 +156,14 @@ rasops24_makestamp1(struct rasops_info *
 static void
 rasops24_makestamp(struct rasops_info *ri, long attr)
 {
-	uint32_t *stamp = (uint32_t *)ri->ri_stamp;
 	uint32_t fg, bg, c1, c2, c3, c4;
 	int i;
 
+	stamp_attr = attr;
+	stamp_ri = ri;
+
 	fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf] & 0xffffff;
 	bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 0xffffff;
-	ri->ri_stamp_attr = attr;
 
 	for (i = 0; i < 64; i += 4) {
 #if BYTE_ORDER == LITTLE_ENDIAN
@@ -193,9 +202,8 @@ static void
 rasops24_eraserows(void *cookie, int row, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint32_t *buf = (uint32_t *)ri->ri_buf;
 	int full, slop, cnt, stride;
-	uint32_t *rp, *dp, *hp, clr, stamp[3];
+	uint32_t *rp, *dp, *hp, clr, xstamp[3];
 
 	hp = NULL;	/* XXX GCC */
 
@@ -222,7 +230,7 @@ rasops24_eraserows(void *cookie, int row
 #endif
 
 	clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 0xffffff;
-	rasops24_makestamp1(ri, stamp, clr, clr, clr, clr);
+	rasops24_makestamp1(ri, xstamp, clr, clr, clr, clr);
 
 	/*
 	 * XXX the wsdisplay_emulops interface seems a little deficient in
@@ -247,25 +255,23 @@ rasops24_eraserows(void *cookie, int row
 	full = stride / (4 * 3);
 	slop = (stride - full * (4 * 3)) / 4;
 
-	dp = buf;
-
-	for (cnt = full; cnt; cnt--) {
-		dp[0] = stamp[0];
-		dp[1] = stamp[1];
-		dp[2] = stamp[2];
-		dp += 3;
-	}
-
-	for (cnt = 0; cnt < slop; cnt++)
-		*dp++ = stamp[cnt];
-
 	while (num--) {
-		memcpy(rp, buf, stride);
-		DELTA(rp, ri->ri_stride, uint32_t *);
+		dp = rp;
+		for (cnt = full; cnt; cnt--) {
+			dp[0] = xstamp[0];
+			dp[1] = xstamp[1];
+			dp[2] = xstamp[2];
+			dp += 3;
+		}
+		for (cnt = 0; cnt < slop; cnt++)
+			*dp++ = xstamp[cnt];
+
 		if (ri->ri_hwbits) {
-			memcpy(hp, buf, stride);
+			memcpy(hp, rp, stride);
 			DELTA(hp, ri->ri_stride, uint32_t *);
 		}
+
+		DELTA(rp, ri->ri_stride, uint32_t *);
 	}
 }
 
@@ -276,9 +282,8 @@ static void
 rasops24_erasecols(void *cookie, int row, int col, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	void *buf = ri->ri_buf;
-	int height, cnt, clr, stamp[3];
-	uint32_t *dp;
+	int height, cnt, slop1, slop2, full;
+	uint32_t clr, xstamp[3], *dp;
 	uint8_t *rp, *hp, *dbp;
 
 	hp = NULL;	/* XXX GCC */
@@ -313,37 +318,62 @@ rasops24_erasecols(void *cookie, int row
 	if (ri->ri_hwbits)
 		hp = ri->ri_hwbits + row * ri->ri_yscale + col * ri->ri_xscale;
 
-	num *= ri->ri_font->fontwidth;
+	num *= ri->ri_xscale;
 	height = ri->ri_font->fontheight;
 
 	clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 0xffffff;
-	rasops24_makestamp1(ri, stamp, clr, clr, clr, clr);
+	rasops24_makestamp1(ri, xstamp, clr, clr, clr, clr);
 
-	/* 4 pels per loop */
-	dp = (uint32_t *)buf;
-	for (cnt = num >> 2; cnt; cnt--) {
-		dp[0] = stamp[0];
-		dp[1] = stamp[1];
-		dp[2] = stamp[2];
-		dp += 3;
-	}
-
-	/* Trailing slop */
-	dbp = (uint8_t *)dp;
-	for (cnt = num & 3; cnt; cnt--) {
-		*dbp++ = (clr >> 16);
-		*dbp++ = (clr >> 8);
-		*dbp++ =  clr;
-	}
-
-	num *= 3;
+	/*
+	 * Align to word boundary by 24-bit-wise operations:
+	 *
+	 * rp % 4 == 1 ---> slop1 = 3:
+	 *	0123
+	 *	-RGB
+	 *
+	 * rp % 4 == 2 ---> slop1 = 6:
+	 *	0123 0123
+	 *	--RG BRGB
+	 *
+	 * rp % 4 == 3 ---> slop1 = 9:
+	 *	0123 0123 0123
+	 *	---R GBRG BRGB
+	 */
+	slop1 = 3 * ((uintptr_t)rp % 4);
+	slop2 = (num - slop1) % 12;
+	full = (num - slop1 /* - slop2 */) / 12;
 
 	while (height--) {
-		memcpy(rp, buf, num);
-		rp += ri->ri_stride;
+		/* Align to word boundary */
+		dbp = rp;
+		for (cnt = slop1; cnt; cnt -= 3) {
+			*dbp++ = (clr >> 16);
+			*dbp++ = (clr >> 8);
+			*dbp++ = clr;
+		}
+
+		/* 4 pels per loop */
+		dp = (uint32_t *)dbp;
+		for (cnt = full; cnt; cnt--) {
+			dp[0] = xstamp[0];
+			dp[1] = xstamp[1];
+			dp[2] = xstamp[2];
+			dp += 3;
+		}
+
+		/* Trailing slop */
+		dbp = (uint8_t *)dp;
+		for (cnt = slop2; cnt; cnt -= 3) {
+			*dbp++ = (clr >> 16);
+			*dbp++ = (clr >> 8);
+			*dbp++ = clr;
+		}
+
 		if (ri->ri_hwbits) {
-			memcpy(hp, buf, num);
+			memcpy(hp, rp, num);
 			hp += ri->ri_stride;
 		}
+
+		rp += ri->ri_stride;
 	}
 }

Index: src/sys/dev/rasops/rasops32.c
diff -u src/sys/dev/rasops/rasops32.c:1.42 src/sys/dev/rasops/rasops32.c:1.43
--- src/sys/dev/rasops/rasops32.c:1.42	Fri Aug  2 04:40:53 2019
+++ src/sys/dev/rasops/rasops32.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/*	 $NetBSD: rasops32.c,v 1.42 2019/08/02 04:40:53 rin Exp $	*/
+/*	 $NetBSD: rasops32.c,v 1.43 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops32.c,v 1.42 2019/08/02 04:40:53 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops32.c,v 1.43 2019/08/07 11:47:33 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -54,6 +54,12 @@ static void	rasops32_putchar16(void *, i
 static void	rasops32_makestamp(struct rasops_info *, long);
 #endif
 
+#ifndef RASOPS_SMALL
+/* 4x1 stamp for optimized character blitting */
+static uint32_t			stamp[64];
+static long			stamp_attr;
+static struct rasops_info	*stamp_ri;
+
 /*
  * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
  * destination uint32_t[0] = STAMP_READ(offset)
@@ -64,6 +70,7 @@ static void	rasops32_makestamp(struct ra
 #define	STAMP_SHIFT(fb, n)	((n) ? (fb) : (fb) << 4)
 #define	STAMP_MASK		(0xf << 4)
 #define	STAMP_READ(o)		(*(uint32_t *)((uint8_t *)stamp + (o)))
+#endif
 
 /*
  * Initialize a 'rasops_info' descriptor for this depth.
@@ -103,7 +110,8 @@ rasops32_init(struct rasops_info *ri)
 	}
 
 #ifndef RASOPS_SMALL
-	rasops_allocstamp(ri, sizeof(uint32_t) * 64);
+	stamp_attr = 0;
+	stamp_ri = NULL;
 #endif
 }
 
@@ -117,13 +125,14 @@ rasops32_init(struct rasops_info *ri)
 static void
 rasops32_makestamp(struct rasops_info *ri, long attr)
 {
-	uint32_t *stamp = (uint32_t *)ri->ri_stamp;
 	uint32_t fg, bg;
 	int i;
 
+	stamp_attr = attr;
+	stamp_ri = ri;
+
 	fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf];
 	bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
-	ri->ri_stamp_attr = attr;
 
 	for (i = 0; i < 64; i += 4) {
 		stamp[i + 0] = i & 32 ? fg : bg;

Index: src/sys/dev/rasops/rasops4.c
diff -u src/sys/dev/rasops/rasops4.c:1.24 src/sys/dev/rasops/rasops4.c:1.25
--- src/sys/dev/rasops/rasops4.c:1.24	Fri Aug  2 04:39:09 2019
+++ src/sys/dev/rasops/rasops4.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops4.c,v 1.24 2019/08/02 04:39:09 rin Exp $	*/
+/* 	$NetBSD: rasops4.c,v 1.25 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops4.c,v 1.24 2019/08/02 04:39:09 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops4.c,v 1.25 2019/08/07 11:47:33 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -58,6 +58,12 @@ static void	rasops4_putchar16(void *, in
 static void	rasops4_makestamp(struct rasops_info *, long);
 #endif
 
+#ifndef RASOPS_SMALL
+/* 4x1 stamp for optimized character blitting */
+static uint16_t			stamp[16];
+static long			stamp_attr;
+static struct rasops_info	*stamp_ri;
+
 /*
  * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
  * destination = STAMP_READ(offset)
@@ -65,6 +71,7 @@ static void	rasops4_makestamp(struct ras
 #define STAMP_SHIFT(fb, n)	((n) ? (fb) >> 4 : (fb))
 #define STAMP_MASK		0xf
 #define STAMP_READ(o)		stamp[o]
+#endif
 
 /*
  * Initialize rasops_info struct for this colordepth.
@@ -97,7 +104,8 @@ rasops4_init(struct rasops_info *ri)
 	}
 
 #ifndef RASOPS_SMALL
-	rasops_allocstamp(ri, sizeof(uint16_t) * 16);
+	stamp_attr = 0;
+	stamp_ri = NULL;
 #endif
 }
 
@@ -108,12 +116,13 @@ rasops4_init(struct rasops_info *ri)
 static void
 rasops4_makestamp(struct rasops_info *ri, long attr)
 {
-	uint16_t *stamp = (uint16_t *)ri->ri_stamp;
 	int i, fg, bg;
 
+	stamp_attr = attr;
+	stamp_ri = ri;
+
 	fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf] & 0xf;
 	bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 0xf;
-	ri->ri_stamp_attr = attr;
 
 	for (i = 0; i < 16; i++) {
 #if BYTE_ORDER == BIG_ENDIAN

Index: src/sys/dev/rasops/rasops8.c
diff -u src/sys/dev/rasops/rasops8.c:1.47 src/sys/dev/rasops/rasops8.c:1.48
--- src/sys/dev/rasops/rasops8.c:1.47	Fri Aug  2 04:40:53 2019
+++ src/sys/dev/rasops/rasops8.c	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops8.c,v 1.47 2019/08/02 04:40:53 rin Exp $	*/
+/* 	$NetBSD: rasops8.c,v 1.48 2019/08/07 11:47:33 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops8.c,v 1.47 2019/08/02 04:40:53 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops8.c,v 1.48 2019/08/07 11:47:33 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -54,6 +54,12 @@ static void 	rasops8_putchar16(void *, i
 static void	rasops8_makestamp(struct rasops_info *ri, long);
 #endif
 
+#ifndef RASOPS_SMALL
+/* 4x1 stamp for optimized character blitting */
+static uint32_t			stamp[16];
+static long			stamp_attr;
+static struct rasops_info	*stamp_ri;
+
 /*
  * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
  * destination = STAMP_READ(offset)
@@ -61,6 +67,7 @@ static void	rasops8_makestamp(struct ras
 #define	STAMP_SHIFT(fb, n)	((n) ? (fb) >> 2 : (fb) << 2)
 #define	STAMP_MASK		(0xf << 2)
 #define	STAMP_READ(o)		(*(uint32_t *)((uint8_t *)stamp + (o)))
+#endif
 
 /*
  * Initialize a 'rasops_info' descriptor for this depth.
@@ -101,7 +108,8 @@ rasops8_init(struct rasops_info *ri)
 	}
 
 #ifndef RASOPS_SMALL
-	rasops_allocstamp(ri, sizeof(uint32_t) * 16);
+	stamp_attr = 0;
+	stamp_ri = NULL;
 #endif
 }
 
@@ -115,13 +123,14 @@ rasops8_init(struct rasops_info *ri)
 static void
 rasops8_makestamp(struct rasops_info *ri, long attr)
 {
-	uint32_t *stamp = (uint32_t *)ri->ri_stamp;
 	uint32_t fg, bg;
 	int i;
 
+	stamp_attr = attr;
+	stamp_ri = ri;
+
 	fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf] & 0xff;
 	bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf] & 0xff;
-	ri->ri_stamp_attr = attr;
 
 	for (i = 0; i < 16; i++) {
 #if BYTE_ORDER == BIG_ENDIAN

Index: src/sys/dev/rasops/rasops_putchar_width.h
diff -u src/sys/dev/rasops/rasops_putchar_width.h:1.10 src/sys/dev/rasops/rasops_putchar_width.h:1.11
--- src/sys/dev/rasops/rasops_putchar_width.h:1.10	Wed Jul 31 02:04:14 2019
+++ src/sys/dev/rasops/rasops_putchar_width.h	Wed Aug  7 11:47:33 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: rasops_putchar_width.h,v 1.10 2019/07/31 02:04:14 rin Exp $ */
+/* $NetBSD: rasops_putchar_width.h,v 1.11 2019/08/07 11:47:33 rin Exp $ */
 
 /* NetBSD: rasops8.c,v 1.41 2019/07/25 03:02:44 rin Exp  */
 /*-
@@ -204,7 +204,6 @@ PUTCHAR_WIDTH(RASOPS_DEPTH, RASOPS_WIDTH
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
 	struct wsdisplay_font *font = PICK_FONT(ri, uc);
-	STAMP_TYPE *stamp = (STAMP_TYPE *)ri->ri_stamp;
 	int height, fs;
 	uint8_t *fr;
 	STAMP_TYPE *rp, *hp;
@@ -225,7 +224,7 @@ PUTCHAR_WIDTH(RASOPS_DEPTH, RASOPS_WIDTH
 		return;
 
 	/* Recompute stamp? */
-	if (attr != ri->ri_stamp_attr)
+	if (attr != stamp_attr || __predict_false(ri != stamp_ri))
 		MAKESTAMP(RASOPS_DEPTH)(ri, attr);
 
 	rp = (STAMP_TYPE *)(ri->ri_bits + row * ri->ri_yscale +

Reply via email to