Hello,
On Wed, 29 Oct 2025, Chad Jablonski wrote:
HOST_DATA as a source for 2D blits (ROP3_SRCCOPY) was unimplemented,
preventing X.org text rendering with 2D acceleration enabled. This
implements monochrome HOST_DATA blits with color expansion, enabling
xterm to render text correctly.
Great stuff, I meant to work on this but never got to it yet. Keep these
coming! It's not likely we can polish it enough to be in next release but
there's still a few days left, otherwise I take it as an RFC and here's a
quick review.
The blit itself has been implemented as a separate function to keep
things focused. Future work could refactor this, and the existing
blits, to use shared helpers for common patterns.
Changes:
- Add clipping register handlers (SC_TOP_LEFT, SC_BOTTOM_RIGHT,
SRC_SC_BOTTOM_RIGHT) and defaulting
- Add DP_SRC_FRGD_CLR and DP_SRC_BKGD_CLR register write handlers to
enable color expansion
- Implement expand_colors() to convert 1bpp monochrome data to 32bpp
color
- Add ati_blt_mono_host_to_screen() for monochrome HOST_DATA blits
That's a lot of changes for a single patch. Could this be split into a series
along the above lines to put each logical change into its own patch for
easier review?
The host data buffer has a fixed 4MiB size. This _should_ cover typical
use cases and overflow is logged. Future work could implement dynamic
allocation based on blit dimensions.
Tested on Rage 128. From what I understand the R100 also shares these
registers but that device has not been tested against Xorg. There
seem to be other issues preventing Xorg from starting with the radeon
driver.
I think the problem with R100 is that it needs CCE and that's the main issue
with the driver, at least with Linux DRM and the Amiga like drivers I've
checked, I don't know about older Linux framebuffer and Xorg drivers.
Note: The xorg/xf86-video-r128 drivers support both CCE (DMA)
and MMIO acceleration. This implements MMIO, which the driver
uses when built with --disable-dri. CCE of course is another can of
worms.
I know about two other uses of host data that this could be tested with:
Solaris also uses it and could reproduce it with sol-10-u11-ga-x86-dvd.iso I
think it needs qemu-system-x86_64 -m 1G -cpu coreduo -device
ati-vga,model=rv100 but it was a long time ago I've last tried.
MorphOS when rendering bitmap fonts as reported here:
https://morph.zone/modules/newbb_plus/viewtopic.php?forum=3&topic_id=13912&post_id=160655&viewmode=flat&sortorder=0&showonepost=1
I could reproduce it with TVPaint under MorphOS.
If you don't have these or can't test it I can eventually try with these but
just in case you need more test cases these are that I know of.
Some more comments below.
Signed-off-by: Chad Jablonski <[email protected]>
---
hw/display/ati.c | 78 ++++++++++++++++++++++++++++--
hw/display/ati_2d.c | 110 +++++++++++++++++++++++++++++++++++++++++-
hw/display/ati_dbg.c | 9 ++++
hw/display/ati_int.h | 6 +++
hw/display/ati_regs.h | 28 ++++++++++-
5 files changed, 223 insertions(+), 8 deletions(-)
diff --git a/hw/display/ati.c b/hw/display/ati.c
index f7c0006a87..1dfaa79202 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -510,6 +510,15 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr,
unsigned int size)
case DEFAULT_SC_BOTTOM_RIGHT:
val = s->regs.default_sc_bottom_right;
break;
+ case SC_TOP_LEFT:
+ val = s->regs.sc_top_left;
+ break;
+ case SC_BOTTOM_RIGHT:
+ val = s->regs.sc_bottom_right;
+ break;
+ case SRC_SC_BOTTOM_RIGHT:
+ val = s->regs.src_sc_bottom_right;
+ break;
default:
break;
}
@@ -804,9 +813,14 @@ static void ati_mm_write(void *opaque, hwaddr addr,
}
break;
case DST_WIDTH:
+ {
+ uint32_t src = s->regs.dp_gui_master_cntl & GMC_SRC_SOURCE_MASK;
s->regs.dst_width = data & 0x3fff;
- ati_2d_blt(s);
+ if (src != GMC_SRC_SOURCE_HOST_DATA) {
+ ati_2d_blt(s);
+ }
break;
+ }
case DST_HEIGHT:
s->regs.dst_height = data & 0x3fff;
break;
@@ -853,21 +867,39 @@ static void ati_mm_write(void *opaque, hwaddr addr,
s->regs.dst_y = (data >> 16) & 0x3fff;
break;
case DST_HEIGHT_WIDTH:
+ {
+ uint32_t src = s->regs.dp_gui_master_cntl & GMC_SRC_SOURCE_MASK;
s->regs.dst_width = data & 0x3fff;
s->regs.dst_height = (data >> 16) & 0x3fff;
- ati_2d_blt(s);
+ if (src != GMC_SRC_SOURCE_HOST_DATA) {
+ ati_2d_blt(s);
+ }
break;
+ }
case DP_GUI_MASTER_CNTL:
- s->regs.dp_gui_master_cntl = data & 0xf800000f;
+ s->regs.dp_gui_master_cntl = data & 0xff80000f;
s->regs.dp_datatype = (data & 0x0f00) >> 8 | (data & 0x30f0) << 4 |
(data & 0x4000) << 16;
s->regs.dp_mix = (data & GMC_ROP3_MASK) | (data & 0x7000000) >> 16;
+
+ if ((data & GMC_SRC_CLIPPING_MASK) == GMC_SRC_CLIP_DEFAULT) {
+ s->regs.src_sc_bottom_right = s->regs.default_sc_bottom_right;
+ }
+ if ((data & GMC_DST_CLIPPING_MASK) == GMC_DST_CLIP_DEFAULT) {
+ s->regs.sc_top_left = 0;
+ s->regs.sc_bottom_right = s->regs.default_sc_bottom_right;
+ }
break;
case DST_WIDTH_X:
+ {
+ uint32_t src = s->regs.dp_gui_master_cntl & GMC_SRC_SOURCE_MASK;
s->regs.dst_x = data & 0x3fff;
s->regs.dst_width = (data >> 16) & 0x3fff;
- ati_2d_blt(s);
+ if (src != GMC_SRC_SOURCE_HOST_DATA) {
+ ati_2d_blt(s);
+ }
break;
+ }
case SRC_X_Y:
s->regs.src_y = data & 0x3fff;
s->regs.src_x = (data >> 16) & 0x3fff;
@@ -877,10 +909,15 @@ static void ati_mm_write(void *opaque, hwaddr addr,
s->regs.dst_x = (data >> 16) & 0x3fff;
break;
case DST_WIDTH_HEIGHT:
+ {
+ uint32_t src = s->regs.dp_gui_master_cntl & GMC_SRC_SOURCE_MASK;
s->regs.dst_height = data & 0x3fff;
s->regs.dst_width = (data >> 16) & 0x3fff;
- ati_2d_blt(s);
+ if (src != GMC_SRC_SOURCE_HOST_DATA) {
+ ati_2d_blt(s);
+ }
break;
+ }
case DST_HEIGHT_Y:
s->regs.dst_y = data & 0x3fff;
s->regs.dst_height = (data >> 16) & 0x3fff;
@@ -909,6 +946,12 @@ static void ati_mm_write(void *opaque, hwaddr addr,
case DP_CNTL:
s->regs.dp_cntl = data;
break;
+ case DP_SRC_FRGD_CLR:
+ s->regs.dp_src_frgd_clr = data;
+ break;
+ case DP_SRC_BKGD_CLR:
+ s->regs.dp_src_bkgd_clr = data;
+ break;
case DP_DATATYPE:
s->regs.dp_datatype = data & 0xe0070f0f;
break;
@@ -937,6 +980,29 @@ static void ati_mm_write(void *opaque, hwaddr addr,
case DEFAULT_SC_BOTTOM_RIGHT:
s->regs.default_sc_bottom_right = data & 0x3fff3fff;
break;
+ case SC_TOP_LEFT:
+ s->regs.sc_top_left = data;
+ break;
+ case SC_BOTTOM_RIGHT:
+ s->regs.sc_bottom_right = data;
+ break;
+ case SRC_SC_BOTTOM_RIGHT:
+ s->regs.src_sc_bottom_right = data;
+ break;
+ case HOST_DATA0 ... HOST_DATA7:
+ case HOST_DATA_LAST:
+ if (s->host_data_pos + 4 > sizeof(s->host_data_buffer)) {
+ qemu_log_mask(LOG_UNIMP, "HOST_DATA buffer overflow "
+ "(buffer size: %zu bytes)\n",
+ sizeof(s->host_data_buffer));
+ return;
+ }
+ stn_he_p(&s->host_data_buffer[s->host_data_pos], 4, data);
+ s->host_data_pos += 4;
+ if (addr == HOST_DATA_LAST) {
+ ati_2d_blt(s);
+ }
+ break;
default:
break;
}
@@ -1018,6 +1084,7 @@ static void ati_vga_realize(PCIDevice *dev, Error
**errp)
/* most interrupts are not yet emulated but MacOS needs at least VBlank
*/
dev->config[PCI_INTERRUPT_PIN] = 1;
timer_init_ns(&s->vblank_timer, QEMU_CLOCK_VIRTUAL, ati_vga_vblank_irq,
s);
+ s->host_data_pos = 0;
You probably don't need this line because device state is 0 init and reset
will run after realize and overwrite it so it's enough to reset it in the
reset method below.
}
static void ati_vga_reset(DeviceState *dev)
@@ -1030,6 +1097,7 @@ static void ati_vga_reset(DeviceState *dev)
/* reset vga */
vga_common_reset(&s->vga);
s->mode = VGA_MODE;
+ s->host_data_pos = 0;
}
static void ati_vga_exit(PCIDevice *dev)
diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c
index 309bb5ccb6..6b0b4775ff 100644
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@@ -24,6 +24,9 @@
* possible.
*/
+#define DEFAULT_CNTL (s->regs.dp_gui_master_cntl &
GMC_DST_PITCH_OFFSET_CNTL)
May need a better name? There are different defaults so not clear which one
is this or is it used for all parameters?
+#define EXPANDED_SRC_BPP 32
+
static int ati_bpp_from_datatype(ATIVGAState *s)
{
switch (s->regs.dp_datatype & 0xf) {
@@ -43,7 +46,106 @@ static int ati_bpp_from_datatype(ATIVGAState *s)
}
}
-#define DEFAULT_CNTL (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL)