I've taken a first stab at implementing combining character support in st.

Since damn near every line of st assumes that the buffer is a grid that maps
exactly to the window, I've implemented it in the worst way possible. Each
glyph now holds a union of either two inline combining characters or a pointer
to a zero-terminated heap array; the new ATTR_DECOMPPTR flag says which member
is in use. This is purely to make the most of the space. No length is kept, so
every combining character added past the second causes a realloc.

I think I've hit all the problem spots with proper frees but someone else will
have to verify this.

There's currently a rendering problem: Xft doesn't draw the combining
characters as part of the character they combine with, so things like Hangul
Jamo come out as completely unusable stacks of characters instead of the
proper combined form.
I do not know how to fix this.

If someone wants to pick this up they're welcome to.

-- Joakim






diff --git a/st.c b/st.c
index 3dd5caf..82952ce 100644
--- a/st.c
+++ b/st.c
@@ -98,6 +98,7 @@ enum glyph_attribute {
 	ATTR_WRAP       = 1 << 8,
 	ATTR_WIDE       = 1 << 9,
 	ATTR_WDUMMY     = 1 << 10,
+	ATTR_DECOMPPTR  = 1 << 11,
 	ATTR_BOLD_FAINT = ATTR_BOLD | ATTR_FAINT,
 };
 
@@ -190,6 +191,10 @@ typedef XftColor Color;
 
 typedef struct {
 	Rune u;           /* character code */
+	union {
+		Rune c[2];
+		Rune *pc;
+	} dc;
 	ushort mode;      /* attribute flags */
 	uint32_t fg;      /* foreground  */
 	uint32_t bg;      /* background  */
@@ -233,6 +238,7 @@ typedef struct {
 	Line *alt;    /* alternate screen */
 	bool *dirty;  /* dirtyness of lines */
 	XftGlyphFontSpec *specbuf; /* font spec buffer used for rendering */
+	int specbuflen;
 	TCursor c;    /* cursor */
 	int top;      /* top    scroll limit */
 	int bot;      /* bottom scroll limit */
@@ -399,6 +405,7 @@ static void tscrollup(int, int);
 static void tscrolldown(int, int);
 static void tsetattr(int *, int);
 static void tsetchar(Rune, Glyph *, int, int);
+static void taddcchar(Rune, int, int);
 static void tsetscroll(int, int);
 static void tswapscreen(void);
 static void tsetdirt(int, int);
@@ -419,8 +426,9 @@ static void ttywrite(const char *, size_t);
 static void tstrsequence(uchar);
 
 static inline ushort sixd_to_16bit(int);
-static int xmakeglyphfontspecs(XftGlyphFontSpec *, const Glyph *, int, int, int);
-static void xdrawglyphfontspecs(const XftGlyphFontSpec *, Glyph, int, int, int);
+static int xmakeglyphfontspecs(XftGlyphFontSpec *, int, const Glyph *, int, int, int);
+static int xmakeglyphfontspecsintermbuf(const Glyph *, int, int, int);
+static void xdrawglyphfontspecs(const XftGlyphFontSpec *, Glyph, int, int, int, int);
 static void xdrawglyph(Glyph, int, int);
 static void xhints(void);
 static void xclear(int, int, int, int);
@@ -957,11 +965,12 @@ getsel(void) {
 	char *str, *ptr;
 	int y, bufsize, lastx, linelen;
 	Glyph *gp, *last;
+	Rune *pc;
 
 	if(sel.ob.x == -1)
 		return NULL;
 
-	bufsize = (term.col+1) * (sel.ne.y-sel.nb.y+1) * UTF_SIZ;
+	bufsize = (term.specbuflen+1) * (sel.ne.y-sel.nb.y+1) * UTF_SIZ;
 	ptr = str = xmalloc(bufsize);
 
 	/* append every set & selected glyph to the selection */
@@ -984,6 +993,16 @@ getsel(void) {
 				continue;
 
 			ptr += utf8encode(gp->u, ptr);
+			if(gp->mode & ATTR_DECOMPPTR) {
+				pc = gp->dc.pc;
+				for(pc = gp->dc.pc; *pc; pc++)
+					ptr += utf8encode(*pc, ptr);
+			} else {
+				if(gp->dc.c[0])
+					ptr += utf8encode(gp->dc.c[0], ptr);
+				if(gp->dc.c[1])
+					ptr += utf8encode(gp->dc.c[1], ptr);
+			}
 		}
 
 		/*
@@ -1681,6 +1700,58 @@ tsetchar(Rune u, Glyph *attr, int x, int y) {
 }
 
 void
+taddcchar(Rune u, int x, int y) { /* append combining rune u to the glyph at column x, row y */
+	Rune *p;
+	int sz;
+
+	if(!u) /* 0 marks an empty inline slot and terminates the heap list, so it cannot be stored */
+		return;
+
+	/* don't add combining chars to wide dummies, add them to the real char */
+	if(term.line[y][x].mode == ATTR_WDUMMY) {
+		if(x == 0) {
+			if(y > 0) {
+				y--;
+				x = term.col-1;
+			} else {
+				/* this shouldn't happen, but if it does... */
+				return;
+			}
+		} else {
+			x--;
+		}
+	}
+	/* still a wide dummy after stepping back one cell: give up (can this happen?) */
+	if(term.line[y][x].mode == ATTR_WDUMMY)
+		return;
+
+	term.dirty[y] = 1;
+	if(term.line[y][x].mode & ATTR_DECOMPPTR) { /* heap list already in use: grow it by one rune */
+		p = term.line[y][x].dc.pc;
+		while(*p) /* no length is stored; walk to the terminator */
+			p++;
+		sz = (p - term.line[y][x].dc.pc) + 2; /* existing runes + new rune + terminator */
+		term.line[y][x].dc.pc = xrealloc(term.line[y][x].dc.pc, sz * sizeof(Rune));
+		term.line[y][x].dc.pc[sz-2] = u;
+		term.line[y][x].dc.pc[sz-1] = 0;
+	} else { /* inline storage: up to two runes kept in dc.c[] */
+		if(!term.line[y][x].dc.c[0]) {
+			term.line[y][x].dc.c[0] = u;
+		} else if(!term.line[y][x].dc.c[1]) {
+			term.line[y][x].dc.c[1] = u;
+		} else { /* both inline slots taken: switch to a zero-terminated heap array */
+			p = xmalloc(4 * sizeof(Rune));
+			p[0] = term.line[y][x].dc.c[0]; /* copy out before dc.pc overwrites the union */
+			p[1] = term.line[y][x].dc.c[1];
+			p[2] = u;
+			p[3] = 0;
+			term.line[y][x].dc.pc = p;
+			term.line[y][x].mode |= ATTR_DECOMPPTR; /* from now on dc.pc is the active member */
+		}
+	}
+}
+
+void
 tclearregion(int x1, int y1, int x2, int y2) {
 	int x, y, temp;
 	Glyph *gp;
@@ -1703,7 +1774,11 @@ tclearregion(int x1, int y1, int x2, int y2) {
 				selclear(NULL);
 			gp->fg = term.c.attr.fg;
 			gp->bg = term.c.attr.bg;
+			if(gp->mode & ATTR_DECOMPPTR)
+				free(gp->dc.pc);
 			gp->mode = 0;
+			gp->dc.c[0] = 0;
+			gp->dc.c[1] = 0;
 			gp->u = ' ';
 		}
 	}
@@ -1711,7 +1786,7 @@ tclearregion(int x1, int y1, int x2, int y2) {
 
 void
 tdeletechar(int n) {
-	int dst, src, size;
+	int dst, src, size, i;
 	Glyph *line;
 
 	LIMIT(n, 0, term.col - term.c.x);
@@ -1721,13 +1796,17 @@ tdeletechar(int n) {
 	size = term.col - src;
 	line = term.line[term.c.y];
 
+	for(i = dst; i < src; i++)	/* deleted cells: release their heap lists instead of leaking them */
+		if(line[i].mode & ATTR_DECOMPPTR)
+			free(line[i].dc.pc);
 	memmove(&line[dst], &line[src], size * sizeof(Glyph));
+	for(i = term.col-n; i < term.col; i++) line[i].mode &= ~ATTR_DECOMPPTR; /* tail holds stale copies (or cells freed above): tclearregion must not free them */
 	tclearregion(term.col-n, term.c.y, term.col-1, term.c.y);
 }
 
 void
 tinsertblank(int n) {
-	int dst, src, size;
+	int dst, src, size, i;
 	Glyph *line;
 
 	LIMIT(n, 0, term.col - term.c.x);
@@ -1737,6 +1816,10 @@ tinsertblank(int n) {
 	size = term.col - dst;
 	line = term.line[term.c.y];
 
+	for(i = src + size; i < term.col; i++)	/* cells the memmove does not preserve: release their heap lists */
+		if(line[i].mode & ATTR_DECOMPPTR)
+			free(line[i].dc.pc);
 	memmove(&line[dst], &line[src], size * sizeof(Glyph));
+	for(i = src; i < dst; i++) line[i].mode &= ~ATTR_DECOMPPTR; /* gap holds stale copies (or cells freed above): tclearregion must not free them */
 	tclearregion(src, term.c.y, dst - 1, term.c.y);
 }
@@ -2774,6 +2857,19 @@ tputc(Rune u) {
 	if(sel.ob.x != -1 && BETWEEN(term.c.y, sel.ob.y, sel.oe.y))
 		selclear(NULL);
 
+	/* combining chars are added to the previous cell */
+	if(width == 0) {
+		if(term.c.x == 0) {
+			if(term.c.y == 0)
+				taddcchar(u, 0, 0);
+			else
+				taddcchar(u, term.col-1, term.c.y-1);
+		} else {
+			taddcchar(u, term.c.x-1, term.c.y);
+		}
+		return;
+	}
+
 	gp = &term.line[term.c.y][term.c.x];
 	if(IS_SET(MODE_WRAP) && (term.c.state & CURSOR_WRAPNEXT)) {
 		gp->mode |= ATTR_WRAP;
@@ -2807,7 +2903,7 @@ tputc(Rune u) {
 
 void
 tresize(int col, int row) {
-	int i;
+	int i, j;
 	int minrow = MIN(row, term.row);
 	int mincol = MIN(col, term.col);
 	bool *bp;
@@ -2825,6 +2921,12 @@ tresize(int col, int row) {
 	 * memmove because we're freeing the earlier lines
 	 */
 	for(i = 0; i <= term.c.y - row; i++) {
+		for(j = 0; j < term.col; j++) {
+			if(term.line[i][j].mode & ATTR_DECOMPPTR)
+				free(term.line[i][j].dc.pc);
+			if(term.alt[i][j].mode & ATTR_DECOMPPTR)
+				free(term.alt[i][j].dc.pc);
+		}
 		free(term.line[i]);
 		free(term.alt[i]);
 	}
@@ -2834,12 +2936,19 @@ tresize(int col, int row) {
 		memmove(term.alt, term.alt + i, row * sizeof(Line));
 	}
 	for(i += row; i < term.row; i++) {
+		for(j = 0; j < term.col; j++) {
+			if(term.line[i][j].mode & ATTR_DECOMPPTR)
+				free(term.line[i][j].dc.pc);
+			if(term.alt[i][j].mode & ATTR_DECOMPPTR)
+				free(term.alt[i][j].dc.pc);
+		}
 		free(term.line[i]);
 		free(term.alt[i]);
 	}
 
 	/* resize to new width */
-	term.specbuf = xrealloc(term.specbuf, col * sizeof(XftGlyphFontSpec));
+	term.specbuflen = col * 2;
+	term.specbuf = xrealloc(term.specbuf, term.specbuflen * sizeof(XftGlyphFontSpec));
 
 	/* resize to new height */
 	term.line = xrealloc(term.line, row * sizeof(Line));
@@ -2849,14 +2958,26 @@ tresize(int col, int row) {
 
 	/* resize each row to new width, zero-pad if needed */
 	for(i = 0; i < minrow; i++) {
+		for(j = col; j < term.col; j++) {
+			if(term.line[i][j].mode & ATTR_DECOMPPTR)
+				free(term.line[i][j].dc.pc);
+			if(term.alt[i][j].mode & ATTR_DECOMPPTR)
+				free(term.alt[i][j].dc.pc);
+		}
 		term.line[i] = xrealloc(term.line[i], col * sizeof(Glyph));
 		term.alt[i]  = xrealloc(term.alt[i],  col * sizeof(Glyph));
+		if(col > term.col) {
+			memset(&term.line[i][term.col], 0, (col - term.col) * sizeof(Glyph));
+			memset(&term.alt[i][term.col], 0, (col - term.col) * sizeof(Glyph));
+		}
 	}
 
 	/* allocate any new rows */
 	for(/* i == minrow */; i < row; i++) {
 		term.line[i] = xmalloc(col * sizeof(Glyph));
 		term.alt[i] = xmalloc(col * sizeof(Glyph));
+		memset(term.line[i], 0, col * sizeof(Glyph));
+		memset(term.alt[i], 0, col * sizeof(Glyph));
 	}
 	if(col > term.col) {
 		bp = term.tabs + term.col;
@@ -3280,12 +3401,13 @@ xinit(void) {
 }
 
 int
-xmakeglyphfontspecs(XftGlyphFontSpec *specs, const Glyph *glyphs, int len, int x, int y)
+xmakeglyphfontspecs(XftGlyphFontSpec *specs, int nspecs, const Glyph *glyphs, int len, int x, int y)
 {
-	float winx = borderpx + x * xw.cw, winy = borderpx + y * xw.ch, xp, yp;
+	float winx = borderpx + x * xw.cw, winy = borderpx + y * xw.ch;
 	ushort mode, prevmode = USHRT_MAX;
 	Font *font = &dc.font;
 	int frcflags = FRC_NORMAL;
+	float xp = winx, yp = winy + font->ascent;
 	float runewidth = xw.cw;
 	Rune rune;
 	FT_UInt glyphidx;
@@ -3293,23 +3415,39 @@ xmakeglyphfontspecs(XftGlyphFontSpec *specs, const Glyph *glyphs, int len, int x
 	FcPattern *fcpattern, *fontpattern;
 	FcFontSet *fcsets[] = { NULL };
 	FcCharSet *fccharset;
-	int i, f, numspecs = 0;
+	int i = 0, ci = -1, j, f, numspecs = 0;
 
-	for(i = 0, xp = winx, yp = winy + font->ascent; i < len; ++i) {
+	while(i < len) {
 		/* Fetch rune and mode for current glyph. */
 		rune = glyphs[i].u;
 		mode = glyphs[i].mode;
 
 		/* Skip dummy wide-character spacing. */
-		if(mode == ATTR_WDUMMY)
+		if(mode == ATTR_WDUMMY) {
+			i++;
 			continue;
+		}
+
+		if((j = ci++) >= 0) {
+			if(mode & ATTR_DECOMPPTR)
+				rune = glyphs[i].dc.pc[j];
+			else
+				rune = (j < 2) ? glyphs[i].dc.c[j] : 0;
+
+			if(!rune) {
+				i++;
+				ci = -1;
+				xp += runewidth;
+				continue;
+			}
+		}
 
 		/* Determine font for glyph if different from previous glyph. */
 		if(prevmode != mode) {
 			prevmode = mode;
 			font = &dc.font;
 			frcflags = FRC_NORMAL;
-			runewidth = xw.cw * ((mode & ATTR_WIDE) ? 2.0f : 1.0f);
+			runewidth = xw.cw * ((mode & ATTR_WIDE) ? 2.0 : 1.0);
 			if((mode & ATTR_ITALIC) && (mode & ATTR_BOLD)) {
 				font = &dc.ibfont;
 				frcflags = FRC_ITALICBOLD;
@@ -3326,11 +3464,12 @@ xmakeglyphfontspecs(XftGlyphFontSpec *specs, const Glyph *glyphs, int len, int x
 		/* Lookup character index with default font. */
 		glyphidx = XftCharIndex(xw.dpy, font->match, rune);
 		if(glyphidx) {
-			specs[numspecs].font = font->match;
-			specs[numspecs].glyph = glyphidx;
-			specs[numspecs].x = (short)xp;
-			specs[numspecs].y = (short)yp;
-			xp += runewidth;
+			if(numspecs < nspecs) {
+				specs[numspecs].font = font->match;
+				specs[numspecs].glyph = glyphidx;
+				specs[numspecs].x = (short)xp;
+				specs[numspecs].y = (short)yp;
+			}
 			numspecs++;
 			continue;
 		}
@@ -3401,20 +3540,43 @@ xmakeglyphfontspecs(XftGlyphFontSpec *specs, const Glyph *glyphs, int len, int x
 			FcCharSetDestroy(fccharset);
 		}
 
-		specs[numspecs].font = frc[f].font;
-		specs[numspecs].glyph = glyphidx;
-		specs[numspecs].x = (short)xp;
-		specs[numspecs].y = (short)(winy + frc[f].font->ascent);
-		xp += runewidth;
+		if(numspecs < nspecs) {
+			specs[numspecs].font = frc[f].font;
+			specs[numspecs].glyph = glyphidx;
+			specs[numspecs].x = (short)xp;
+			specs[numspecs].y = (short)(winy + frc[f].font->ascent);
+		}
 		numspecs++;
 	}
 
 	return numspecs;
 }
 
+int
+xmakeglyphfontspecsintermbuf(const Glyph *glyphs, int len, int x, int y)
+{
+	int numspecs;
+
+	/*
+	 * Fill term.specbuf with the font specs for glyphs[0..len) and return
+	 * their count. xmakeglyphfontspecs() keeps counting specs that do not
+	 * fit without storing them, so a result above term.specbuflen is the
+	 * exact capacity needed: grow once and redo the pass.
+	 */
+	numspecs = xmakeglyphfontspecs(term.specbuf, term.specbuflen, glyphs, len, x, y);
+	if(numspecs > term.specbuflen) {
+		/* combining chars require a larger spec buffer */
+		term.specbuflen = numspecs;
+		term.specbuf = xrealloc(term.specbuf,
+				term.specbuflen * sizeof(XftGlyphFontSpec));
+		numspecs = xmakeglyphfontspecs(term.specbuf, term.specbuflen, glyphs, len, x, y);
+	}
+
+	return numspecs;
+}
+
 void
-xdrawglyphfontspecs(const XftGlyphFontSpec *specs, Glyph base, int len, int x, int y) {
-	int charlen = len * ((base.mode & ATTR_WIDE) ? 2 : 1);
+xdrawglyphfontspecs(const XftGlyphFontSpec *specs, Glyph base, int len, int charlen, int x, int y) {
 	int winx = borderpx + x * xw.cw, winy = borderpx + y * xw.ch,
 	    width = charlen * xw.cw;
 	Color *fg, *bg, *temp, revfg, revbg, truefg, truebg;
@@ -3548,16 +3710,15 @@ xdrawglyphfontspecs(const XftGlyphFontSpec *specs, Glyph base, int len, int x, i
 void
 xdrawglyph(Glyph g, int x, int y) {
 	int numspecs;
-	XftGlyphFontSpec spec;
-	numspecs = xmakeglyphfontspecs(&spec, &g, 1, x, y);
-	xdrawglyphfontspecs(&spec, g, numspecs, x, y);
+	numspecs = xmakeglyphfontspecsintermbuf(&g, 1, x, y); /* one cell can yield several specs: base rune plus its combining runes */
+	xdrawglyphfontspecs(term.specbuf, g, numspecs, ((g.mode & ATTR_WIDE) ? 2 : 1), x, y); /* charlen is in cells: 2 for a wide glyph, else 1 */
 }
 
 void
 xdrawcursor(void) {
 	static int oldx = 0, oldy = 0;
 	int curx;
-	Glyph g = {' ', ATTR_NULL, defaultbg, defaultcs};
+	Glyph g = {' ', {{0, 0}}, ATTR_NULL, defaultbg, defaultcs};
 
 	LIMIT(oldx, 0, term.col-1);
 	LIMIT(oldy, 0, term.row-1);
@@ -3570,7 +3731,9 @@ xdrawcursor(void) {
 	if(term.line[term.c.y][curx].mode & ATTR_WDUMMY)
 		curx--;
 
-	g.u = term.line[term.c.y][term.c.x].u;
+	g.u = term.line[term.c.y][curx].u;
+	g.dc = term.line[term.c.y][curx].dc;
+	g.mode |= term.line[term.c.y][curx].mode & ATTR_DECOMPPTR;
 
 	/* remove the old cursor */
 	xdrawglyph(term.line[oldy][oldx], oldx, oldy);
@@ -3664,8 +3827,9 @@ draw(void) {
 
 void
 drawregion(int x1, int y1, int x2, int y2) {
-	int i, x, y, ox, numspecs;
+	int i, w, x, y, ox, numspecs;
 	Glyph base, new;
+	Rune *pc;
 	XftGlyphFontSpec* specs;
 	bool ena_sel = sel.ob.x != -1 && sel.alt == IS_SET(MODE_ALTSCREEN);
 
@@ -3679,30 +3843,43 @@ drawregion(int x1, int y1, int x2, int y2) {
 		xtermclear(0, y, term.col, y);
 		term.dirty[y] = 0;
 
+		numspecs = xmakeglyphfontspecsintermbuf(&term.line[y][x1], x2 - x1, x1, y);
 		specs = term.specbuf;
-		numspecs = xmakeglyphfontspecs(specs, &term.line[y][x1], x2 - x1, x1, y);
 
-		i = ox = 0;
-		for(x = x1; x < x2 && i < numspecs; x++) {
+		i = w = 0;
+		for(ox = x = x1; x < x2 && i < numspecs; x++) {
 			new = term.line[y][x];
 			if(new.mode == ATTR_WDUMMY)
 				continue;
 			if(ena_sel && selected(x, y))
 				new.mode ^= ATTR_REVERSE;
 			if(i > 0 && ATTRCMP(base, new)) {
-				xdrawglyphfontspecs(specs, base, i, ox, y);
+				xdrawglyphfontspecs(specs, base, i, w, ox, y);
 				specs += i;
 				numspecs -= i;
 				i = 0;
 			}
 			if(i == 0) {
-				ox = x;
+				ox += w;
 				base = new;
+				w = 0;
+			}
+			if(new.mode & ATTR_DECOMPPTR) {
+				pc = new.dc.pc;
+				while(*pc)
+					pc++;
+				i += pc - new.dc.pc;
+			} else {
+				if(new.dc.c[0])
+					i++;
+				if(new.dc.c[1])
+					i++;
 			}
 			i++;
+			w += (new.mode & ATTR_WIDE) ? 2 : 1;
 		}
 		if(i > 0)
-			xdrawglyphfontspecs(specs, base, i, ox, y);
+			xdrawglyphfontspecs(specs, base, i, w, ox, y);
 	}
 	xdrawcursor();
 }

Reply via email to