On Thu, 2021-04-15 at 14:20 +0200, Martijn van Duren wrote:
> I did some archeology today and found that it used to behave as
> non-printable, but it got broken in release 334 (august 2018), when
> CharWidth was introduced. Before that my_wcwidth was used directly.
> 
> Since there doesn't appear to be a repository with commit messages I'm
> not 100% sure why this macro was introduced. My best guess at this
> point would be the following line from the xterm.log.html:
> several minor performance improvements using macros, e.g., inline checks
> for character width.
> Which would imply that it is indeed a bug in xterm.
> I mailed Thomas Dickey to ask his view on the situation and maybe get
> some context. Answer pending.

Answer received: It's as I described it and fixing it is on his todo
list.
> 
> On Wed, 2021-04-14 at 21:25 +0200, Martijn van Duren wrote:
> > On Wed, 2021-04-14 at 20:10 +0300, Lauri Tirkkonen wrote:
> > > Since the discussion seems to have died out, I take my patch will not be
> > > accepted.
> > > 
> > > The decision appears to be that OpenBSD is right and everyone else is 
> > > wrong in
> > > this matter. Given that, and the calls to change the behavior of other 
> > > OSes and
> > > terminal emulators around SHY: are you going to at least patch xterm 
> > > in-tree so
> > > that it does not render SHY?
> > > 
> > > Or must it remain broken?
> > > 
> > Looking closer at the xterm source corroborated my previous reasoning.
> > From xterm's wcwidth.c:
> > /*
> >  * Provide a way to change the behavior of soft-hyphen.
> >  */
> > void mk_wcwidth_init(int mode)
> > {
> >   use_latin1 = (mode == 0);
> > }
> > 
> > and
> > 
> >  *    - SOFT HYPHEN (U+00AD) has a column width of 1 in Latin-1, 0 in 
> > Unicode.
> >  *      An initialization function is used to switch between the two.
> > 
> > So it is the intention of xterm to not display the soft hyphen in
> > unicode mode.
> > 
> > This is also corrobarated by charproc.c5799 where the error occurs:
> >                         if (ch == 0xad) {                            
> >                             /*                                              
> >                                             
> >                              * Only display soft-hyphen if it happens to be 
> > at
> >                              * the right-margin.  While that means that only
> >                              * the displayed character could be selected for
> >                              * pasting, a well-behaved application would 
> > never
> >                              * send this, anyway...
> >                              */
> > 
> > The problem here is that on line 5795 we have:
> > last_chomp = CharWidth(buf[n]);
> > which expands to:
> > CharWidth(n) (((n) < 256) ? (IsLatin1(n) ? 1 : 0) : my_wcwidth((wchar_t) 
> > (n)))
> > and
> > #define IsLatin1(n)  (((n) >= 32 && (n) <= 126) || ((n) >= 160 && (n) <= 
> > 255))
> > 
> > So here's the big oops: CharWidth doesn't know we're in UTF-8 mode and
> > we never reach my_wcwidth.
> > 
> > Diff below fixes this behaviour for me and restores the printing
> > behaviour when I run xterm with +u8 to reset utf-8 mode.
> > However, I'm no xterm hacker and it's quite a beast, so this needs
> > proper testing and scrutiny from someone who knows the code to make
> > sure there's no use of uninitialized variables. (CC matthieu@)
> > 
> > No intention of pushing this for 6.9, but maybe someone brave is
> > willing to dive in here after me.
> > 
> > martijn@
> > 
> > Index: charproc.c
> > ===================================================================
> > RCS file: /cvs/xenocara/app/xterm/charproc.c,v
> > retrieving revision 1.49
> > diff -u -p -r1.49 charproc.c
> > --- charproc.c  2 Apr 2021 18:44:19 -0000       1.49
> > +++ charproc.c  14 Apr 2021 19:24:14 -0000
> > @@ -2305,7 +2305,7 @@ doparsing(XtermWidget xw, unsigned c, st
> >          */
> >         if (c >= 0x300
> >             && screen->wide_chars
> > -           && CharWidth(c) == 0
> > +           && CharWidth(screen, c) == 0
> >             && !isWideControl(c)) {
> >             int prev, test;
> >             Boolean used = True;
> > @@ -2330,9 +2330,9 @@ doparsing(XtermWidget xw, unsigned c, st
> >                 prev = (int) XTERM_CELL(use_row, use_col);
> >                 test = do_precomposition(prev, (int) c);
> >                 TRACE(("do_precomposition (U+%04X [%d], U+%04X [%d]) -> 
> > U+%04X [%d]\n",
> > -                      prev, CharWidth(prev),
> > -                      (int) c, CharWidth(c),
> > -                      test, CharWidth(test)));
> > +                      prev, CharWidth(screen, prev),
> > +                      (int) c, CharWidth(screen, c),
> > +                      test, CharWidth(screen, test)));
> >             } else {
> >                 prev = -1;
> >                 test = -1;
> > @@ -2342,7 +2342,7 @@ doparsing(XtermWidget xw, unsigned c, st
> >              * only if it does not change the width of the base character
> >              */
> >             if (test != -1
> > -               && CharWidth(test) == CharWidth(prev)) {
> > +               && CharWidth(screen, test) == CharWidth(screen, prev)) {
> >                 putXtermCell(screen, use_row, use_col, test);
> >             } else if (screen->char_was_written
> >                        || getXtermCell(screen, use_row, use_col) >= ' ') {
> > @@ -4551,7 +4551,7 @@ doparsing(XtermWidget xw, unsigned c, st
> >                 value = zero_if_default(0);
> >  
> >                 TRACE(("CASE_DECFRA - Fill rectangular area\n"));
> > -               if (nparam > 0 && CharWidth(value) > 0) {
> > +               if (nparam > 0 && CharWidth(screen, value) > 0) {
> >                     xtermParseRect(xw, ParamPair(1), &myRect);
> >                     ScrnFillRectangle(xw, &myRect, value, xw->flags, True);
> >                 }
> > @@ -4860,7 +4860,7 @@ doparsing(XtermWidget xw, unsigned c, st
> >  
> >         case CASE_REP:
> >             TRACE(("CASE_REP\n"));
> > -           if (CharWidth(sp->lastchar) > 0) {
> > +           if (CharWidth(screen, sp->lastchar) > 0) {
> >                 IChar repeated[2];
> >                 count = one_if_default(0);
> >                 repeated[0] = (IChar) sp->lastchar;
> > @@ -5792,7 +5792,7 @@ dotext(XtermWidget xw,
> >                            buf[n] <= 0xa0) {
> >                     last_chomp = 1;
> >                 } else {
> > -                   last_chomp = CharWidth(buf[n]);
> > +                   last_chomp = CharWidth(screen, buf[n]);
> >                     if (last_chomp <= 0) {
> >                         IChar ch = buf[n];
> >                         Bool eat_it = !screen->utf8_mode && (ch > 127);
> > Index: fontutils.c
> > ===================================================================
> > RCS file: /cvs/xenocara/app/xterm/fontutils.c,v
> > retrieving revision 1.37
> > diff -u -p -r1.37 fontutils.c
> > --- fontutils.c 2 Apr 2021 18:44:19 -0000       1.37
> > +++ fontutils.c 14 Apr 2021 19:24:14 -0000
> > @@ -2442,7 +2442,7 @@ dumpXft(XtermWidget xw, XTermXftFonts *d
> >  #endif
> >      for (c = first; c <= last; ++c) {
> >         if (FcCharSetHasChar(xft->charset, c)) {
> > -           int width = CharWidth(c);
> > +           int width = CharWidth(screen, c);
> >             XGlyphInfo extents;
> >             Boolean big_x;
> >             Boolean big_y;
> > @@ -2610,7 +2610,7 @@ checkXftWidth(XtermWidget xw, XTermXftFo
> >       * Ignore control characters - their extent information is misleading.
> >       */
> >      for (c = 32; c < 256; ++c) {
> > -       if (CharWidth(c) <= 0)
> > +       if (CharWidth(TScreenOf(xw), c) <= 0)
> >             continue;
> >         if (FcCharSetHasChar(source->font->charset, c)) {
> >             (void) checkedXftWidth(XtDisplay(xw),
> > @@ -3626,8 +3626,7 @@ xtermMissingChar(unsigned ch, XTermFonts
> >  #endif
> >  
> >      if (pc == 0 || CI_NONEXISTCHAR(pc)) {
> > -       TRACE2(("xtermMissingChar %#04x (!exists), %d cells\n",
> > -               ch, CharWidth(ch)));
> > +       TRACE2(("xtermMissingChar %#04x (!exists)\n", ch));
> >         result = True;
> >      }
> >      if (ch < KNOWN_MISSING) {
> > @@ -4054,7 +4053,7 @@ foundXftGlyph(XtermWidget xw, XftFont *f
> >      if (font != 0 && XftGlyphExists(screen->display, font, wc)) {
> >         int expect;
> >  
> > -       if ((expect = CharWidth(wc)) > 0) {
> > +       if ((expect = CharWidth(screen, wc)) > 0) {
> >             XGlyphInfo gi;
> >             int actual;
> >  
> > Index: util.c
> > ===================================================================
> > RCS file: /cvs/xenocara/app/xterm/util.c,v
> > retrieving revision 1.39
> > diff -u -p -r1.39 util.c
> > --- util.c      2 Apr 2021 18:44:19 -0000       1.39
> > +++ util.c      14 Apr 2021 19:24:14 -0000
> > @@ -2937,14 +2937,14 @@ getXftColor(XtermWidget xw, Pixel pixel)
> >               ? (((ch) >= 128 && (ch) < 160) \
> >                   ? (TScreenOf(xw)->c1_printable ? 1 : 0) \
> >                   : 1) \
> > -             : CharWidth(ch)))
> > +             : CharWidth(TScreenOf(xw), ch)))
> >  #else
> >  #define XtermCellWidth(xw, ch) \
> >         (((ch) == 0 || (ch) == 127) \
> >           ? 0 \
> >           : (((ch) < 256) \
> >               ? 1 \
> > -             : CharWidth(ch)))
> > +             : CharWidth(TScreenOf(xw), ch)))
> >  #endif
> >  
> >  #endif /* OPT_RENDERWIDE */
> > @@ -3247,7 +3247,7 @@ ucs_workaround(XTermDraw * params,
> >         IChar eqv = (IChar) AsciiEquivs(ch);
> >  
> >         if (eqv != (IChar) ch) {
> > -           int width = CharWidth(ch);
> > +           int width = CharWidth(screen, ch);
> >  
> >             do {
> >                 drawXtermText(params,
> > @@ -3939,7 +3939,7 @@ drawXtermText(XTermDraw * params,
> >                 unsigned ch = (unsigned) text[last];
> >                 int filler = 0;
> >  #if OPT_WIDE_CHARS
> > -               int needed = forceDbl ? 2 : CharWidth(ch);
> > +               int needed = forceDbl ? 2 : CharWidth(screen, ch);
> >                 XftFont *currFont = pickXftFont(needed, font, wfont);
> >                 XftFont *tempFont = 0;
> >  #define CURR_TEMP (tempFont ? tempFont : currFont)
> > @@ -4210,7 +4210,7 @@ drawXtermText(XTermDraw * params,
> >                 drewBoxes = True;
> >                 continue;
> >             }
> > -           ch_width = CharWidth(ch);
> > +           ch_width = CharWidth(screen, ch);
> >             isMissing =
> >                 IsXtermMissingChar(screen, ch,
> >                                    ((recur.on_wide || ch_width > 1)
> > @@ -4335,7 +4335,7 @@ drawXtermText(XTermDraw * params,
> >  
> >             if (!needWide
> >                 && !IsIcon(screen)
> > -               && ((recur.on_wide || CharWidth(ch) > 1)
> > +               && ((recur.on_wide || CharWidth(screen, ch) > 1)
> >                     && okFont(NormalWFont(screen)))) {
> >                 needWide = True;
> >             }
> > @@ -5059,7 +5059,7 @@ addXtermCombining(TScreen *screen, int r
> >         size_t off;
> >  
> >         TRACE(("addXtermCombining %d,%d U+%04X (%d)\n",
> > -              row, col, ch, CharWidth(ch)));
> > +              row, col, ch, CharWidth(screen, ch)));
> >  
> >         for_each_combData(off, ld) {
> >             if (!ld->combData[off][col]) {
> > Index: xterm.h
> > ===================================================================
> > RCS file: /cvs/xenocara/app/xterm/xterm.h,v
> > retrieving revision 1.47
> > diff -u -p -r1.47 xterm.h
> > --- xterm.h     2 Apr 2021 18:44:19 -0000       1.47
> > +++ xterm.h     14 Apr 2021 19:24:14 -0000
> > @@ -963,10 +963,10 @@ extern void report_char_class(XtermWidge
> >  #define WideCells(n) (((IChar)(n) >= first_widechar) ? 
> > my_wcwidth((wchar_t) (n)) : 1)
> >  #define isWideFrg(n) (((n) == HIDDEN_CHAR) || (WideCells((n)) == 2))
> >  #define isWide(n)    (((IChar)(n) >= first_widechar) && isWideFrg(n))
> > -#define CharWidth(n) (((n) < 256) ? (IsLatin1(n) ? 1 : 0) : 
> > my_wcwidth((wchar_t) (n)))
> > +#define CharWidth(screen, n) ((!(screen->utf8_mode) && ((n) < 256)) ? 
> > (IsLatin1(n) ? 1 : 0) : my_wcwidth((wchar_t) (n)))
> >  #else
> >  #define WideCells(n) 1
> > -#define CharWidth(n) (IsLatin1(n) ? 1 : 0)
> > +#define CharWidth(screen, n) (IsLatin1(n) ? 1 : 0)
> >  #endif
> >  
> >  /* cachedCgs.c */
> > 
> > 
> 
> 


Reply via email to