Dear Werner,
some time ago you wrote:
...  groff doesn't yet support UTF8 input.
You have to convert your file first to something groff can understand.

Below is a small perl script which does that.

Attached is a small C program that does that.
Why not put something like this code into groff?

                                Sincerely, Michail
PS.
Your perl code and a free library I have stolen code from
are also attached.

Attachment: uni2groff.pl
Description: Binary data

Attachment: libutf-8-1.0.tar.gz
Description: GNU Zip compressed data


#include <stdio.h>
#include <stdlib.h>

/*
 * Sentinel returned by fgetu8() for malformed input.  fgetu8() never
 * returns a decoded value above 0x10FFFF, so this cannot be mistaken for
 * a code point (assuming unsigned int is at least 32 bits wide).
 */
#define INVALID 0x80000000

/*
 * Read one UTF-8 encoded character from `input` and return its code point.
 *
 * Returns:
 *   - the decoded code point (0 .. 0x10FFFF, excluding surrogates),
 *   - (unsigned int)EOF if `input` is NULL or the stream is at end of file
 *     (or fails) before the first byte of a character,
 *   - INVALID if the bytes read do not form a well-formed UTF-8 sequence:
 *     a stray continuation byte, 0xFE/0xFF, a missing or wrong continuation
 *     byte, end of file in the middle of a sequence, an overlong encoding,
 *     a surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
 *
 * The historical 5- and 6-byte lead bytes are still parsed so that the
 * same number of bytes is consumed as before, but every value they can
 * encode is rejected by the range checks below.
 */
unsigned int fgetu8(FILE *input) {
        /* Smallest value that genuinely needs N continuation bytes,
           indexed by N; anything smaller is an overlong encoding. */
        static const unsigned int min_value[6] = {
                0x0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
        };
        unsigned int c;
        int ch, i, iterations;

        if (input == NULL)
                return (unsigned int)EOF;

        ch = fgetc(input);
        if (ch == EOF)
                return (unsigned int)EOF;
        c = (unsigned int)ch;

        /* Classify the lead byte: keep its payload bits and note how
           many continuation bytes must follow. */
        if ((c & 0xFE) == 0xFC) {
                c &= 0x01;
                iterations = 5;
        }
        else if ((c & 0xFC) == 0xF8) {
                c &= 0x03;
                iterations = 4;
        }
        else if ((c & 0xF8) == 0xF0) {
                c &= 0x07;
                iterations = 3;
        }
        else if ((c & 0xF0) == 0xE0) {
                c &= 0x0F;
                iterations = 2;
        }
        else if ((c & 0xE0) == 0xC0) {
                c &= 0x1F;
                iterations = 1;
        }
        else if ((c & 0x80) == 0x80) {
                /* Stray continuation byte (10xxxxxx) or 0xFE/0xFF. */
                return INVALID;
        }
        else {
                /* Plain 7-bit ASCII. */
                return c;
        }

        for (i = 0; i < iterations; i++) {
                ch = fgetc(input);

                /* End of input inside a character is a decoding error,
                   not a clean end of file. */
                if (ch == EOF)
                        return INVALID;

                if ((ch & 0xC0) != 0x80)
                        return INVALID;

                c <<= 6;
                c |= (unsigned int)ch & 0x3F;
        }

        /* Overlong form: the value would have fit in fewer bytes. */
        if (c < min_value[iterations])
                return INVALID;

        /* UTF-16 surrogates are not valid scalar values. */
        if (c >= 0xD800 && c <= 0xDFFF)
                return INVALID;

        /* Beyond the Unicode code space. */
        if (c > 0x10FFFF)
                return INVALID;

        return c;
}
/*
 * Filter: copy UTF-8 text from stdin to stdout, passing 7-bit ASCII
 * through unchanged and replacing every other code point with the groff
 * escape \[uXXXX] (upper-case hex, at least four digits).
 *
 * Exit status is 0 on success and 1 if the input cannot be decoded or
 * an I/O error occurs on stdin or stdout.
 */
int main(void)
{
        unsigned int c;

        /* fgetu8() reports end of input as EOF converted to unsigned. */
        while ((c = fgetu8(stdin)) != (unsigned int)EOF) {
                if (c == INVALID) {
                        fputs("Error decoding UTF-8\n", stderr);
                        exit(1);
                }

                if (c <= 0x7f) {
                        putchar((int)c);
                } else {
                        printf("\\[u%04X]", c);
                }
        }

        /* fgetu8() returns the same value for end of file and for a
           read failure; tell them apart here. */
        if (ferror(stdin)) {
                fputs("Error reading input\n", stderr);
                exit(1);
        }

        /* Flush explicitly so that a failed write (full disk, closed
           pipe) is not reported as success. */
        if (fflush(stdout) == EOF || ferror(stdout)) {
                fputs("Error writing output\n", stderr);
                exit(1);
        }

        exit(0);
}
_______________________________________________
Groff mailing list
groff@gnu.org
http://lists.gnu.org/mailman/listinfo/groff

Reply via email to