Dear Werner, some time ago you wrote:
... groff doesn't yet support UTF8 input. You have to convert your file first to something groff can understand. Below is a small perl script which does that.
Attached is a small C program that does that.
Why not put something like this code into groff?
Sincerely, Michail
PS.
Your perl code and a free library I have stolen code from
are also attached.
uni2groff.pl
Description: Binary data
libutf-8-1.0.tar.gz
Description: GNU Zip compressed data
#include <stdio.h>
#include <stdlib.h>
/* Sentinel returned by fgetu8() for malformed input.  It lies above the
 * highest Unicode code point (0x10FFFF) and differs from (unsigned int)EOF,
 * so it can never collide with a successfully decoded value. */
#define INVALID 0x80000000

/*
 * Read one UTF-8 encoded character from `input` and return its code point.
 *
 * Return value:
 *   - the decoded code point (0 .. 0x10FFFF, surrogates excluded);
 *   - (unsigned int)EOF if `input` is NULL or fgetc() reports EOF where the
 *     first byte of a character was expected;
 *   - INVALID if the input is not well-formed UTF-8:
 *       * a stray continuation byte or a lead byte 0xF8..0xFF (the obsolete
 *         5- and 6-byte forms are no longer part of UTF-8);
 *       * a byte that is not a continuation byte (10xxxxxx) where one is
 *         required;
 *       * EOF in the middle of a multi-byte sequence (truncated input);
 *       * an overlong encoding, a UTF-16 surrogate (0xD800..0xDFFF) or a
 *         value above 0x10FFFF.
 *
 * Every byte that was examined is consumed, including the offending one.
 */
unsigned int fgetu8(FILE *input)
{
    unsigned int c;
    unsigned int min;   /* smallest code point that needs this many bytes */
    int ch;
    int i;
    int iterations;     /* number of continuation bytes still expected */

    if (input == NULL)
        return (unsigned int)EOF;

    ch = fgetc(input);
    if (ch == EOF)
        return (unsigned int)EOF;
    c = (unsigned int)ch;

    if (c < 0x80)
        return c;                       /* plain ASCII */

    if ((c & 0xE0) == 0xC0) {           /* 110xxxxx: 2-byte sequence */
        c &= 0x1F;
        iterations = 1;
        min = 0x80;
    } else if ((c & 0xF0) == 0xE0) {    /* 1110xxxx: 3-byte sequence */
        c &= 0x0F;
        iterations = 2;
        min = 0x800;
    } else if ((c & 0xF8) == 0xF0) {    /* 11110xxx: 4-byte sequence */
        c &= 0x07;
        iterations = 3;
        min = 0x10000;
    } else {
        /* Continuation byte in lead position, or 0xF8..0xFF. */
        return INVALID;
    }

    for (i = 0; i < iterations; i++) {
        ch = fgetc(input);
        if (ch == EOF)
            return INVALID;             /* sequence cut short by EOF */
        if ((ch & 0xC0) != 0x80)
            return INVALID;
        c <<= 6;
        c |= (unsigned int)ch & 0x3F;
    }

    /* Reject overlong forms: the value would have fit in fewer bytes. */
    if (c < min)
        return INVALID;
    /* Reject values that are not Unicode scalar values. */
    if (c > 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF))
        return INVALID;

    return c;
}
/*
 * Filter: read UTF-8 text from stdin and write it to stdout in a form
 * groff can read.  ASCII characters are copied unchanged; every other
 * character is written as a groff Unicode escape \[uXXXX] (at least four
 * uppercase hexadecimal digits).
 *
 * Exit status: 0 on success; 1 if the input cannot be decoded or an I/O
 * error occurs on stdin or stdout.
 */
int main(void)
{
    unsigned int c;

    while ((c = fgetu8(stdin)) != (unsigned int)EOF) {
        /* Besides the decoder's INVALID sentinel, refuse anything that is
         * not a Unicode scalar value rather than emit an escape for it. */
        if (c == INVALID || c > 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF)) {
            fputs("Error decoding UTF-8\n", stderr);
            return 1;
        }
        if (c <= 0x7f)
            putchar((int)c);
        else
            printf("\\[u%04X]", c);
    }

    /* fgetu8() reports a read error the same way as end of file, so tell
     * the two apart here instead of exiting successfully on bad input. */
    if (ferror(stdin)) {
        fputs("Error reading input\n", stderr);
        return 1;
    }

    /* Flush explicitly so a failed write (full disk, closed pipe) is
     * reflected in the exit status. */
    if (fflush(stdout) == EOF || ferror(stdout)) {
        fputs("Error writing output\n", stderr);
        return 1;
    }

    return 0;
}
_______________________________________________ Groff mailing list [email protected] http://lists.gnu.org/mailman/listinfo/groff
