On Mon, 18 Nov 2024 22:07:40 -0500 Bruce Momjian <br...@momjian.us> wrote:
> On Tue, Nov 19, 2024 at 11:29:07AM +0900, Yugo NAGATA wrote: > > On Mon, 18 Nov 2024 16:04:20 -0500 > > > So, the failure of ligatures is caused usually by not using the right > > > Adobe Font Metric (AFM) file, I think. I have seen faulty ligature > > > rendering in PDFs but was always able to fix it by using the right AFM > > > file. Odds are, failure is caused by using a standard Latin1 AFM file > > > and not the AFM file that matches the font being used. > > > > > > > [1] https://xmlgraphics.apache.org/fop/faq.html#pdf-characters > > > > > > > > However, it seems that using iconv to detect non-Latin1 characters may > > > > be still > > > > useful because these are likely not displayed in PDF. For example, we > > > > can do this > > > > in make check as the attached patch 0002. It cannot show the filename > > > > where one > > > > is found, though. > > > > > > I was thinking something like: > > > > > > grep -l --recursive -P '[\x80-\xFF]' . | > > > while read FILE > > > do iconv -f UTF-8 -t ISO-8859-1 "$FILE" || exit 1 > > > done > > > > > > This only checks files with non-ASCII characters. > > > > Checking non-latin1 after non-ASCII characters seems good idea. > > I attached an updated patch (0002) that uses perl instead of grep > > because non-GNU grep could not have escape sequences for hex. > > Yes, good point. > > > > So, are we sure this will be the message even for non-English users? I > > > thought checking for warning message text was too fragile. > > > > I am not sure whether fop has messages in non-English, although I've never > > seen Japanese messages output. > > > > I wonder we can get unified results if executed with LANG=C. > > The updated patch 0001 is fixed in this direction. > > Yes, good idea. > > > + @ ( $(PERL) -ne '/[\x80-\xFF]/ and `${ICONV} -t ISO-8859-1 -f UTF-8 > > "$$ARGV" 2>/dev/null` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ > > I am thinking we should have -f before -t because it is from/to. 
I've updated patch 0002 to move -f before -t. Also, I added a new patch 0003 that updates the configure scripts to check whether iconv exists. When it does not exist, the message "ERROR: `iconv' is missing on your system." will be raised. However, this change may be unnecessary since iconv is part of the POSIX standard and most UNIX-like systems would have it. Regards, Yugo Nagata -- Yugo NAGATA <nag...@sraoss.co.jp>
>From 93adc51c0135d274cea75f2de2b328480c72a94c Mon Sep 17 00:00:00 2001 From: Yugo Nagata <nag...@sraoss.co.jp> Date: Tue, 19 Nov 2024 19:19:14 +0900 Subject: [PATCH v3 3/3] Check whether iconv exists for detecting non-latin1 characters --- configure | 65 ++++++++++++++++++++++++++++++++++++++---- configure.ac | 1 + doc/src/sgml/Makefile | 6 +++- src/Makefile.global.in | 1 + 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/configure b/configure index f58eae1baa..eaf02c5660 100755 --- a/configure +++ b/configure @@ -632,6 +632,7 @@ PG_VERSION_NUM LDFLAGS_EX_BE PROVE DBTOEPUB +ICONV FOP XSLTPROC XMLLINT @@ -14728,7 +14729,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -14774,7 +14775,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -14798,7 +14799,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 
1 : -1]; @@ -14843,7 +14844,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -14867,7 +14868,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -18535,6 +18536,60 @@ $as_echo_n "checking for FOP... " >&6; } $as_echo "$FOP" >&6; } fi +if test -z "$ICONV"; then + for ac_prog in iconv +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ICONV+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ICONV in + [\\/]* | ?:[\\/]*) + ac_cv_path_ICONV="$ICONV" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ICONV="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ICONV=$ac_cv_path_ICONV +if test -n "$ICONV"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ICONV" >&5 +$as_echo "$ICONV" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ICONV" && break +done + +else + # Report the value of ICONV in configure's output in all cases. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ICONV" >&5 +$as_echo_n "checking for ICONV... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ICONV" >&5 +$as_echo "$ICONV" >&6; } +fi + if test -z "$DBTOEPUB"; then for ac_prog in dbtoepub do diff --git a/configure.ac b/configure.ac index 82c5009e3e..1196f857cf 100644 --- a/configure.ac +++ b/configure.ac @@ -2321,6 +2321,7 @@ fi PGAC_PATH_PROGS(XMLLINT, xmllint) PGAC_PATH_PROGS(XSLTPROC, xsltproc) PGAC_PATH_PROGS(FOP, fop) +PGAC_PATH_PROGS(ICONV, iconv) PGAC_PATH_PROGS(DBTOEPUB, dbtoepub) # diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index 820ae7c456..416dfc6c89 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -36,6 +36,10 @@ ifndef FOP FOP = $(missing) fop endif +ifndef ICONV +ICONV = $(missing) iconv +endif + PANDOC = pandoc XMLINCLUDE = --path . --path $(srcdir) @@ -271,7 +275,7 @@ check-nbsp: # Non-Latin1 characters cannot be displayed in PDF. 
check-non-latin1: - @ ( $(PERL) -ne '/[\x80-\xFF]/ and `iconv -f UTF-8 -t ISO-8859-1 "$$ARGV" 2>/dev/null` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ + @ ( $(PERL) -ne '/[\x80-\xFF]/ and `LANG=C ${ICONV} -f UTF-8 -t ISO-8859-1 "$$ARGV"` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \ (echo "Non-Latin1 characters appear in SGML/XML files" 1>&2; exit 1) diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 0f38d712d1..f3bd700664 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -517,6 +517,7 @@ STRIP_SHARED_LIB = @STRIP_SHARED_LIB@ DBTOEPUB = @DBTOEPUB@ FOP = @FOP@ +ICONV = @ICONV@ XMLLINT = @XMLLINT@ XSLTPROC = @XSLTPROC@ -- 2.34.1
>From d07e2646a0a27852e169686fcce6c5647840abf3 Mon Sep 17 00:00:00 2001 From: Yugo Nagata <nag...@sraoss.co.jp> Date: Mon, 11 Nov 2024 19:45:18 +0900 Subject: [PATCH v3 2/3] Check non-latin1 characters in make check --- doc/src/sgml/Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index 18bf87d031..820ae7c456 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -160,7 +160,6 @@ XSLTPROC_FO_FLAGS += --stringparam img.src.path '$(srcdir)/' awk 'BEGIN{err=0}{print}/not available in font/{err=1}END{exit err}' 1>&2 || \ (echo "Found characters that cannot be displayed in PDF" 1>&2; exit 1) - ## ## EPUB ## @@ -197,7 +196,7 @@ MAKEINFO = makeinfo ## # Quick syntax check without style processing -check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp +check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp check-non-latin1 $(XMLLINT) $(XMLINCLUDE) --noout --valid $< @@ -270,6 +269,12 @@ check-nbsp: $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \ (echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1) +# Non-Latin1 characters cannot be displayed in PDF. +check-non-latin1: + @ ( $(PERL) -ne '/[\x80-\xFF]/ and `iconv -f UTF-8 -t ISO-8859-1 "$$ARGV" 2>/dev/null` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ + $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \ + (echo "Non-Latin1 characters appear in SGML/XML files" 1>&2; exit 1) + ## ## Clean ## -- 2.34.1
>From 3abf606f693776410dd667bd59b0d33b9b6a75f3 Mon Sep 17 00:00:00 2001 From: Yugo Nagata <nag...@sraoss.co.jp> Date: Mon, 11 Nov 2024 19:22:02 +0900 Subject: [PATCH v3 1/3] Disallow characters that cannot be displayed in PDF --- doc/src/sgml/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index a04c532b53..18bf87d031 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -156,7 +156,9 @@ XSLTPROC_FO_FLAGS += --stringparam img.src.path '$(srcdir)/' $(XSLTPROC) $(XMLINCLUDE) $(XSLTPROCFLAGS) $(XSLTPROC_FO_FLAGS) --stringparam paper.type USletter -o $@ $^ %.pdf: %.fo $(ALL_IMAGES) - $(FOP) -fo $< -pdf $@ + LANG=C $(FOP) -fo $< -pdf $@ 2>&1 | \ + awk 'BEGIN{err=0}{print}/not available in font/{err=1}END{exit err}' 1>&2 || \ + (echo "Found characters that cannot be displayed in PDF" 1>&2; exit 1) ## -- 2.34.1