On Mon, 18 Nov 2024 22:07:40 -0500
Bruce Momjian <br...@momjian.us> wrote:

> On Tue, Nov 19, 2024 at 11:29:07AM +0900, Yugo NAGATA wrote:
> > On Mon, 18 Nov 2024 16:04:20 -0500
> > > So, the failure of ligatures is caused usually by not using the right
> > > Adobe Font Metric (AFM) file, I think.  I have seen faulty ligature
> > > rendering in PDFs but was always able to fix it by using the right AFM
> > > file.  Odds are, failure is caused by using a standard Latin1 AFM file
> > > and not the AFM file that matches the font being used.
> > > 
> > > > [1] https://xmlgraphics.apache.org/fop/faq.html#pdf-characters
> > > > 
> > > > However, it seems that using iconv to detect non-Latin1 characters may 
> > > > be still
> > > > useful because these are likely not displayed in PDF. For example, we 
> > > > can do this
> > > > in make check as the attached patch 0002. It cannot show the filename 
> > > > where one
> > > > is found, though.
> > > 
> > > I was thinking something like:
> > > 
> > >   grep -l --recursive  -P '[\x80-\xFF]' . |
> > >   while read FILE
> > >   do  iconv -f UTF-8 -t ISO-8859-1 "$FILE" || exit 1
> > >   done
> > > 
> > > This only checks files with non-ASCII characters.
> > 
> > Checking for non-Latin1 after non-ASCII characters seems like a good idea.
> > I attached an updated patch (0002) that uses perl instead of grep
> > because non-GNU grep may not support escape sequences for hex.
> 
> Yes, good point.
> 
> > > So, are we sure this will be the message even for non-English users? I
> > > thought checking for warning message text was too fragile.
> > 
> > I am not sure whether fop has messages in non-English, although I've never
> > seen Japanese messages output. 
> > 
> > I wonder if we can get unified results if executed with LANG=C.
> > The updated patch 0001 is fixed in this direction.
> 
> Yes, good idea.
> 
> > +   @ ( $(PERL) -ne '/[\x80-\xFF]/ and `${ICONV} -t ISO-8859-1 -f UTF-8 
> > "$$ARGV" 2>/dev/null` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
> 
> I am thinking we should have -f before -t because it is from/to.

I've updated the patch 0002 to move -f before -t.

Also, I added a new patch 0003 that updates configure scripts to check
whether iconv exists. When it does not exist, the message 
"ERROR: `iconv' is missing on your system." will be raised.
However, this change may be unnecessary since iconv is part of the POSIX
standard and most UNIX-like systems have it. 

Regards,
Yugo Nagata


-- 
Yugo NAGATA <nag...@sraoss.co.jp>
>From 93adc51c0135d274cea75f2de2b328480c72a94c Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nag...@sraoss.co.jp>
Date: Tue, 19 Nov 2024 19:19:14 +0900
Subject: [PATCH v3 3/3] Check whether iconv exists for detecting non-latin1
 characters

---
 configure              | 65 ++++++++++++++++++++++++++++++++++++++----
 configure.ac           |  1 +
 doc/src/sgml/Makefile  |  6 +++-
 src/Makefile.global.in |  1 +
 4 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/configure b/configure
index f58eae1baa..eaf02c5660 100755
--- a/configure
+++ b/configure
@@ -632,6 +632,7 @@ PG_VERSION_NUM
 LDFLAGS_EX_BE
 PROVE
 DBTOEPUB
+ICONV
 FOP
 XSLTPROC
 XMLLINT
@@ -14728,7 +14729,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14774,7 +14775,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14798,7 +14799,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14843,7 +14844,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -14867,7 +14868,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -18535,6 +18536,60 @@ $as_echo_n "checking for FOP... " >&6; }
 $as_echo "$FOP" >&6; }
 fi
 
+if test -z "$ICONV"; then
+  for ac_prog in iconv
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ICONV+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $ICONV in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_ICONV="$ICONV" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_path_ICONV="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+ICONV=$ac_cv_path_ICONV
+if test -n "$ICONV"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ICONV" >&5
+$as_echo "$ICONV" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ICONV" && break
+done
+
+else
+  # Report the value of ICONV in configure's output in all cases.
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ICONV" >&5
+$as_echo_n "checking for ICONV... " >&6; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ICONV" >&5
+$as_echo "$ICONV" >&6; }
+fi
+
 if test -z "$DBTOEPUB"; then
   for ac_prog in dbtoepub
 do
diff --git a/configure.ac b/configure.ac
index 82c5009e3e..1196f857cf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2321,6 +2321,7 @@ fi
 PGAC_PATH_PROGS(XMLLINT, xmllint)
 PGAC_PATH_PROGS(XSLTPROC, xsltproc)
 PGAC_PATH_PROGS(FOP, fop)
+PGAC_PATH_PROGS(ICONV, iconv)
 PGAC_PATH_PROGS(DBTOEPUB, dbtoepub)
 
 #
diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 820ae7c456..416dfc6c89 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -36,6 +36,10 @@ ifndef FOP
 FOP = $(missing) fop
 endif
 
+ifndef ICONV
+ICONV = $(missing) iconv
+endif
+
 PANDOC = pandoc
 
 XMLINCLUDE = --path . --path $(srcdir)
@@ -271,7 +275,7 @@ check-nbsp:
 
 # Non-Latin1 characters cannot be displayed in PDF.
 check-non-latin1:
-	@ ( $(PERL) -ne '/[\x80-\xFF]/ and `iconv -f UTF-8 -t ISO-8859-1 "$$ARGV" 2>/dev/null` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
+	@ ( $(PERL) -ne '/[\x80-\xFF]/ and `LANG=C ${ICONV} -f UTF-8 -t ISO-8859-1 "$$ARGV"` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
 	  $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
 	(echo "Non-Latin1 characters appear in SGML/XML files" 1>&2;  exit 1)
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 0f38d712d1..f3bd700664 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -517,6 +517,7 @@ STRIP_SHARED_LIB = @STRIP_SHARED_LIB@
 
 DBTOEPUB	= @DBTOEPUB@
 FOP				= @FOP@
+ICONV			= @ICONV@
 XMLLINT			= @XMLLINT@
 XSLTPROC		= @XSLTPROC@
 
-- 
2.34.1

>From d07e2646a0a27852e169686fcce6c5647840abf3 Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nag...@sraoss.co.jp>
Date: Mon, 11 Nov 2024 19:45:18 +0900
Subject: [PATCH v3 2/3] Check non-latin1 characters in make check

---
 doc/src/sgml/Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 18bf87d031..820ae7c456 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -160,7 +160,6 @@ XSLTPROC_FO_FLAGS += --stringparam img.src.path '$(srcdir)/'
 	awk 'BEGIN{err=0}{print}/not available in font/{err=1}END{exit err}' 1>&2  || \
 	(echo "Found characters that cannot be displayed in PDF" 1>&2;  exit 1)
 
-
 ##
 ## EPUB
 ##
@@ -197,7 +196,7 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
+check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp check-non-latin1
 	$(XMLLINT) $(XMLINCLUDE) --noout --valid $<
 
 
@@ -270,6 +269,12 @@ check-nbsp:
 	  $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
 	(echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
 
+# Non-Latin1 characters cannot be displayed in PDF.
+check-non-latin1:
+	@ ( $(PERL) -ne '/[\x80-\xFF]/ and `iconv -f UTF-8 -t ISO-8859-1 "$$ARGV" 2>/dev/null` and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
+	  $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
+	(echo "Non-Latin1 characters appear in SGML/XML files" 1>&2;  exit 1)
+
 ##
 ## Clean
 ##
-- 
2.34.1

>From 3abf606f693776410dd667bd59b0d33b9b6a75f3 Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nag...@sraoss.co.jp>
Date: Mon, 11 Nov 2024 19:22:02 +0900
Subject: [PATCH v3 1/3] Disallow characters that cannot be displayed in PDF

---
 doc/src/sgml/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index a04c532b53..18bf87d031 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -156,7 +156,9 @@ XSLTPROC_FO_FLAGS += --stringparam img.src.path '$(srcdir)/'
 	$(XSLTPROC) $(XMLINCLUDE) $(XSLTPROCFLAGS) $(XSLTPROC_FO_FLAGS) --stringparam paper.type USletter -o $@ $^
 
 %.pdf: %.fo $(ALL_IMAGES)
-	$(FOP) -fo $< -pdf $@
+	LANG=C $(FOP) -fo $< -pdf $@ 2>&1 | \
+	awk 'BEGIN{err=0}{print}/not available in font/{err=1}END{exit err}' 1>&2  || \
+	(echo "Found characters that cannot be displayed in PDF" 1>&2;  exit 1)
 
 
 ##
-- 
2.34.1

Reply via email to