Hi,

On Tue, 2 Sept 2025 at 17:54, Andrew Dunstan <and...@dunslane.net> wrote:
>
> Ah, you’re right, but then again,  I’d expect ALL_SGML to be used 
> consistently, but it isn't and I didn't check.
> v3 does that.
> Note that GENERATED_SGML weren't included in these two targets but I think 
> there's no harm in checking them too.
>
> Do we actually care about those? I don't want to add needless cycles 
> anywhere. I note that the meson.build doesn't appear to have a check target 
> at all, or anything that looks for hard tabs or nbsps. Those checks were added 
> to the Makefile back in October in commit 5b7da5c261d, but that got missed 
> even though Daniel had mentioned it in the discussion thread.[1]

I have been working on running these checks under the Meson build
system. To do this, I converted the checks into a Perl script
(sgml_syntax_check) and ran it with both the Makefile and Meson builds.
In Meson, the test is named 'sgml_syntax_check'. One difference I
noticed: I could not find a way in Meson to create a test that does
not run by default. As a result, this syntax test runs every time you
run 'meson test'. This behaviour differs from Autoconf, but I
think it is acceptable.

Additionally, some of the CI OSes were missing docbook-xml; it has
now been installed on them.

I did not create a new thread for this; I can create one if you think
that would be better.

CI run with the attached patch applied:
https://cirrus-ci.com/build/6610354173640704

-- 
Regards,
Nazir Bilal Yavuz
Microsoft
From 27ab61775945d837e37ed6a0ce0c301697d183a1 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Mon, 8 Sep 2025 17:16:05 +0300
Subject: [PATCH v1] Add sgml_syntax_check test to the Meson build

The 'sgml' check from the Makefile has been converted into a Perl script
(sgml_syntax_check) and integrated into meson.build. Unlike Autoconf,
Meson does not provide a way to mark tests as non-default, so this
script runs on every 'meson test'. While this differs from the previous
behavior, it is considered acceptable.
---
 doc/src/sgml/Makefile               |  16 +---
 doc/src/sgml/meson.build            |  23 ++++++
 doc/src/sgml/t/sgml_syntax_check.pl | 118 ++++++++++++++++++++++++++++
 .cirrus.tasks.yml                   |   3 +
 4 files changed, 146 insertions(+), 14 deletions(-)
 create mode 100755 doc/src/sgml/t/sgml_syntax_check.pl

diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 11aac913812..3256340a5b2 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -200,8 +200,8 @@ MAKEINFO = makeinfo
 ##
 
 # Quick syntax check without style processing
-check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
-       $(XMLLINT) $(XMLINCLUDE) --noout --valid $<
+check: postgres.sgml $(ALL_SGML)
+       $(PERL) $(srcdir)/t/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --srcdir $(srcdir)
 
 
 ##
@@ -261,18 +261,6 @@ clean-man:
 
 endif # sqlmansectnum != 7
 
-# tabs are harmless, but it is best to avoid them in SGML files
-check-tabs:
-       @( ! grep '     ' $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
-       (echo "Tabs appear in SGML/XML files" 1>&2;  exit 1)
-
-# Non-breaking spaces are harmless, but it is best to avoid them in SGML files.
-# Use perl command because non-GNU grep or sed could not have hex escape sequence.
-check-nbsp:
-       @ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-         $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
-       (echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
-
 ##
 ## Clean
 ##
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index 6ae192eac68..89d8b01c944 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -306,3 +306,26 @@ endif
 if alldocs.length() != 0
   alias_target('alldocs', alldocs)
 endif
+
+sgml_syntax_check = files(
+  't/sgml_syntax_check.pl'
+)
+
+test(
+  'sgml_syntax_check',
+  perl,
+  protocol: 'exitcode',
+  suite: 'doc',
+  args: [
+    sgml_syntax_check,
+    '--xmllint',
+      '@0@ --nonet'.format(xmllint_bin.full_path()),
+    '--srcdir',
+      meson.current_source_dir(),
+    '--builddir',
+      meson.current_build_dir(),
+  ],
+  depends: doc_generated
+)
+
+testprep_targets += doc_generated
diff --git a/doc/src/sgml/t/sgml_syntax_check.pl b/doc/src/sgml/t/sgml_syntax_check.pl
new file mode 100755
index 00000000000..7ff1d9a7c26
--- /dev/null
+++ b/doc/src/sgml/t/sgml_syntax_check.pl
@@ -0,0 +1,118 @@
+#!/usr/bin/perl
+
+# doc/src/sgml/t/sgml_syntax_check.pl
+
+use strict;
+use warnings FATAL => 'all';
+use Getopt::Long;
+
+use File::Find;
+
+my $xmllint;
+my $srcdir;
+my $builddir;
+
+GetOptions(
+       'xmllint:s' => \$xmllint,
+       'srcdir:s' => \$srcdir,
+       'builddir:s' => \$builddir) or die "$0: wrong arguments";
+
+die "$0: --srcdir must be specified\n" unless defined $srcdir;
+
+my $postgres_sgml = "postgres.sgml";
+my $xmlinclude = "--path . --path $srcdir";
+$xmlinclude .= " --path $builddir" if defined $builddir;
+
+# find files to process in check_tabs, check_nbsp will use additional files
+my @files_to_process;
+my @dirs_to_search = ($srcdir);
+push @dirs_to_search, $builddir if defined $builddir;
+find(
+       sub {
+               return unless -f $_;
+               return if $_ !~ /\.xsl$/;
+               push @files_to_process, $File::Find::name;
+       },
+       @dirs_to_search,);
+
+push @dirs_to_search, "$srcdir/ref";
+find(
+       sub {
+               return unless -f $_;
+               return unless /\.sgml$/;
+               push @files_to_process, $File::Find::name;
+       },
+       @dirs_to_search,);
+
+
+# tabs are harmless, but it is best to avoid them in SGML files
+sub check_tabs
+{
+       my @files = @files_to_process;
+
+       my $errors = 0;
+       for my $f (@files)
+       {
+               open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!";
+               while (<$fh>)
+               {
+                       if (/\t/)
+                       {
+                               warn "Tab found in $f:$_";
+                               $errors++;
+                       }
+               }
+       }
+
+       if ($errors)
+       {
+               die "Tabs appear in SGML/XML files\n";
+       }
+}
+
+# non-breaking spaces are harmless, but it is best to avoid them in SGML files
+sub check_nbsp
+{
+       my @files;
+
+       # find additional '$srcdir/images/*.xsl' files to process in check_nbsp
+       find(
+               sub {
+                       return unless -f $_;
+                       return if $_ !~ /\.xsl$/;
+                       push @files, $File::Find::name;
+               },
+               "$srcdir/images",);
+       push @files, @files_to_process;
+
+       my $errors = 0;
+       for my $f (@files)
+       {
+               open my $fh, "<:raw", $f or die "Can't open $f: $!";
+               my $line_no = 0;
+               while (<$fh>)
+               {
+                       $line_no++;
+                       if (/\xC2\xA0/)
+                       {
+                               warn "$f:$line_no: contains non-breaking space\n";
+                               $errors++;
+                       }
+               }
+       }
+
+       if ($errors)
+       {
+               die "Non-breaking spaces appear in SGML/XML files\n";
+       }
+}
+
+sub run_xmllint
+{
+       my $cmd = "$xmllint $xmlinclude --noout --valid $postgres_sgml";
+       system($cmd) == 0 or die "xmllint validation failed\n";
+}
+
+run_xmllint();
+check_tabs();
+check_nbsp();
diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml
index eca9d62fc22..1c937247a9a 100644
--- a/.cirrus.tasks.yml
+++ b/.cirrus.tasks.yml
@@ -627,6 +627,8 @@ task:
     TEST_JOBS: 8
     IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma
 
+    XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml
+
     CIRRUS_WORKING_DIR: ${HOME}/pgsql/
     CCACHE_DIR: ${HOME}/ccache
     MACPORTS_CACHE: ${HOME}/macports-cache
@@ -641,6 +643,7 @@ task:
 
     MACOS_PACKAGE_LIST: >-
       ccache
+      docbook-xml-4.5
       icu
       kerberos5
       lz4
-- 
2.51.0

Reply via email to