From d306b92edcbca3afec24370b3b25d41dbf556cff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Fri, 6 Jan 2023 20:40:07 -0800
Subject: [PATCH] pcre: use UTF only when available in the library

Before this change, if grep was linked with a PCRE2 library built
without Unicode support, any invocation of grep -P in a UTF-8 locale
failed with:

  grep: this version of PCRE2 does not have Unicode support

* src/pcresearch.c (Pcompile): Check whether Unicode support was
compiled into the PCRE2 library.
* tests/pcre-utf8-w: Skip the test if PCRE2 lacks Unicode support.
* tests/pcre-utf8: Update the skip message.
---
 src/pcresearch.c  | 4 +++-
 tests/pcre-utf8   | 2 +-
 tests/pcre-utf8-w | 5 ++++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/pcresearch.c b/src/pcresearch.c
index 45b67ee..a8034fb 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -145,7 +145,9 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
     = pcre2_general_context_create (private_malloc, private_free, NULL);
   pcre2_compile_context *ccontext = pcre2_compile_context_create (gcontext);

-  if (localeinfo.multibyte)
+  uint32_t unicode = 1;
+  pcre2_config (PCRE2_CONFIG_UNICODE, &unicode);
+  if (unicode && localeinfo.multibyte)
     {
       if (! localeinfo.using_utf8)
         die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
diff --git a/tests/pcre-utf8 b/tests/pcre-utf8
index 1b3b2d3..4d97e69 100755
--- a/tests/pcre-utf8
+++ b/tests/pcre-utf8
@@ -14,7 +14,7 @@ LC_ALL=en_US.UTF-8 require_pcre_
 fail=0

 echo '$' | LC_ALL=en_US.UTF-8 grep -qP '\p{S}' \
-  || skip_ 'PCRE support is compiled out, or it does not support properties'
+  || skip_ 'PCRE unicode support is compiled out'

 euro='\342\202\254 euro'
 printf "$euro\\n" > in || framework_failure_
diff --git a/tests/pcre-utf8-w b/tests/pcre-utf8-w
index 4cd7db6..81ac9ff 100755
--- a/tests/pcre-utf8-w
+++ b/tests/pcre-utf8-w
@@ -13,9 +13,12 @@ LC_ALL=en_US.UTF-8
 export LC_ALL
 require_pcre_

+echo . | grep -qP '(*UTF).' 2>/dev/null \
+  || skip_ 'PCRE unicode support is compiled out'
+
 fail=0

-echo 'Perú'> in || framework_failure_
+echo 'Perú' > in || framework_failure_

 echo 'ú' > exp || framework_failure_
 grep -Po '.\b' in > out || fail=1
-- 
2.39.0.132.g8a4e8f6a67

