Bug#999923: Porting XML Copy Editor to PCRE2

2022-10-04 Thread Miriam Ruiz
Cool!!

Lots of thanks!!

Miry

El mar, 4 oct 2022 a las 13:54, Zane Ji () escribió:
>
> Hi Miry,
>
> After your patch is applied, I just set matchArray to
> pcre2_get_ovector_pointer(patternMatchData) each time when pcre2_match is
> called, then regex searching/replacing works.
>
> The source code has been updated:
> https://sourceforge.net/p/xml-copy-editor/code/ci/3d17bca4196670183ad45c0af369acf4acdc7d7e/
>
> Regards,
> Zane



Bug#999923: Porting XML Copy Editor to PCRE2

2022-10-04 Thread Zane Ji
Hi Miry,

After your patch is applied, I just set matchArray to
pcre2_get_ovector_pointer(patternMatchData) each time when pcre2_match is
called, then regex searching/replacing works.

The source code has been updated:
https://sourceforge.net/p/xml-copy-editor/code/ci/3d17bca4196670183ad45c0af369acf4acdc7d7e/

Regards,
Zane
diff --git a/configure.ac b/configure.ac
index d0ab3af..1c1f0dd 100755
--- a/configure.ac
+++ b/configure.ac
@@ -72,8 +72,7 @@ AC_ARG_ENABLE(debug,
 ])
 
 # Check pcre is available
-AC_CHECK_HEADER(pcre.h, ,
-	AC_MSG_ERROR([PCRE headers not found]))
+PKG_CHECK_MODULES([PCRE2], [libpcre2-8])
 
 # Check boost::shared_ptr is available
 AC_LANG(C++)
diff --git a/src/Makefile.am b/src/Makefile.am
index 7b0c81c..15bf572 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -83,7 +83,8 @@ xmlcopyeditor_LDADD = $(WX_LIBS) \
 	$(ENCHANT_LIBS) \
 	$(GTK_LIBS) \
 	$(XSLT_LIBS) \
-	-lexpat -lpcre -lxerces-c
+	$(PCRE2_LIBS) \
+	-lexpat -lxerces-c
 
 nobase_dist_xmlcopyeditor_DATA = $(srcdir)/catalog/catalog \
 	$(srcdir)/dtd/*.* \
@@ -133,5 +134,5 @@ EXTRA_DIST = \
 	$(srcdir)/xmlcopyeditor.rc \
 	$(srcdir)/xmlschemaparser.cpp
 
-AM_CPPFLAGS = $(XML2_CFLAGS) $(ENCHANT_CFLAGS) $(GTK_CFLAGS)
+AM_CPPFLAGS = $(XML2_CFLAGS) $(ENCHANT_CFLAGS) $(GTK_CFLAGS) $(PCRE2_CFLAGS)
 
diff --git a/src/rule.cpp b/src/rule.cpp
index 487a364..37bfe3c 100644
--- a/src/rule.cpp
+++ b/src/rule.cpp
@@ -26,12 +26,11 @@ using namespace std;
 Rule::Rule (
 const string& pattern,
 bool matchCase,
-const string& replace,
-const int arrayLength ) : WrapRegex (
+const string& replace
+	) : WrapRegex (
 		pattern,
 		matchCase,
-		replace,
-		arrayLength )
+		replace )
 {
 	adjustCaseAttribute = tentativeAttribute = false;
 }
diff --git a/src/rule.h b/src/rule.h
index a89289e..1abfbab 100644
--- a/src/rule.h
+++ b/src/rule.h
@@ -32,8 +32,7 @@ class Rule : public WrapRegex
 		Rule (
 		const string& pattern,
 		bool matchCase,
-		const string& replace = "",
-		const int arrayLength = 60 );
+		const string& replace = "");
 		bool getAdjustCaseAttribute();
 		bool getTentativeAttribute();
 		string getReport();
diff --git a/src/wrapregex.cpp b/src/wrapregex.cpp
index ff8d622..99d2a7c 100644
--- a/src/wrapregex.cpp
+++ b/src/wrapregex.cpp
@@ -31,40 +31,39 @@ using namespace std;
 WrapRegex::WrapRegex (
 const string& pattern,
 bool matchCase,
-const string& replaceParameter,
-const int arrayLengthParameter ) :
+const string& replaceParameter ) :
 		replace ( replaceParameter ),
-		arrayLength ( arrayLengthParameter ),
 		returnValue ( 0 )
 {
 	if ( pattern.empty() || pattern == ".*" )
 	{
 		disabled = true;
-		matchArray = NULL;
-		patternStructure = NULL;
-		patternExtraStructure = NULL;
+		patternCode = NULL;
+		patternMatchData = NULL;
+		patternMatchContext = NULL;
 		return;
 	}
 	disabled = false;
 
-	matchArray = new int[arrayLength];
-
 	// compile
-	int optionsFlag = ( matchCase ) ? PCRE_UTF8 : PCRE_CASELESS | PCRE_UTF8;
-	const char *errorPointer;
-	int errorOffset;
-
-	if ( ( patternStructure = pcre_compile (
-	  pattern.c_str(),
-	  optionsFlag,
-	  &errorPointer,
-	  &errorOffset,
-	  NULL ) ) == NULL )
+	uint32_t optionsFlag = ( matchCase ? 0 : PCRE2_CASELESS ) | PCRE2_UTF | PCRE2_NO_UTF_CHECK;
+	int errorCode;
+	PCRE2_SIZE errorOffset;
+
+	if ( ( patternCode = pcre2_compile (
+		(PCRE2_SPTR)pattern.c_str(), // pattern
+		PCRE2_ZERO_TERMINATED, // pattern is zero-terminated
+		optionsFlag, // options
+		&errorCode, // error number
+		&errorOffset, // error offset
+		NULL ) ) == NULL ) // default compile context
 	{
-		throw runtime_error ( errorPointer );
+		char buf[256];
+		pcre2_get_error_message ( errorCode, (PCRE2_UCHAR *)buf, sizeof(buf) );
+		throw runtime_error ( string(buf) );
 	}
-
-	patternExtraStructure = pcre_study ( patternStructure, 0, &errorPointer );
+	patternMatchData = pcre2_match_data_create_from_pattern ( patternCode, NULL );
+	patternMatchContext = pcre2_match_context_create ( NULL );
 }
 
 WrapRegex::~WrapRegex()
@@ -72,9 +71,9 @@ WrapRegex::~WrapRegex()
 	if ( disabled )
 		return;
 
-	pcre_free ( patternStructure );
-	pcre_free ( patternExtraStructure );
-	delete[] matchArray;
+	pcre2_match_data_free ( patternMatchData );
+	pcre2_code_free ( patternCode );
+	pcre2_match_context_free ( patternMatchContext );
 }
 
 int WrapRegex::matchPatternGlobal (
@@ -108,18 +107,18 @@ string WrapRegex::replaceGlobal (
 	string output, match;
 
 	output.reserve ( buffer.size() );
-	while ( ( returnValue = pcre_exec (
-	patternStructure,
-	patternExtraStructure,
-	s,
-	strlen ( s ),
-	0,
-	0,
-	matchArray,
-	

Bug#999923: Porting XML Copy Editor to PCRE2

2022-09-29 Thread Zane Ji
Hi Miry,

Thanks for submitting the patch. I will look into it.

Best regards,
Zane

On Fri, Sep 30, 2022 at 7:28 AM Miriam Ruiz  wrote:

> Hi,
>
> My name is Miriam Ruiz and I am currently maintaining XML Copy Editor in
> Debian.
>
> According to the bug report #23 ( https://bugs.debian.org/23
> ), pcre3 libraries will soon be removed from Debian, so a transition
> to pcre2 is needed.
>
> I have prepared a 1st draft of a possible patch, but I haven't tested
> it thoroughly yet and also I am not too familiar with pcre2, so I
> cannot guarantee that it works properly. It seems to build properly.
>
> I am attaching my patch (it is also linked in the bug report) in case
> you might be interested.
>
> Greetings and thanks,
> Miry
>
> PS: I am attaching the bug report as Cc
>


Bug#999923: Porting XML Copy Editor to PCRE2

2022-09-29 Thread Miriam Ruiz
Hi,

My name is Miriam Ruiz and I am currently maintaining XML Copy Editor in Debian.

According to the bug report #23 ( https://bugs.debian.org/23
), pcre3 libraries will soon be removed from Debian, so a transition
to pcre2 is needed.

I have prepared a 1st draft of a possible patch, but I haven't tested
it thoroughly yet and also I am not too familiar with pcre2, so I
cannot guarantee that it works properly. It seems to build properly.

I am attaching my patch (it is also linked in the bug report) in case
you might be interested.

Greetings and thanks,
Miry

PS: I am attaching the bug report as Cc
# See: https://github.com/wch/r-source/blob/af9a038e277d14b038ef877366652b6e5400c399/src/main/grep.c
# See: https://github.com/Sigil-Ebook/Sigil/issues/630
# See: https://github.com/PCRE2Project/pcre2/issues/51
# See: https://github.com/HaxeFoundation/haxe/issues/10491
# See: https://github.com/SWI-Prolog/packages-pcre/issues/2
# See: https://github.com/SWI-Prolog/packages-pcre/pull/3/files
# See: https://pcre.org/current/doc/html/
# See: https://github.com/luvit/pcre2/blob/master/src/pcre2demo.c
# See: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=23
# See: http://washitake.com/mail/exim/mirror/pcre/Testing/PCRE2_proposal.pdf
# See: https://redmine.openinfosecfoundation.org/issues/3194
# See: https://pcre.org/current/doc/html/pcre2api.html

Index: xmlcopyeditor-1.3.0.0/configure.ac
===
--- xmlcopyeditor-1.3.0.0.orig/configure.ac
+++ xmlcopyeditor-1.3.0.0/configure.ac
@@ -71,9 +71,15 @@ AC_ARG_ENABLE(debug,
   CXXFLAGS="${CXXFLAGS} -DNDEBUG -DwxDEBUG_LEVEL=0"
 ])
 
-# Check pcre is available
-AC_CHECK_HEADER(pcre.h, ,
-	AC_MSG_ERROR([PCRE headers not found]))
+# Check pcre2 is available
+AC_CHECK_HEADER([pcre2.h], [],
+	[AC_MSG_ERROR([PCRE2 headers not found])],
+	[#define PCRE2_CODE_UNIT_WIDTH 8
+	#include 
+	])
+
+PCRE2_LIBS="-lpcre2-8"
+AC_SUBST(PCRE2_LIBS)
 
 # Check boost::shared_ptr is available
 AC_LANG(C++)
Index: xmlcopyeditor-1.3.0.0/src/wrapregex.h
===
--- xmlcopyeditor-1.3.0.0.orig/src/wrapregex.h
+++ xmlcopyeditor-1.3.0.0/src/wrapregex.h
@@ -21,10 +21,14 @@
 #ifndef WRAPREGEX_H
 #define WRAPREGEX_H
 
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#define PCRE2_CODE_UNIT_WIDTH 8
+#endif
+
 #include 
 #include 
 #include 
-#include <pcre.h>
+#include <pcre2.h>
 #include 
 #include "contexthandler.h"
 
@@ -53,8 +57,11 @@ class WrapRegex : private boost::noncopy
 		int returnValue;
 		bool disabled;
 
-		pcre *patternStructure;
-		pcre_extra *patternExtraStructure;
+		//~ pcre *patternStructure;
+		//~ pcre_extra *patternExtraStructure;
+		pcre2_code *patternCode;
+		pcre2_match_data *patternMatchData;
+		pcre2_match_context *patternMatchContext;
 		int *matchArray;
 
 		string getInterpolatedString_ ( const char *buffer,
Index: xmlcopyeditor-1.3.0.0/src/wrapregex.cpp
===
--- xmlcopyeditor-1.3.0.0.orig/src/wrapregex.cpp
+++ xmlcopyeditor-1.3.0.0/src/wrapregex.cpp
@@ -41,8 +41,11 @@ WrapRegex::WrapRegex (
 	{
 		disabled = true;
 		matchArray = NULL;
-		patternStructure = NULL;
-		patternExtraStructure = NULL;
+		//~ patternStructure = NULL;
+		//~ patternExtraStructure = NULL;
+		patternCode = NULL;
+		patternMatchData = NULL;
+		patternMatchContext = NULL;
 		return;
 	}
 	disabled = false;
@@ -50,21 +53,25 @@ WrapRegex::WrapRegex (
 	matchArray = new int[arrayLength];
 
 	// compile
-	int optionsFlag = ( matchCase ) ? PCRE_UTF8 : PCRE_CASELESS | PCRE_UTF8;
-	const char *errorPointer;
-	int errorOffset;
-
-	if ( ( patternStructure = pcre_compile (
-	  pattern.c_str(),
-	  optionsFlag,
-	  &errorPointer,
-	  &errorOffset,
-	  NULL ) ) == NULL )
+	uint32_t optionsFlag = ( matchCase ) ? PCRE2_UTF | PCRE2_NO_UTF_CHECK : PCRE2_CASELESS | PCRE2_UTF | PCRE2_NO_UTF_CHECK;
+	int errorCode;
+	PCRE2_SIZE errorOffset;
+
+	if ( ( patternCode = pcre2_compile (
+		(PCRE2_SPTR)pattern.c_str(), // pattern
+		PCRE2_ZERO_TERMINATED, // pattern is zero-terminated
+		optionsFlag, // options
+		&errorCode, // error number
+		&errorOffset, // error offset
+		NULL ) ) == NULL ) // default compile context
 	{
-		throw runtime_error ( errorPointer );
+		char buf[256];
+		pcre2_get_error_message ( errorCode, (PCRE2_UCHAR *)buf, sizeof(buf) );
+		throw runtime_error ( string(buf) );
 	}
 
-	patternExtraStructure = pcre_study ( patternStructure, 0, &errorPointer );
+	patternMatchData = pcre2_match_data_create_from_pattern ( patternCode, NULL );
+	patternMatchContext = pcre2_match_context_create ( NULL );
 }
 
 WrapRegex::~WrapRegex()
@@ -72,8 +79,11 @@ WrapRegex::~WrapRegex()
 	if ( disabled )
 		return;
 
-	pcre_free ( patternStructure );
-	pcre_free ( patternExtraStructure );
+	//~ pcre_free ( patternStructure );
+	//~ pcre_free ( patternExtraStructure );
+	pcre2_match_data_free ( 

Bug#999923: Porting XML Copy Editor to PCRE2

2022-09-29 Thread Miriam Ruiz
Hi,

My name is Miriam Ruiz and I am currently maintaining XML Copy Editor in Debian.

According to the bug report #23 ( https://bugs.debian.org/23
), pcre3 libraries will soon be removed from Debian, so a transition
to pcre2 is needed.

I have prepared a 1st draft of a possible patch, but I haven't tested
it thoroughly yet and also I am not too familiar with pcre2, so I
cannot guarantee that it works properly. It seems to build properly.

I am attaching my patch (it is also linked in the bug report) in case
you might be interested.

Greetings and thanks,
Miry

PS: I am attaching the bug report as Cc
# See: https://github.com/wch/r-source/blob/af9a038e277d14b038ef877366652b6e5400c399/src/main/grep.c
# See: https://github.com/Sigil-Ebook/Sigil/issues/630
# See: https://github.com/PCRE2Project/pcre2/issues/51
# See: https://github.com/HaxeFoundation/haxe/issues/10491
# See: https://github.com/SWI-Prolog/packages-pcre/issues/2
# See: https://github.com/SWI-Prolog/packages-pcre/pull/3/files
# See: https://pcre.org/current/doc/html/
# See: https://github.com/luvit/pcre2/blob/master/src/pcre2demo.c
# See: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=23
# See: http://washitake.com/mail/exim/mirror/pcre/Testing/PCRE2_proposal.pdf
# See: https://redmine.openinfosecfoundation.org/issues/3194
# See: https://pcre.org/current/doc/html/pcre2api.html

Index: xmlcopyeditor-1.3.0.0/configure.ac
===
--- xmlcopyeditor-1.3.0.0.orig/configure.ac
+++ xmlcopyeditor-1.3.0.0/configure.ac
@@ -71,9 +71,15 @@ AC_ARG_ENABLE(debug,
   CXXFLAGS="${CXXFLAGS} -DNDEBUG -DwxDEBUG_LEVEL=0"
 ])
 
-# Check pcre is available
-AC_CHECK_HEADER(pcre.h, ,
-	AC_MSG_ERROR([PCRE headers not found]))
+# Check pcre2 is available
+AC_CHECK_HEADER([pcre2.h], [],
+	[AC_MSG_ERROR([PCRE2 headers not found])],
+	[#define PCRE2_CODE_UNIT_WIDTH 8
+	#include 
+	])
+
+PCRE2_LIBS="-lpcre2-8"
+AC_SUBST(PCRE2_LIBS)
 
 # Check boost::shared_ptr is available
 AC_LANG(C++)
Index: xmlcopyeditor-1.3.0.0/src/wrapregex.h
===
--- xmlcopyeditor-1.3.0.0.orig/src/wrapregex.h
+++ xmlcopyeditor-1.3.0.0/src/wrapregex.h
@@ -21,10 +21,14 @@
 #ifndef WRAPREGEX_H
 #define WRAPREGEX_H
 
+#ifndef PCRE2_CODE_UNIT_WIDTH
+#define PCRE2_CODE_UNIT_WIDTH 8
+#endif
+
 #include 
 #include 
 #include 
-#include <pcre.h>
+#include <pcre2.h>
 #include 
 #include "contexthandler.h"
 
@@ -53,8 +57,11 @@ class WrapRegex : private boost::noncopy
 		int returnValue;
 		bool disabled;
 
-		pcre *patternStructure;
-		pcre_extra *patternExtraStructure;
+		//~ pcre *patternStructure;
+		//~ pcre_extra *patternExtraStructure;
+		pcre2_code *patternCode;
+		pcre2_match_data *patternMatchData;
+		pcre2_match_context *patternMatchContext;
 		int *matchArray;
 
 		string getInterpolatedString_ ( const char *buffer,
Index: xmlcopyeditor-1.3.0.0/src/wrapregex.cpp
===
--- xmlcopyeditor-1.3.0.0.orig/src/wrapregex.cpp
+++ xmlcopyeditor-1.3.0.0/src/wrapregex.cpp
@@ -41,8 +41,11 @@ WrapRegex::WrapRegex (
 	{
 		disabled = true;
 		matchArray = NULL;
-		patternStructure = NULL;
-		patternExtraStructure = NULL;
+		//~ patternStructure = NULL;
+		//~ patternExtraStructure = NULL;
+		patternCode = NULL;
+		patternMatchData = NULL;
+		patternMatchContext = NULL;
 		return;
 	}
 	disabled = false;
@@ -50,21 +53,25 @@ WrapRegex::WrapRegex (
 	matchArray = new int[arrayLength];
 
 	// compile
-	int optionsFlag = ( matchCase ) ? PCRE_UTF8 : PCRE_CASELESS | PCRE_UTF8;
-	const char *errorPointer;
-	int errorOffset;
-
-	if ( ( patternStructure = pcre_compile (
-	  pattern.c_str(),
-	  optionsFlag,
-	  &errorPointer,
-	  &errorOffset,
-	  NULL ) ) == NULL )
+	uint32_t optionsFlag = ( matchCase ) ? PCRE2_UTF | PCRE2_NO_UTF_CHECK : PCRE2_CASELESS | PCRE2_UTF | PCRE2_NO_UTF_CHECK;
+	int errorCode;
+	PCRE2_SIZE errorOffset;
+
+	if ( ( patternCode = pcre2_compile (
+		(PCRE2_SPTR)pattern.c_str(), // pattern
+		PCRE2_ZERO_TERMINATED, // pattern is zero-terminated
+		optionsFlag, // options
+		&errorCode, // error number
+		&errorOffset, // error offset
+		NULL ) ) == NULL ) // default compile context
 	{
-		throw runtime_error ( errorPointer );
+		char buf[256];
+		pcre2_get_error_message ( errorCode, (PCRE2_UCHAR *)buf, sizeof(buf) );
+		throw runtime_error ( string(buf) );
 	}
 
-	patternExtraStructure = pcre_study ( patternStructure, 0, &errorPointer );
+	patternMatchData = pcre2_match_data_create_from_pattern ( patternCode, NULL );
+	patternMatchContext = pcre2_match_context_create ( NULL );
 }
 
 WrapRegex::~WrapRegex()
@@ -72,8 +79,11 @@ WrapRegex::~WrapRegex()
 	if ( disabled )
 		return;
 
-	pcre_free ( patternStructure );
-	pcre_free ( patternExtraStructure );
+	//~ pcre_free ( patternStructure );
+	//~ pcre_free ( patternExtraStructure );
+	pcre2_match_data_free (