Hi hackers, Ivan reported a small bug in irregex (#888) that Alex agreed was an oversight, so I fixed it upstream. While doing so, I noticed that the last bugfix we applied to Chicken's irregex was a while ago, and a handful of fixes have gone in upstream since, which means we're slightly out of date.
Attached are a few patches which bring us up to date with upstream irregex bugfix release 0.8.3, plus one patch for #888. Meanwhile, Alex has also merged my submatch extraction change(!), but that is a very large and invasive change, which I think is best postponed until after 4.8.0 is released (which I hope will be "real soon now"). The attached patches, however, include one very important bugfix, and the patch for #888 is so trivial that I think they are all worth including in the release. Cheers, Peter -- http://sjamaan.ath.cx -- "The process of preparing programs for a digital computer is especially attractive, not only because it can be economically and scientifically rewarding, but also because it can be an aesthetic experience much like composing poetry or music." -- Donald Knuth
>From dee52f34a68a37aab0f9613b29ac81951f79e82f Mon Sep 17 00:00:00 2001 From: Peter Bex <[email protected]> Date: Wed, 18 Jul 2012 20:26:27 +0200 Subject: [PATCH 1/4] Fix hang in irregex-fold caused by patterns matching the empty string (upstream changeset ba70feace1dd) --- irregex-core.scm | 28 ++++++++++++++++++---------- tests/test-irregex.scm | 5 +++++ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/irregex-core.scm b/irregex-core.scm index 982f57e..54413bf 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1485,7 +1485,7 @@ (map (lambda (_) `(/ ,(integer->char #x80) ,(integer->char #xFF))) (zero-to (+ i lo-len)))))) - (zero-to (- (length hi-ls) lo-len 1))) + (zero-to (- (length hi-ls) (+ lo-len 1)))) (list (sre-sequence (cons `(/ ,(integer->char @@ -3752,10 +3752,13 @@ matches))) (if (not m) (finish i acc) - (let* ((end (%irregex-match-end-index m 0)) - (acc (kons i m acc))) - (irregex-reset-matches! matches) - (lp end acc)))))))) + (let ((end (%irregex-match-end-index m 0))) + (if (= end i) + ;; skip one char forward if we match the empty string + (lp (+ end 1) acc) + (let ((acc (kons i m acc))) + (irregex-reset-matches! matches) + (lp end acc)))))))))) (define (irregex-fold irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons)) @@ -3777,11 +3780,16 @@ (let ((m (irregex-search/matches irx cnk start i matches))) (if (not m) (finish start i acc) - (let* ((acc (kons start i m acc)) - (end-src (%irregex-match-end-chunk m 0)) - (end-index (%irregex-match-end-index m 0))) - (irregex-reset-matches! matches) - (lp end-src end-index acc)))))))) + (let ((end-src (%irregex-match-end-chunk m 0)) + (end-index (%irregex-match-end-index m 0))) + (if (and (eq? 
end-src start) (= end-index i)) + (if (>= end-index ((chunker-get-end cnk) end-src )) + (let ((next ((chunker-get-next cnk) end-src))) + (lp next ((chunker-get-start cnk) next) acc)) + (lp end-src (+ end-index 1) acc)) + (let ((acc (kons start i m acc))) + (irregex-reset-matches! matches) + (lp end-src end-index acc)))))))))) (define (irregex-fold/chunked irx kons . args) (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons)) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index a06bc6b..11bf225 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -358,6 +358,11 @@ rope-chunker (rope "[email protected] and [email protected]") (lambda (src i s) (reverse s)))) + (test-equal '("poo poo ") + (irregex-fold '(* "poo ") + (lambda (i m s) (cons (irregex-match-substring m) s)) + '() + "poo poo platter")) ) -- 1.7.9.1
>From 3247177f6d17aa1607cb0e2b58309c8ceac8389d Mon Sep 17 00:00:00 2001 From: Peter Bex <[email protected]> Date: Wed, 18 Jul 2012 20:31:09 +0200 Subject: [PATCH 2/4] Add complemented unicode char-set tests for irregex (upstream changeset 78ba6b09e021) --- tests/test-irregex.scm | 10 +++++++++- 1 files changed, 9 insertions(+), 1 deletions(-) diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm index 11bf225..fd2cb97 100644 --- a/tests/test-irregex.scm +++ b/tests/test-irregex.scm @@ -504,5 +504,13 @@ (test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>"))) (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) -(test-end)(test-exit) +(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>")) +(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>"))) +(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>")) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>"))) +(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>"))) + +(test-end) -- 1.7.9.1
>From 4c666088bb5a02c7a96e2d9593dc8551b52c3c21 Mon Sep 17 00:00:00 2001 From: Peter Bex <[email protected]> Date: Wed, 18 Jul 2012 20:36:30 +0200 Subject: [PATCH 3/4] Update irregex copyright and NEWS; this brings us exactly up to upstream release 0.8.3 (upstream changeset 88104ffcd77a) --- LICENSE | 2 +- NEWS | 2 +- irregex-core.scm | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index 2ebd33b..c4217e0 100644 --- a/LICENSE +++ b/LICENSE @@ -60,7 +60,7 @@ synrules.scm: irregex.scm: - Copyright (c) 2005-2010, Alex Shinn + Copyright (c) 2005-2011, Alex Shinn All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/NEWS b/NEWS index dfc5c77..77e7999 100644 --- a/NEWS +++ b/NEWS @@ -144,7 +144,7 @@ - deprecated C_hash_string and C_hash_string_ci functions in the C API in favor of the more secure versions C_u_i_string_hash, C_u_i_string_ci_hash - a number of bugs in the irregex regular expression engine have been - fixed + fixed; it has been updated to upstream release 0.8.3 - "with-input-from-file", "with-output-to-file", "with-input-from-pipe" and "with-output-to-pipe" now properly restore the standard input/output ports in case the body thunk escapes diff --git a/irregex-core.scm b/irregex-core.scm index 54413bf..ebc3553 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -1,6 +1,6 @@ ;;;; irregex.scm -- IrRegular Expressions ;; -;; Copyright (c) 2005-2010 Alex Shinn. All rights reserved. +;; Copyright (c) 2005-2011 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -31,6 +31,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; History ;; +;; 0.8.3: 2011/12/18 - various accumulated bugfixes ;; 0.8.2: 2010/08/28 - (...)? 
submatch extraction fix and alternate ;; named submatches from Peter Bex ;; Added irregex-split, irregex-extract, -- 1.7.9.1
>From a66599b471fe44abd5ebca532009dbea68a8119c Mon Sep 17 00:00:00 2001 From: Peter Bex <[email protected]> Date: Wed, 18 Jul 2012 20:40:45 +0200 Subject: [PATCH 4/4] Fix irregex builtin "real" utility pattern to allow leading +/- sign. This fixes #888 (upstream changeset 3c51418853de) --- NEWS | 1 + irregex-core.scm | 4 +++- 2 files changed, 4 insertions(+), 1 deletions(-) diff --git a/NEWS b/NEWS index 77e7999..01a3bf9 100644 --- a/NEWS +++ b/NEWS @@ -134,6 +134,7 @@ - added "alist-update" ("data-structures" unit) - "irregex-replace" returns the original string instead of #f when the regex does not match + - irregex "real" built-in utility pattern now accepts a leading sign - added "change-directory*" ("posix" unit) - number parsing has been made more reliable and standards compliant - deprecated "none?", "always?" and "never?" diff --git a/irregex-core.scm b/irregex-core.scm index ebc3553..5b3e80a 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -2261,7 +2261,9 @@ ;; extended library patterns (integer . (seq (? (or #\+ #\-)) (+ numeric))) - (real . (seq (+ numeric) (? #\. (+ numeric)) (? (or #\e #\E) integer))) + (real . (seq (? (or #\+ #\-)) + (+ numeric) (? #\. (+ numeric)) + (? (or #\e #\E) integer))) ;; slightly more lax than R5RS, allow ->foo, etc. (symbol-initial . (or alpha ("!$%&*/:<=>?^_~"))) (symbol-subsequent . (or symbol-initial digit ("+-.@"))) -- 1.7.9.1
_______________________________________________ Chicken-hackers mailing list [email protected] https://lists.nongnu.org/mailman/listinfo/chicken-hackers
