Hi hackers,

Ivan reported a small bug in irregex (#888) that Alex agreed was an
oversight, so I fixed it upstream.  I noticed that the last bugfix we
applied to Chicken's irregex was a while ago and a handful of fixes
have gone in since, which means we're slightly out of date.

Attached are a few patches which bring us up to date with upstream
irregex bugfix release 0.8.3, plus one patch for #888.  Meanwhile,
Alex has merged my submatch extraction change(!), but that is a very
large and invasive change which I think is best postponed until
after 4.8.0 is released (which I hope is Real Soon Now).  However,
the patches I've attached include one very important bugfix, and the
patch for #888 is so trivial that I think it's worth including in the
release.

Cheers,
Peter
-- 
http://sjamaan.ath.cx
--
"The process of preparing programs for a digital computer
 is especially attractive, not only because it can be economically
 and scientifically rewarding, but also because it can be an aesthetic
 experience much like composing poetry or music."
                                                        -- Donald Knuth
From dee52f34a68a37aab0f9613b29ac81951f79e82f Mon Sep 17 00:00:00 2001
From: Peter Bex <[email protected]>
Date: Wed, 18 Jul 2012 20:26:27 +0200
Subject: [PATCH 1/4] Fix hang in irregex-fold caused by patterns matching the
 empty string (upstream changeset ba70feace1dd)

---
 irregex-core.scm       |   28 ++++++++++++++++++----------
 tests/test-irregex.scm |    5 +++++
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/irregex-core.scm b/irregex-core.scm
index 982f57e..54413bf 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1485,7 +1485,7 @@
            (map (lambda (_)
                   `(/ ,(integer->char #x80) ,(integer->char #xFF)))
                 (zero-to (+ i lo-len))))))
-       (zero-to (- (length hi-ls) lo-len 1)))
+       (zero-to (- (length hi-ls) (+ lo-len 1))))
       (list
        (sre-sequence
         (cons `(/ ,(integer->char
@@ -3752,10 +3752,13 @@
                     matches)))
             (if (not m)
                 (finish i acc)
-                (let* ((end (%irregex-match-end-index m 0))
-                       (acc (kons i m acc)))
-                  (irregex-reset-matches! matches)
-                  (lp end acc))))))))
+                (let ((end (%irregex-match-end-index m 0)))
+                  (if (= end i)
+                      ;; skip one char forward if we match the empty string
+                      (lp (+ end 1) acc)
+                      (let ((acc (kons i m acc)))
+                        (irregex-reset-matches! matches)
+                        (lp end acc))))))))))
 
 (define (irregex-fold irx kons . args)
   (if (not (procedure? kons)) (%irregex-error 'irregex-fold "not a procedure" kons))
@@ -3777,11 +3780,16 @@
           (let ((m (irregex-search/matches irx cnk start i matches)))
             (if (not m)
                 (finish start i acc)
-                (let* ((acc (kons start i m acc))
-                       (end-src (%irregex-match-end-chunk m 0))
-                       (end-index (%irregex-match-end-index m 0)))
-                  (irregex-reset-matches! matches)
-                  (lp end-src end-index acc))))))))
+                (let ((end-src (%irregex-match-end-chunk m 0))
+                      (end-index (%irregex-match-end-index m 0)))
+                  (if (and (eq? end-src start) (= end-index i))
+                      (if (>= end-index ((chunker-get-end cnk) end-src ))
+                          (let ((next ((chunker-get-next cnk) end-src)))
+                            (lp next ((chunker-get-start cnk) next) acc))
+                          (lp end-src (+ end-index 1) acc))
+                      (let ((acc (kons start i m acc)))
+                        (irregex-reset-matches! matches)
+                        (lp end-src end-index acc))))))))))
 
 (define (irregex-fold/chunked irx kons . args)
   (if (not (procedure? kons)) (%irregex-error 'irregex-fold/chunked "not a procedure" kons))
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index a06bc6b..11bf225 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -358,6 +358,11 @@
        rope-chunker
        (rope "[email protected] and [email protected]")
        (lambda (src i s) (reverse s))))
+  (test-equal '("poo poo ")
+      (irregex-fold '(* "poo ")
+                    (lambda (i m s) (cons (irregex-match-substring m) s))
+                    '()
+                    "poo poo platter"))
   )
 
 
-- 
1.7.9.1

From 3247177f6d17aa1607cb0e2b58309c8ceac8389d Mon Sep 17 00:00:00 2001
From: Peter Bex <[email protected]>
Date: Wed, 18 Jul 2012 20:31:09 +0200
Subject: [PATCH 2/4] Add complemented unicode char-set tests for irregex
 (upstream changeset 78ba6b09e021)

---
 tests/test-irregex.scm |   10 +++++++++-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index 11bf225..fd2cb97 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -504,5 +504,13 @@
 (test-assert (not (irregex-search "(?u:<[あ-ん]*>)" "<ひらgがな>")))
 (test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
 
-(test-end)(test-exit)
+(test-assert (irregex-search "(?u:<[^あ-ん]*>)" "<abc>"))
+(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<あん>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん]*>)" "<ひらがな>")))
+(test-assert (irregex-search "(?u:<[^あ-ん語]*>)" "<abc>"))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<あん>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<ひらがな>")))
+(test-assert (not (irregex-search "(?u:<[^あ-ん語]*>)" "<語>")))
+
+(test-end)
 
-- 
1.7.9.1

From 4c666088bb5a02c7a96e2d9593dc8551b52c3c21 Mon Sep 17 00:00:00 2001
From: Peter Bex <[email protected]>
Date: Wed, 18 Jul 2012 20:36:30 +0200
Subject: [PATCH 3/4] Update irregex copyright and NEWS; this brings us
 exactly up to upstream release 0.8.3 (upstream
 changeset 88104ffcd77a)

---
 LICENSE          |    2 +-
 NEWS             |    2 +-
 irregex-core.scm |    3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/LICENSE b/LICENSE
index 2ebd33b..c4217e0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -60,7 +60,7 @@ synrules.scm:
 
 irregex.scm:
 
-  Copyright (c) 2005-2010, Alex Shinn
+  Copyright (c) 2005-2011, Alex Shinn
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without
diff --git a/NEWS b/NEWS
index dfc5c77..77e7999 100644
--- a/NEWS
+++ b/NEWS
@@ -144,7 +144,7 @@
   - deprecated C_hash_string and C_hash_string_ci functions in the C API in
     favor of the more secure versions C_u_i_string_hash, C_u_i_string_ci_hash
   - a number of bugs in the irregex regular expression engine have been
-    fixed
+    fixed; it has been updated to upstream release 0.8.3
   - "with-input-from-file", "with-output-to-file", "with-input-from-pipe" and
     "with-output-to-pipe" now properly restore the standard input/output 
     ports in case the body thunk escapes
diff --git a/irregex-core.scm b/irregex-core.scm
index 54413bf..ebc3553 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -1,6 +1,6 @@
 ;;;; irregex.scm -- IrRegular Expressions
 ;;
-;; Copyright (c) 2005-2010 Alex Shinn.  All rights reserved.
+;; Copyright (c) 2005-2011 Alex Shinn.  All rights reserved.
 ;; BSD-style license: http://synthcode.com/license.txt
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -31,6 +31,7 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; History
 ;;
+;; 0.8.3: 2011/12/18 - various accumulated bugfixes
 ;; 0.8.2: 2010/08/28 - (...)? submatch extraction fix and alternate
 ;;                     named submatches from Peter Bex
 ;;                     Added irregex-split, irregex-extract,
-- 
1.7.9.1

From a66599b471fe44abd5ebca532009dbea68a8119c Mon Sep 17 00:00:00 2001
From: Peter Bex <[email protected]>
Date: Wed, 18 Jul 2012 20:40:45 +0200
Subject: [PATCH 4/4] Fix irregex builtin "real" utility pattern to allow
 leading +/- sign.  This fixes #888 (upstream changeset
 3c51418853de)

---
 NEWS             |    1 +
 irregex-core.scm |    4 +++-
 2 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/NEWS b/NEWS
index 77e7999..01a3bf9 100644
--- a/NEWS
+++ b/NEWS
@@ -134,6 +134,7 @@
   - added "alist-update" ("data-structures" unit)
   - "irregex-replace" returns the original string instead of #f when the
     regex does not match
+  - irregex "real" built-in utility pattern now accepts a leading sign
   - added "change-directory*" ("posix" unit)
   - number parsing has been made more reliable and standards compliant
   - deprecated "none?", "always?" and "never?"
diff --git a/irregex-core.scm b/irregex-core.scm
index ebc3553..5b3e80a 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -2261,7 +2261,9 @@
 
     ;; extended library patterns
     (integer . (seq (? (or #\+ #\-)) (+ numeric)))
-    (real . (seq (+ numeric) (? #\. (+ numeric)) (? (or #\e #\E) integer)))
+    (real . (seq (? (or #\+ #\-))
+                 (+ numeric) (? #\. (+ numeric))
+                 (? (or #\e #\E) integer)))
     ;; slightly more lax than R5RS, allow ->foo, etc.
     (symbol-initial . (or alpha ("!$%&*/:<=>?^_~")))
     (symbol-subsequent . (or symbol-initial digit ("+-.@")))
-- 
1.7.9.1

_______________________________________________
Chicken-hackers mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/chicken-hackers

Reply via email to