bug#11887: [PATCH] Improve standards conformance of string-number (was Re: bug#11887: string-number edge cases)

2013-03-07 Thread Mark H Weaver
I wrote:
 Here's a patch to fix these problems.  Comments and suggestions welcome.

I went ahead and pushed this to stable-2.0.  I'm closing this bug.

Thanks,
  Mark





bug#11887: string-number edge cases

2013-03-06 Thread Mark H Weaver
Daniel Llorens daniel.llor...@bluewin.ch writes:

 I think this isn't working as it should either.

 scheme@(guile-user) +1i
 $1 = 0.0+1.0i
 scheme@(guile-user) 1i
 ;;; unknown-location: warning: possibly unbound variable `#{1i}#'
 ERROR: In procedure #procedure 101d6d4a0 ():
 ERROR: In procedure module-lookup: Unbound variable: #{1i}#

No, this is correct.  Scheme requires a sign before an imaginary part.
See R5RS section 7.1.1 (Lexical structure).

Thanks,
  Mark


--8---cut here---start-8---
  number - num 2| num 8
   | num 10| num 16
  
  The following rules for num R, complex R, real R, ureal R,
  uinteger R, and prefix R should be replicated for R = 2, 8, 10,
  and 16.  There are no rules for decimal 2, decimal 8, and decimal
  16, which means that numbers containing decimal points or exponents
  must be in decimal radix.
  
  num R - prefix R complex R
  complex R - real R | real R @ real R
  | real R + ureal R i | real R - ureal R i
  | real R + i | real R - i
  | + ureal R i | - ureal R i | + i | - i
  real R - sign ureal R
  ureal R - uinteger R
  | uinteger R / uinteger R
  | decimal R
  decimal 10 - uinteger 10 suffix
  | . digit 10+ #* suffix
  | digit 10+ . digit 10* #* suffix
  | digit 10+ #+ . #* suffix
  uinteger R - digit R+ #*
  prefix R - radix R exactness
  | exactness radix R
  
  suffix - empty
  | exponent marker sign digit 10+
  exponent marker - e | s | f | d | l
  sign - empty  | + |  -
  exactness - empty | #i | #e
  radix 2 - #b
  radix 8 - #o
  radix 10 - empty | #d
  radix 16 - #x
  digit 2 - 0 | 1
  digit 8 - 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7
  digit 10 - digit
  digit 16 - digit 10 | a | b | c | d | e | f
--8---cut here---end---8---





bug#11887: string-number edge cases

2013-03-06 Thread Daniel Llorens

On Mar 6, 2013, at 17:55, Mark H Weaver wrote:

 Daniel Llorens daniel.llor...@bluewin.ch writes:
 
 I think this isn't working as it should either.
 
 scheme@(guile-user) +1i
 $1 = 0.0+1.0i
 scheme@(guile-user) 1i
 ;;; unknown-location: warning: possibly unbound variable `#{1i}#'
 ERROR: In procedure #procedure 101d6d4a0 ():
 ERROR: In procedure module-lookup: Unbound variable: #{1i}#

 No, this is correct.  Scheme requires a sign before an imaginary part.
 See R5RS section 7.1.1 (Lexical structure).

Thanks.

R5RS is wrong then. A shame.






bug#11887: [PATCH] Improve standards conformance of string-number (was Re: bug#11887: string-number edge cases)

2013-03-06 Thread Mark H Weaver
Here's a patch to fix these problems.  Comments and suggestions welcome.

Mark


From a1926777b03445d397bb1069b325d243e765f84b Mon Sep 17 00:00:00 2001
From: Mark H Weaver m...@netris.org
Date: Wed, 6 Mar 2013 12:52:39 -0500
Subject: [PATCH] Improve standards conformance of string-number.

Fixes http://bugs.gnu.org/11887.

* libguile/numbers.c (mem2ureal): New argument 'allow_inf_or_nan'.
  Accept infinities and NaNs only if 'allow_inf_or_nan' is true and #e
  is not present.  Check for inf.0 or nan. case-insensitively.  Do
  not accept rationals with zero divisors.

  (mem2complex): Pass new 'allow_inf_or_nan' argument to 'mem2ureal',
  which is set if and only if a explicit sign was present.

* test-suite/tests/numbers.test (string-number): Add tests.
---
 libguile/numbers.c|   76 +++--
 test-suite/tests/numbers.test |   12 ++-
 2 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/libguile/numbers.c b/libguile/numbers.c
index 66c95db..f9538f5 100644
--- a/libguile/numbers.c
+++ b/libguile/numbers.c
@@ -5740,7 +5740,8 @@ mem2decimal_from_point (SCM result, SCM mem,
 
 static SCM
 mem2ureal (SCM mem, unsigned int *p_idx,
-	   unsigned int radix, enum t_exactness forced_x)
+	   unsigned int radix, enum t_exactness forced_x,
+   int allow_inf_or_nan)
 {
   unsigned int idx = *p_idx;
   SCM result;
@@ -5753,30 +5754,53 @@ mem2ureal (SCM mem, unsigned int *p_idx,
   if (idx == len)
 return SCM_BOOL_F;
 
-  if (idx+5 = len  !scm_i_string_strcmp (mem, idx, inf.0))
-{
-  *p_idx = idx+5;
-  return scm_inf ();
-}
-
-  if (idx+4  len  !scm_i_string_strcmp (mem, idx, nan.))
-{
-  /* Cobble up the fractional part.  We might want to set the
-	 NaN's mantissa from it. */
-  idx += 4;
-  if (!scm_is_eq (mem2uinteger (mem, idx, 10, implicit_x), SCM_INUM0))
-{
+  if (allow_inf_or_nan  forced_x != EXACT  idx+5 = len)
+switch (scm_i_string_ref (mem, idx))
+  {
+  case 'i': case 'I':
+switch (scm_i_string_ref (mem, idx + 1))
+  {
+  case 'n': case 'N':
+switch (scm_i_string_ref (mem, idx + 2))
+  {
+  case 'f': case 'F':
+if (scm_i_string_ref (mem, idx + 3) == '.'
+ scm_i_string_ref (mem, idx + 4) == '0')
+  {
+*p_idx = idx+5;
+return scm_inf ();
+  }
+  }
+  }
+  case 'n': case 'N':
+switch (scm_i_string_ref (mem, idx + 1))
+  {
+  case 'a': case 'A':
+switch (scm_i_string_ref (mem, idx + 2))
+  {
+  case 'n': case 'N':
+if (scm_i_string_ref (mem, idx + 3) == '.')
+  {
+/* Cobble up the fractional part.  We might want to
+   set the NaN's mantissa from it. */
+idx += 4;
+if (!scm_is_eq (mem2uinteger (mem, idx, 10, implicit_x),
+SCM_INUM0))
+  {
 #if SCM_ENABLE_DEPRECATED == 1
-  scm_c_issue_deprecation_warning
-(Non-zero suffixes to `+nan.' are deprecated.  Use `+nan.0'.);
+scm_c_issue_deprecation_warning
+  (Non-zero suffixes to `+nan.' are deprecated.  Use `+nan.0'.);
 #else
-  return SCM_BOOL_F;
+return SCM_BOOL_F;
 #endif
-}
+  }
   
-  *p_idx = idx;
-  return scm_nan ();
-}
+*p_idx = idx;
+return scm_nan ();
+  }
+  }
+  }
+  }
 
   if (scm_i_string_ref (mem, idx) == '.')
 {
@@ -5809,7 +5833,7 @@ mem2ureal (SCM mem, unsigned int *p_idx,
 return SCM_BOOL_F;
 
 	  divisor = mem2uinteger (mem, idx, radix, implicit_x);
-	  if (scm_is_false (divisor))
+	  if (scm_is_false (divisor) || scm_is_eq (divisor, SCM_INUM0))
 	return SCM_BOOL_F;
 
 	  /* both are int/big here, I assume */
@@ -5885,7 +5909,7 @@ mem2complex (SCM mem, unsigned int idx,
   if (idx == len)
 return SCM_BOOL_F;
 
-  ureal = mem2ureal (mem, idx, radix, forced_x);
+  ureal = mem2ureal (mem, idx, radix, forced_x, sign != 0);
   if (scm_is_false (ureal))
 {
   /* input must be either +i or -i */
@@ -5954,9 +5978,9 @@ mem2complex (SCM mem, unsigned int idx,
 		  sign = -1;
 		}
 	  else
-		sign = 1;
+		sign = 0;
 
-	  angle = mem2ureal (mem, idx, radix, forced_x);
+	  angle = mem2ureal (mem, idx, radix, forced_x, sign != 0);
 	  if (scm_is_false (angle))
 		return SCM_BOOL_F;
 	  if (idx != len)
@@ -5978,7 +6002,7 @@ mem2complex (SCM mem, unsigned int idx,
 	  else
 	{
 	  int sign = (c == '+') ? 1 : -1;
-	  SCM imag = mem2ureal (mem, idx, radix, forced_x);
+	  SCM imag = mem2ureal (mem, idx, radix, forced_x, sign != 0);
 
 	 

bug#11887: string-number edge cases

2013-03-06 Thread Mark H Weaver
Hi Peter,

Peter Bex peter@xs4all.nl writes:

 On Tue, Mar 05, 2013 at 02:04:55PM -0500, Mark H Weaver wrote:
 FYI, I produced a simple patch a while back to fix this (see below), but
 it had an interesting side effect: it caused the reader to read things
 like 3/0 and 4+3/0i as symbols.  More generally, anything for which
 'scm_string_to_number' returns false is treated as a symbol by 'read'.

 I think this is simple and at least internally consistent.  Several
 Schemes assume something like 1/0 is a symbol; Chicken does this as
 well, with the numbers egg loaded, so does Gambit.
[...]
 This is also related to how string-number deals with it; if it
 returns #f it is essentially saying this is not valid numerical syntax
 and should fail to parse as a number.  Not raising an exception on
 (string-number 1/0) but raising an exception on
 (with-input-from-string 1/0 read) is a bit odd, I'd say.

Thanks, this makes me feel somewhat better about it.  I guess it's okay.

 On Tue, Mar 05, 2013 at 03:49:13PM +0100, Andy Wingo wrote:
  Hi Ian,
  
  On Mon 09 Jul 2012 14:29, Ian Price ianpric...@googlemail.com writes:
  
   PARSE ERROR (+InF.0 +inf.0 +inf.0 +Inf.0) = #f
   PARSE ERROR (-iNF.0 -inf.0 -inf.0 -Inf.0) = #f
   PARSE ERROR (+NAN.0 +nan.0 +nan.0 +NaN.0) = #f
  
  These are not errors.  +NAN.0 is not even not a number :)
 
 I double-checked, but in the upcoming R7RS it is (see 7.1, paragraph 2 of
 draft 7).  It looks like R6RS was case-sensitive in its numerical syntax
 and +NAN.0 is disallowed by it.

Thanks for looking this up.  Given that it's allowed by the latest R7RS
draft, I think we should allow these too.

I've written a patch to fix these issues, and will send it in my next
email.

Regards,
  Mark





bug#11887: string-number edge cases

2013-03-05 Thread Andy Wingo
Hi Ian,

On Mon 09 Jul 2012 14:29, Ian Price ianpric...@googlemail.com writes:

 PARSE ERROR (+InF.0 +inf.0 +inf.0 +Inf.0) = #f
 PARSE ERROR (-iNF.0 -inf.0 -inf.0 -Inf.0) = #f
 PARSE ERROR (+NAN.0 +nan.0 +nan.0 +NaN.0) = #f

These are not errors.  +NAN.0 is not even not a number :)

 PARSE ERROR (+nan.1 #f) = +nan.0
 PARSE ERROR (+nan.01 #f) = +nan.0

These are only supported because 2.0.0 was released with +nan.1 parsing
as +nan.0.  It signals a deprecation warning with a note to this effect.
Guile from master should pass these particular tests.

 PARSE ERROR (nan.0 #f) = +nan.0
 PARSE ERROR (inf.0 #f) = +inf.0
 PARSE ERROR (#e+nan.0 #f) = +nan.0
 PARSE ERROR (#e+inf.0 #f) = +inf.0
 PARSE ERROR (#e-inf.0 #f) = -inf.0

These are errors.

 If the number contains a division by zero, we get a numerical overflow
 error.

 scheme@(guile−user) (string-number 3/0)
 ERROR: In procedure string−number:
 ERROR: Throw to key `numerical−overflow' with args `(make−ratio Numerical 
 overflow #f #f)'.

This is also an error.  We should plumb through some extra arg to
mem2ureal, I guess, to check for a zero denominator.

Andy
-- 
http://wingolog.org/





bug#11887: string-number edge cases

2013-03-05 Thread Mark H Weaver
Andy Wingo wi...@pobox.com writes:

 On Mon 09 Jul 2012 14:29, Ian Price ianpric...@googlemail.com writes:

 If the number contains a division by zero, we get a numerical overflow
 error.

 scheme@(guile−user) (string-number 3/0)
 ERROR: In procedure string−number:
 ERROR: Throw to key `numerical−overflow' with args `(make−ratio Numerical 
 overflow #f #f)'.

 This is also an error.  We should plumb through some extra arg to
 mem2ureal, I guess, to check for a zero denominator.

FYI, I produced a simple patch a while back to fix this (see below), but
it had an interesting side effect: it caused the reader to read things
like 3/0 and 4+3/0i as symbols.  More generally, anything for which
'scm_string_to_number' returns false is treated as a symbol by 'read'.

I'm not sure how I feel about this.  What do you think?

 Mark


diff --git a/libguile/numbers.c b/libguile/numbers.c
index 66c95db..9013f0c 100644
--- a/libguile/numbers.c
+++ b/libguile/numbers.c
@@ -5809,7 +5809,7 @@ mem2ureal (SCM mem, unsigned int *p_idx,
 return SCM_BOOL_F;
 
  divisor = mem2uinteger (mem, idx, radix, implicit_x);
- if (scm_is_false (divisor))
+ if (scm_is_false (divisor) || scm_is_eq (divisor, SCM_INUM0))
return SCM_BOOL_F;
 
  /* both are int/big here, I assume */





bug#11887: string-number edge cases

2013-03-05 Thread Daniel Llorens

I think this isn't working as it should either.

scheme@(guile-user) +1i
$1 = 0.0+1.0i
scheme@(guile-user) 1i
;;; unknown-location: warning: possibly unbound variable `#{1i}#'
ERROR: In procedure #procedure 101d6d4a0 ():
ERROR: In procedure module-lookup: Unbound variable: #{1i}#






bug#11887: string-number edge cases

2013-03-05 Thread Peter Bex
On Tue, Mar 05, 2013 at 03:49:13PM +0100, Andy Wingo wrote:
 Hi Ian,
 
 On Mon 09 Jul 2012 14:29, Ian Price ianpric...@googlemail.com writes:
 
  PARSE ERROR (+InF.0 +inf.0 +inf.0 +Inf.0) = #f
  PARSE ERROR (-iNF.0 -inf.0 -inf.0 -Inf.0) = #f
  PARSE ERROR (+NAN.0 +nan.0 +nan.0 +NaN.0) = #f
 
 These are not errors.  +NAN.0 is not even not a number :)

I double-checked, but in the upcoming R7RS it is (see 7.1, paragraph 2 of
draft 7).  It looks like R6RS was case-sensitive in its numerical syntax
and +NAN.0 is disallowed by it.

Cheers,
Peter
-- 
http://www.more-magic.net





bug#11887: string-number edge cases

2013-03-05 Thread Peter Bex
On Tue, Mar 05, 2013 at 02:04:55PM -0500, Mark H Weaver wrote:
 FYI, I produced a simple patch a while back to fix this (see below), but
 it had an interesting side effect: it caused the reader to read things
 like 3/0 and 4+3/0i as symbols.  More generally, anything for which
 'scm_string_to_number' returns false is treated as a symbol by 'read'.

I think this is simple and at least internally consistent.  Several
Schemes assume something like 1/0 is a symbol; Chicken does this as
well, with the numbers egg loaded, so does Gambit.

Raising an error is also acceptable.  According to the lexical syntax
1/0 *is* a valid number, so you could argue that it *must* parse as
a number, which is impossible so an error should occur.

This is also related to how string-number deals with it; if it
returns #f it is essentially saying this is not valid numerical syntax
and should fail to parse as a number.  Not raising an exception on
(string-number 1/0) but raising an exception on
(with-input-from-string 1/0 read) is a bit odd, I'd say.

Cheers,
Peter
-- 
http://www.more-magic.net





bug#11887: string-number edge cases

2012-07-09 Thread Ian Price

Hi guilers,

I've mentioned these to Mark Weaver on IRC off-hand before, but I'm
posting this bug report to formalise it, and make sure we don't
forget.

Peter Bex, as one of the maintainers of the chicken numbers egg,
created a thorough test suite for string-number a while ago[0], and it
points out a number of possible and actual errors in the handling of
string-number.

I've attached a copy with the modifications for guile already applied,
and a copy of the full results, but the summary is as follows.

If the number contains a division by zero, we get a numerical overflow
error.

scheme@(guile−user) (string-number 3/0)
ERROR: In procedure string−number:
ERROR: Throw to key `numerical−overflow' with args `(make−ratio Numerical 
overflow #f #f)'.

Entering a new prompt.  Type `,bt' for a backtrace or `,q' to continue.
scheme@(guile−user) [1] ,q

This contradicts the r6rs specification[1], which mandates a return
value of #f. IMO, this is the correct behaviour, since otherwise, we
have created a special case in the API for one type of invalid number.

(These tests cause the program to stop, and so are commented out in the
attached version.)

The rest of the errors are less serious, and show that we treat nans and
infinity someone inconsistently in guile. Specifically, we are strict on
case, and lenient on the numerical prefix.

0. 
http://bugs.call-cc.org/browser/release/4/numbers/trunk/tests/string-conversion.scm
1. http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-14.html#node_idx_584

Peter,

Thanks for these

-- 
Ian Price

Programming is like pinball. The reward for doing it well is
the opportunity to do it again - from The Wizardy Compiled
;;;
;;; Numerical syntax torture test
;;;
;;; This tries to test a lot of edge cases in Scheme's numerical syntax.
;;;
;;; Output is written so that if you run it through grep ERROR it will
;;; output nothing (and exit status will be nonzero) if there are no errors.
;;; If you run it through tail -n 1 you will just get the total error summary.
;;;
;;; This code assumes that string-number accepts numbers with embedded radix
;;; specifiers (R5RS mentions that it's allowed to return #f in those cases).
;;; It also doesn't try to support Schemes which support *only* integers or
;;; *only* flonums (which is also allowed by R5RS).
;;;

;;;
;; The prelude below is messy but required to get everything working
;; with some of the major Schemes.
;;
;; Also note that to test with Gambit, it appears you first need to type in
;; (load ~~/lib/syntax-case) and then load this file, or use gsi's -:s switch
;;;

;(use extras numbers); Chicken w/ numbers
;(use-syntax (ice-9 syncase)) ; Guile

;; Set this to #f if the Scheme has no compnums at all, 'inexact if it only
;; supports inexact compnums or 'exact if it supports exact compnums.
;; (Gauche, Guile, SCM: inexact, Chicken w/o numbers: #f)
(define compnum-type 'inexact)

;; Set this to #f if the Scheme has no fractional number support,
;; 'exact if it supports rational numbers and 'inexact if it converts fractions
;; to floating-point, inexact numbers
(define fraction-type 'exact)

;; Fix these if your Scheme doesn't allow division by zero
;; For Chicken without the numbers egg, use fp/ instead of /
(define the-nan (/ 0.0 0.0))
(define pos-inf (/ 1.0 0.0))
(define neg-inf (/ -1.0 0.0))

; Scheme48, Racket, Gambit, Chicken w/o numbers, SCM
;(define (nan? x) (and (number? x) (not (= x x

(define total-errors 0)

(define (check-string-against-values! str . possible-values)
  (let ((res (string-number str)))
(let lp ((values possible-values))
  (if (null? values)
  (begin (display   PARSE ERROR )
 (write (cons str possible-values))
 (display  = ) (write res) (newline)
 (set! total-errors (+ total-errors 1)))
  (let ((value (car values)))
(if (not (or (and (not (string? value)) (equal? res value))
 (and res (nan? res) (or (and value (nan? value))
(lp (cdr values))
(let ((re-str (and res (number-string res
  (let lp2 ((values possible-values))
(if (null? values)
(begin (display SERIALIZATION ERROR )
   (write (cons str possible-values))
   (display  = ) (write re-str) (newline)
   (set! total-errors (+ total-errors 1)))
(let ((value (car values)))
  (if (not (or (and res (string=? re-str str))
   (and (not res) (not value))
   (and res (string? value) (string=? 
re-str value
  (lp2 (cdr values))
  (begin (display OK  )
 (write (cons str possible-values))