bug#11843: acknowledged by developer (date -s with locale-dependent input: notabug)

2012-07-04 Thread Jim Meyering
peter evans wrote:
 Thank you for closing this as not a bug.

 So it is not a bug that date is unable to parse its own output in
 arbitrary locales.
 Indeed, it would not be a bug if it stopped and complained about
 it. That would be
 perfectly acceptable.

 date however, goes one better than that, rather than alerting you to
 the fact that
 you have tried something stupid, like having a locale other than C,
 it will just set
 the date to whatever it feels like.

 At the very least, it should stop and complain rather than setting
 some random time.
 This is a bug. The rest of the world does not live in some undefined subset of
 locales that date groks.

I agree.  The lack of diagnostic is indeed a bug.
I'll remove the notabug tag.
Thanks for persevering.

This demonstrates the bad behavior:

$ date -d $(printf '\xb0')
Wed Jul  4 00:00:00 CEST 2012

With the patch below, it now diagnoses the problem:

$ src/date -d $(printf '\xb0')
src/date: invalid date '\260'
[Exit 1]

I'll add tests and NEWS.


diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 67669f6..4d9f65a 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -113,6 +113,11 @@ typedef long int long_time_t;
 typedef time_t long_time_t;
 #endif

+/* Convert a possibly-signed character to an unsigned character.  This is
+   a bit safer than casting to unsigned char, since it catches some type
+   errors that the cast doesn't.  */
+static inline unsigned char to_uchar (char ch) { return ch; }
+
 /* Lots of this code assumes time_t and time_t-like values fit into
long_time_t.  */
 verify (TYPE_MINIMUM (long_time_t) <= TYPE_MINIMUM (time_t)
@@ -1171,7 +1176,8 @@ yylex (YYSTYPE *lvalp, parser_control *pc)
 }

   if (c != '(')
-return *pc->input++;
+return to_uchar (*pc->input++);
+
   count = 0;
   do
 {





bug#11843: acknowledged by developer (date -s with locale-dependent input: notabug)

2012-07-04 Thread Bruno Haible
Jim Meyering wrote:
 +static inline unsigned char to_uchar (char ch) { return ch; }

For the use of 'inline', one needs this too:


--- m4/parse-datetime.m4.orig   Wed Jul  4 10:04:43 2012
+++ m4/parse-datetime.m4   Wed Jul  4 10:04:36 2012
@@ -1,4 +1,4 @@
-# parse-datetime.m4 serial 19
+# parse-datetime.m4 serial 20
 dnl Copyright (C) 2002-2006, 2008-2012 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -32,6 +32,7 @@
 
   dnl Prerequisites of lib/parse-datetime.y.
   AC_REQUIRE([gl_BISON])
+  AC_REQUIRE([AC_C_INLINE])
   AC_REQUIRE([gl_C_COMPOUND_LITERALS])
   AC_STRUCT_TIMEZONE
   AC_REQUIRE([gl_CLOCK_TIME])






bug#11843: acknowledged by developer (date -s with locale-dependent input: notabug)

2012-07-04 Thread Jim Meyering
Bruno Haible wrote:
 Jim Meyering wrote:
 +static inline unsigned char to_uchar (char ch) { return ch; }

 For the use of 'inline', one needs this too:
 +++ m4/parse-datetime.m4  Wed Jul  4 10:04:36 2012
 +  AC_REQUIRE([AC_C_INLINE])

Thanks, Bruno.
Here's the complete patch on the gnulib side:
(still to do in coreutils: NEWS, test and gnulib update)

From d8f90adf5f01512958b6da46bd5eea01294a434e Mon Sep 17 00:00:00 2001
From: Jim Meyering meyer...@redhat.com
Date: Wed, 4 Jul 2012 12:58:07 +0200
Subject: [PATCH] parse-datetime: fix failure to diagnose invalid input

date -d $(printf '\xb0') would print 00:00:00 with today's date
rather than diagnosing the invalid input.  Now it reports this:
date: invalid date '\260'
* lib/parse-datetime.y (to_uchar): Define.
(yylex): Don't sign-extend other bytes.
* m4/parse-datetime.m4: Require AC_C_INLINE for first use of inline.
Thanks to Bruno Haible for the patch to this file.
* tests/test-parse-datetime.c (main): Add a test to trigger the bug.
Peter Evans reported the bug in GNU date: http://bugs.gnu.org/11843
---
 ChangeLog   | 13 +
 lib/parse-datetime.y|  8 +++-
 m4/parse-datetime.m4|  3 ++-
 tests/test-parse-datetime.c |  4 
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5edb6d4..cd3ba33 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2012-07-04  Jim Meyering  meyer...@redhat.com
+
+   parse-datetime: fix failure to diagnose invalid input
+   date -d $(printf '\xb0') would print 00:00:00 with today's date
+   rather than diagnosing the invalid input.  Now it reports this:
+   date: invalid date '\260'
+   * lib/parse-datetime.y (to_uchar): Define.
+   (yylex): Don't sign-extend other bytes.
+   * m4/parse-datetime.m4: Require AC_C_INLINE for first use of inline.
+   Thanks to Bruno Haible for the patch to this file.
+   * tests/test-parse-datetime.c (main): Add a test to trigger the bug.
+   Peter Evans reported the bug in GNU date: http://bugs.gnu.org/11843
+
 2012-07-03  Jim Meyering  meyer...@redhat.com

bootstrap: do not require now-removed build-aux/missing
diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 67669f6..4d9f65a 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -113,6 +113,11 @@ typedef long int long_time_t;
 typedef time_t long_time_t;
 #endif

+/* Convert a possibly-signed character to an unsigned character.  This is
+   a bit safer than casting to unsigned char, since it catches some type
+   errors that the cast doesn't.  */
+static inline unsigned char to_uchar (char ch) { return ch; }
+
 /* Lots of this code assumes time_t and time_t-like values fit into
long_time_t.  */
 verify (TYPE_MINIMUM (long_time_t) <= TYPE_MINIMUM (time_t)
@@ -1171,7 +1176,8 @@ yylex (YYSTYPE *lvalp, parser_control *pc)
 }

   if (c != '(')
-return *pc->input++;
+return to_uchar (*pc->input++);
+
   count = 0;
   do
 {
diff --git a/m4/parse-datetime.m4 b/m4/parse-datetime.m4
index 8efefbe..3fb2d2b 100644
--- a/m4/parse-datetime.m4
+++ b/m4/parse-datetime.m4
@@ -1,4 +1,4 @@
-# parse-datetime.m4 serial 19
+# parse-datetime.m4 serial 20
 dnl Copyright (C) 2002-2006, 2008-2012 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -32,6 +32,7 @@ AC_DEFUN([gl_PARSE_DATETIME],

   dnl Prerequisites of lib/parse-datetime.y.
   AC_REQUIRE([gl_BISON])
+  AC_REQUIRE([AC_C_INLINE])
   AC_REQUIRE([gl_C_COMPOUND_LITERALS])
   AC_STRUCT_TIMEZONE
   AC_REQUIRE([gl_CLOCK_TIME])
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 4c0370d..1c9fd2d 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -409,5 +409,9 @@ main (int argc _GL_UNUSED, char **argv)
   ASSERT (result.tv_sec == 24 * 3600
           && result.tv_nsec == now.tv_nsec);

+  /* Exercise a sign-extension bug.  Before July 2012, an input
+ starting with a high-bit-set byte would be treated like 0.  */
+  ASSERT ( ! parse_datetime (&result, "\xb0", &now));
+
   return 0;
 }
--
1.7.11.1.104.ge7b44f1





bug#11843: acknowledged by developer (date -s with locale-dependent input: notabug)

2012-07-04 Thread Jim Meyering
Jim Meyering wrote:
 Bruno Haible wrote:
 Jim Meyering wrote:
 +static inline unsigned char to_uchar (char ch) { return ch; }

 For the use of 'inline', one needs this too:
 +++ m4/parse-datetime.m4 Wed Jul  4 10:04:36 2012
 +  AC_REQUIRE([AC_C_INLINE])

 Thanks, Bruno.
 Here's the complete patch on the gnulib side:
 (still to do in coreutils: NEWS, test and gnulib update)

 Subject: [PATCH] parse-datetime: fix failure to diagnose invalid input

 date -d $(printf '\xb0') would print 00:00:00 with today's date
 rather than diagnosing the invalid input.  Now it reports this:
 date: invalid date '\260'
 * lib/parse-datetime.y (to_uchar): Define.
 (yylex): Don't sign-extend other bytes.
 * m4/parse-datetime.m4: Require AC_C_INLINE for first use of inline.
 Thanks to Bruno Haible for the patch to this file.
 * tests/test-parse-datetime.c (main): Add a test to trigger the bug.
 Peter Evans reported the bug in GNU date: http://bugs.gnu.org/11843
...

I confirmed that this bug affects GNU date at least back to sh-utils-1.16f
by running that version of date (building it did require a few tweaks).
The sources say yylex has always returned int, the pointer
type has always been char * and there has never been an
attempt to cast that returned value.

Here's the coreutils fix:

From cc322ddd16607c0c8e978e5003008344e2710ec9 Mon Sep 17 00:00:00 2001
From: Jim Meyering meyer...@redhat.com
Date: Wed, 4 Jul 2012 13:01:56 +0200
Subject: [PATCH] date: fails to diagnose invalid input

date -d $(printf '\xb0') would print 00:00:00 with today's date
rather than diagnosing the invalid input.  Now it reports this:
date: invalid date '\260'
* gnulib: Update submodule to latest for fixed parse-datetime.y.
* tests/misc/date [invalid-high-bit-set]: New test.
* NEWS (Bug fixes): Mention it.
* bootstrap, tests/init.sh: Also update to latest.
Reported by Peter Evans in http://bugs.gnu.org/11843
---
 NEWS|   5 ++
 THANKS.in   |   1 +
 bootstrap   | 146 +---
 gnulib  |   2 +-
 tests/init.sh   |   5 +-
 tests/misc/date |   5 ++
 6 files changed, 100 insertions(+), 64 deletions(-)

diff --git a/NEWS b/NEWS
index 03c600a..3aff3a8 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,11 @@ GNU coreutils NEWS                                    -*- outline -*-
   processes will not intersperse their output.
   [the bug dates back to the initial implementation]

+  date -d $(printf '\xb0') would print 00:00:00 with today's date
+  rather than diagnosing the invalid input.  Now it reports this:
+  date: invalid date '\260'
+  [This bug was present in "the beginning".]
+
   head --lines=-N (-n-N) now resets the read pointer of a seekable input file.
   This means that head -n-3 no longer consumes all of its input, and lines
   not output by head may be processed by other programs.  For example, this
diff --git a/THANKS.in b/THANKS.in
index 2873594..a00357d 100644
--- a/THANKS.in
+++ b/THANKS.in
@@ -483,6 +483,7 @@ Per Starbäck        starb...@stp.lingfil.uu.se
 Peter Breitenlohner p...@mppmu.mpg.de
 Peter Dyballa   peter_dyba...@web.de
 Peter Eriksson  pe...@ifm.liu.se
+Peter Evans pe...@ixp.jp
 Peter Horst pe...@ointment.org
 Peter Moulder   rei...@netspace.net.au
 Peter Samuelson psamu...@sampo.creighton.edu
diff --git a/bootstrap b/bootstrap
index c496d29..e984910 100755
--- a/bootstrap
+++ b/bootstrap
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Print a version string.
-scriptversion=2012-04-26.13; # UTC
+scriptversion=2012-07-03.20; # UTC

 # Bootstrap this package from checked-out sources.

@@ -130,7 +130,7 @@ extract_package_name='
  p
   }
 '
-package=`sed -n "$extract_package_name" configure.ac` || exit
+package=$(sed -n "$extract_package_name" configure.ac) || exit
 gnulib_name=lib$package

 build_aux=build-aux
@@ -215,7 +215,7 @@ find_tool ()
   eval export $find_tool_envvar
 }

-# Find sha1sum, named gsha1sum on MacPorts, and shasum on MacOS 10.6.
+# Find sha1sum, named gsha1sum on MacPorts, and shasum on Mac OS X 10.6.
 find_tool SHA1SUM sha1sum gsha1sum shasum

 # Override the default configuration, if necessary.
@@ -230,7 +230,6 @@ esac
 test -z "${gnulib_extra_files}" && \
   gnulib_extra_files="
 $build_aux/install-sh
-$build_aux/missing
 $build_aux/mdate-sh
 $build_aux/texinfo.tex
 $build_aux/depcomp
@@ -256,7 +255,7 @@ do
 usage
 exit;;
   --gnulib-srcdir=*)
-GNULIB_SRCDIR=`expr "X$option" : 'X--gnulib-srcdir=\(.*\)'`;;
+GNULIB_SRCDIR=${option#--gnulib-srcdir=};;
   --skip-po)
 SKIP_PO=t;;
   --force)
@@ -307,7 +306,7 @@ insert_sorted_if_absent() {
   file=$1
   str=$2
   test -f $file || touch $file
-  echo "$str" | sort_patterns - $file | cmp - $file > /dev/null \
+  echo "$str" | sort_patterns - $file | cmp -s - $file > /dev/null \
 || { echo $str | sort_patterns -