On Wed, Jan 31, 2018 at 05:53:29PM +0100, Dmitry Dolgov wrote:
> Thanks for working on that! I haven't followed this thread before, and after a
> quick review I have few side questions.

Thank you for your comments!

> Why not write `is_separator_char` using `isprint`, `isalpha`, `isdigit` from
> ctype.h? Something like:
> 
>     return isprint(*str) && !isalpha(*str) && !isdigit(*str)
> 
> From what I see in the source code they do exactly the same and tests are
> successfully passing with this change.

Fixed. The patch uses those functions now. I made is_separator_char() as a
IS_SEPARATOR_CHAR() macro.

> What do you think about providing two slightly different messages for these 
> two
> situations:
> 
>     if (n->type == NODE_TYPE_SPACE && !isspace((unsigned char) *s))
>         ereport(ERROR,
>                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
>                  errmsg("unexpected character \"%.*s\", expected \"%s\"",
>                         pg_mblen(s), s, n->character),
>                  errhint("In FX mode, punctuation in the input string "
>                          "must exactly match the format string.")));
>     /*
>      * In FX mode we insist that separator character from the format
>      * string matches separator character from the input string.
>      */
>     else if (n->type == NODE_TYPE_SEPARATOR && *n->character != *s)
>         ereport(ERROR,
>                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
>                  errmsg("unexpected character \"%.*s\", expected \"%s\"",
>                         pg_mblen(s), s, n->character),
>                  errhint("In FX mode, punctuation in the input string "
>                          "must exactly match the format string.")));
> 
> E.g. "unexpected space character" and "unexpected separator character". The
> difference is quite subtle, but I think a bit of context would never hurt.

I fixed those messages, but in a different manner. I think that an
unexpected character is unknown and neither space nor separator. And
better to say that was expected space/separator character.

Attached fixed patch.

-- 
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 487c7ff750..053d153d35 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -6174,7 +6174,8 @@ SELECT regexp_match('abc01234xyz', 
'(?:(.*?)(\d+)(.*)){1,1}');
        <function>to_timestamp</function> and <function>to_date</function>
        skip multiple blank spaces in the input string unless the
        <literal>FX</literal> option is used. For example,
-       <literal>to_timestamp('2000&nbsp;&nbsp;&nbsp;&nbsp;JUN', 'YYYY 
MON')</literal> works, but
+       <literal>to_timestamp('2000&nbsp;&nbsp;&nbsp;&nbsp;JUN', 'YYYY 
MON')</literal> and
+       <literal>to_timestamp('2000&nbsp;JUN', 
'YYYY&nbsp;&nbsp;&nbsp;&nbsp;MON')</literal> work, but
        <literal>to_timestamp('2000&nbsp;&nbsp;&nbsp;&nbsp;JUN', 'FXYYYY 
MON')</literal> returns an error
        because <function>to_timestamp</function> expects one space only.
        <literal>FX</literal> must be specified as the first item in
@@ -6182,6 +6183,19 @@ SELECT regexp_match('abc01234xyz', 
'(?:(.*?)(\d+)(.*)){1,1}');
       </para>
      </listitem>
 
+     <listitem>
+      <para>
+       <function>to_timestamp</function> and <function>to_date</function> don't
+       skip multiple printable non letter and non digit characters in the input
+       string, but skip them in the formatting string. For example,
+       <literal>to_timestamp('2000-JUN', 'YYYY/MON')</literal> and
+       <literal>to_timestamp('2000/JUN', 'YYYY//MON')</literal> work, but
+       <literal>to_timestamp('2000//JUN', 'YYYY/MON')</literal>
+       returns an error because count of the "/" character in the input string
+       doesn't match count of it in the formatting string.
+      </para>
+     </listitem>
+
      <listitem>
       <para>
        Ordinary text is allowed in <function>to_char</function>
diff --git a/src/backend/utils/adt/formatting.c 
b/src/backend/utils/adt/formatting.c
index b8bd4caa3e..35007bec5c 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -171,6 +171,8 @@ typedef struct
 #define NODE_TYPE_END          1
 #define NODE_TYPE_ACTION       2
 #define NODE_TYPE_CHAR         3
+#define NODE_TYPE_SEPARATOR    4
+#define NODE_TYPE_SPACE                5
 
 #define SUFFTYPE_PREFIX                1
 #define SUFFTYPE_POSTFIX       2
@@ -542,6 +544,10 @@ static const KeySuffix DCH_suff[] = {
        {NULL, 0, 0, 0}
 };
 
+#define IS_SEPARATOR_CHAR(s) ( \
+       isprint(*(unsigned char*)(s)) && \
+       !isalpha(*(unsigned char*)(s)) && \
+       !isdigit(*(unsigned char*)(s)))
 
 /* ----------
  * Format-pictures (KeyWord).
@@ -1325,7 +1331,14 @@ parse_format(FormatNode *node, const char *str, const 
KeyWord *kw,
                                if (*str == '\\' && *(str + 1) == '"')
                                        str++;
                                chlen = pg_mblen(str);
-                               n->type = NODE_TYPE_CHAR;
+
+                               if (ver == DCH_TYPE && IS_SEPARATOR_CHAR(str))
+                                       n->type = NODE_TYPE_SEPARATOR;
+                               else if (isspace((unsigned char) *str))
+                                       n->type = NODE_TYPE_SPACE;
+                               else
+                                       n->type = NODE_TYPE_CHAR;
+
                                memcpy(n->character, str, chlen);
                                n->character[chlen] = '\0';
                                n->key = NULL;
@@ -2996,12 +3009,53 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar 
*out)
 
        for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
        {
-               if (n->type != NODE_TYPE_ACTION)
+               if (n->type == NODE_TYPE_SPACE || n->type == 
NODE_TYPE_SEPARATOR)
+               {
+                       /*
+                        * In non FX (fixed format) mode we don't insist that 
the consumed
+                        * character matches the format's character.
+                        */
+                       if (!fx_mode)
+                       {
+                               if (isspace((unsigned char) *s) || 
IS_SEPARATOR_CHAR(s))
+                                       s++;
+
+                               continue;
+                       }
+
+                       /*
+                        * In FX mode we insist that whitespace from the format 
string
+                        * matches whitespace from the input string.
+                        */
+                       if (n->type == NODE_TYPE_SPACE && !isspace((unsigned 
char) *s))
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+                                                errmsg("unexpected character 
\"%.*s\", expected space character \"%s\"",
+                                                               pg_mblen(s), s, 
n->character),
+                                                errhint("In FX mode, 
punctuation in the input string "
+                                                                "must exactly 
match the format string.")));
+                       /*
+                        * In FX mode we insist that separator character from 
the format
+                        * string matches separator character from the input 
string.
+                        */
+                       else if (n->type == NODE_TYPE_SEPARATOR && 
*n->character != *s)
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+                                                errmsg("unexpected character 
\"%.*s\", expected separator character \"%s\"",
+                                                               pg_mblen(s), s, 
n->character),
+                                                errhint("In FX mode, 
punctuation in the input string "
+                                                                "must exactly 
match the format string.")));
+
+                       s++;
+                       continue;
+               }
+               else if (n->type != NODE_TYPE_ACTION)
                {
                        /*
-                        * Separator, so consume one character from input 
string.  Notice
-                        * we don't insist that the consumed character match 
the format's
-                        * character.
+                        * Text character, so consume one character from input 
string.
+                        * Notice we don't insist that the consumed character 
match the
+                        * format's character.
+                        * Text field ignores FX mode.
                         */
                        s += pg_mblen(s);
                        continue;
diff --git a/src/test/regress/expected/horology.out 
b/src/test/regress/expected/horology.out
index 63e39198e6..6e50cfdbfa 100644
--- a/src/test/regress/expected/horology.out
+++ b/src/test/regress/expected/horology.out
@@ -2769,14 +2769,32 @@ SELECT to_timestamp('97/2/16 8:14:30', 
'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS');
  Sat Feb 16 08:14:30 0097 PST
 (1 row)
 
+SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS');
+         to_timestamp         
+------------------------------
+ Fri Mar 18 23:38:15 2011 PDT
+(1 row)
+
 SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD');
          to_timestamp         
 ------------------------------
  Sat Jan 12 00:00:00 1985 PST
 (1 row)
 
+SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD');
+         to_timestamp         
+------------------------------
+ Sat Jan 12 00:00:00 1985 PST
+(1 row)
+
+SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD');
+         to_timestamp         
+------------------------------
+ Sat Jan 12 00:00:00 1985 PST
+(1 row)
+
 SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16',
-                    '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD');
+                    '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD');
          to_timestamp         
 ------------------------------
  Sun May 16 00:00:00 1976 PDT
@@ -2789,7 +2807,7 @@ SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD');
 (1 row)
 
 SELECT to_timestamp('15 "text between quote marks" 98 54 45',
-                    E'HH24 "\\text between quote marks\\"" YY MI SS');
+                    E'HH24 "\\"text between quote marks\\"" YY MI SS');
          to_timestamp         
 ------------------------------
  Thu Jan 01 15:54:45 1998 PST
@@ -2810,6 +2828,21 @@ SELECT to_timestamp('2000January09Sunday', 
'YYYYFMMonthDDFMDay');
 SELECT to_timestamp('97/Feb/16', 'YYMonDD');
 ERROR:  invalid value "/Fe" for "Mon"
 DETAIL:  The given value did not match any of the allowed values for this 
field.
+SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');
+         to_timestamp         
+------------------------------
+ Sun Feb 16 00:00:00 1997 PST
+(1 row)
+
+SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD');
+ERROR:  unexpected character "/", expected separator character ":"
+HINT:  In FX mode, punctuation in the input string must exactly match the 
format string.
+SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD');
+         to_timestamp         
+------------------------------
+ Sun Feb 16 00:00:00 1997 PST
+(1 row)
+
 SELECT to_timestamp('19971116', 'YYYYMMDD');
          to_timestamp         
 ------------------------------
@@ -2966,7 +2999,7 @@ SELECT to_timestamp('2011-12-18 11:38 20',     
'YYYY-MM-DD HH12:MI TZM');
 SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD  HH24:MI:SS');
          to_timestamp         
 ------------------------------
- Sun Dec 18 03:38:15 2011 PST
+ Sun Dec 18 23:38:15 2011 PST
 (1 row)
 
 SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD  HH24:MI:SS');
@@ -2996,7 +3029,7 @@ SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD  
HH24:MI:SS');
 SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD   HH24:MI:SS');
          to_timestamp         
 ------------------------------
- Sun Dec 18 03:38:15 2011 PST
+ Sun Dec 18 23:38:15 2011 PST
 (1 row)
 
 SELECT to_date('2011 12  18', 'YYYY MM DD');
@@ -3014,13 +3047,13 @@ SELECT to_date('2011 12  18', 'YYYY MM  DD');
 SELECT to_date('2011 12  18', 'YYYY MM   DD');
   to_date   
 ------------
- 12-08-2011
+ 12-18-2011
 (1 row)
 
 SELECT to_date('2011 12 18', 'YYYY  MM DD');
   to_date   
 ------------
- 02-18-2011
+ 12-18-2011
 (1 row)
 
 SELECT to_date('2011  12 18', 'YYYY  MM DD');
diff --git a/src/test/regress/sql/horology.sql 
b/src/test/regress/sql/horology.sql
index ebb196a1cf..e3305fc836 100644
--- a/src/test/regress/sql/horology.sql
+++ b/src/test/regress/sql/horology.sql
@@ -392,15 +392,21 @@ SELECT to_timestamp('0097/Feb/16 --> 08:14:30', 
'YYYY/Mon/DD --> HH:MI:SS');
 
 SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS');
 
+SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS');
+
 SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD');
 
+SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD');
+
+SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD');
+
 SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16',
-                    '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD');
+                    '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD');
 
 SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD');
 
 SELECT to_timestamp('15 "text between quote marks" 98 54 45',
-                    E'HH24 "\\text between quote marks\\"" YY MI SS');
+                    E'HH24 "\\"text between quote marks\\"" YY MI SS');
 
 SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY');
 
@@ -408,6 +414,12 @@ SELECT to_timestamp('2000January09Sunday', 
'YYYYFMMonthDDFMDay');
 
 SELECT to_timestamp('97/Feb/16', 'YYMonDD');
 
+SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');
+
+SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD');
+
+SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD');
+
 SELECT to_timestamp('19971116', 'YYYYMMDD');
 
 SELECT to_timestamp('20000-1116', 'YYYY-MMDD');

Reply via email to