Hi,

I noticed that the default parser does not recognize Windows-style
filenames:

alvherre=# SELECT alias, description, token FROM ts_debug(e'c:\\archivos');
   alias   |   description   |  token   
-----------+-----------------+----------
 asciiword | Word, all ASCII | c
 blank     | Space symbols   | :\
 asciiword | Word, all ASCII | archivos
(3 lignes)

I played with it a bit (see attached patch -- basically I added \ in all
places where a / was being parsed, in the file-path states) and managed
to have it parse some naive versions, like

alvherre=# SELECT alias, description, token FROM ts_debug(e'c:\\archivos\\foo');
 alias |    description    |      token      
-------+-------------------+-----------------
 file  | File or path name | c:\archivos\foo
(1 ligne)

However, it fails as soon as the path contains a space, which is quite
common on Windows, for example

alvherre=# SELECT alias, description, token FROM ts_debug(e'c:\\Program Files\\');
   alias   |    description    |   token    
-----------+-------------------+------------
 file      | File or path name | c:\Program
 blank     | Space symbols     |  
 asciiword | Word, all ASCII   | Files
 blank     | Space symbols     | \
(4 lignes)

It also fails to recognize "network" file names, like

alvherre=# SELECT alias, description, token FROM ts_debug(e'\\\\server\\archivos\\foo');
   alias   |   description   |  token   
-----------+-----------------+----------
 blank     | Space symbols   | \\
 asciiword | Word, all ASCII | server
 blank     | Space symbols   | \
 asciiword | Word, all ASCII | archivos
 blank     | Space symbols   | \
 asciiword | Word, all ASCII | foo
(6 lignes)

Is this something worth worrying about?

-- 
Alvaro Herrera                                http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
Index: src/backend/tsearch/wparser_def.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tsearch/wparser_def.c,v
retrieving revision 1.4
diff -c -p -r1.4 wparser_def.c
*** src/backend/tsearch/wparser_def.c	23 Oct 2007 20:46:12 -0000	1.4
--- src/backend/tsearch/wparser_def.c	25 Oct 2007 13:45:36 -0000
*************** static TParserStateActionItem actionTPS_
*** 1053,1058 ****
--- 1053,1059 ----
  	{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
  	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
  	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+ 	{p_iseqC, '\\', A_NEXT, TPS_InFileFirst, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
*************** static TParserStateActionItem actionTPS_
*** 1063,1068 ****
--- 1064,1070 ----
  	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
  	{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
  	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+ 	{p_iseqC, '\\', A_NEXT, TPS_InFileFirst, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
*************** static TParserStateActionItem actionTPS_
*** 1070,1082 ****
--- 1072,1087 ----
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
  	{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+ 	{p_iseqC, '\\', A_NEXT, TPS_InFileFirst, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
  static TParserStateActionItem actionTPS_InPathSecond[] = {
  	{p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
  	{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
+ 	{p_iseqC, '\\', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
  	{p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
+ 	{p_iseqC, '\\', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
  	{p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
*************** static TParserStateActionItem actionTPS_
*** 1089,1094 ****
--- 1094,1100 ----
  	{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
  	{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
  	{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ 	{p_iseqC, '\\', A_PUSH, TPS_InFileFirst, 0, NULL},
  	{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
  	{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
  };
*************** static TParserStateActionItem actionTPS_
*** 1130,1135 ****
--- 1136,1142 ----
  static TParserStateActionItem actionTPS_InProtocolFirst[] = {
  	{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
  	{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
+ 	{p_iseqC, '\\', A_NEXT, TPS_InFile, 0, NULL},
  	{NULL, 0, A_POP, TPS_Null, 0, NULL}
  };
  
---------------------------(end of broadcast)---------------------------
TIP 9: In versions below 8.0, the planner will ignore your desire to
       choose an index scan if your joining column's datatypes do not
       match

Reply via email to