Hi,
I noticed that the default parser does not recognize Windows-style
filenames:
alvherre=# SELECT alias, description, token FROM ts_debug(e'c:\\archivos');
alias | description | token
---+-+--
asciiword | Word, all ASCII | c
blank | Space symbols | :\
asciiword | Word, all ASCII | archivos
(3 lignes)
I played with it a bit (see attached patch -- basically I added \ in all
places where a / was being parsed, in the file-path states) and managed
to have it parse some naive versions, like
alvherre=# SELECT alias, description, token FROM ts_debug(e'c:\\archivos\\foo');
alias |description| token
---+---+-
file | File or path name | c:\archivos\foo
(1 ligne)
However it fails as soon as you have a space, which is quite common on
Windows, for example
alvherre=# SELECT alias, description, token FROM ts_debug(e'c:\\Program Files\\');
alias |description| token
---+---+
file | File or path name | c:\Program
blank | Space symbols |
asciiword | Word, all ASCII | Files
blank | Space symbols | \
(4 lignes)
It also fails to recognize "network" file names, like
alvherre=# SELECT alias, description, token FROM
ts_debug(e'\\\\server\\archivos\\foo');
alias | description | token
---+-+--
blank | Space symbols | \\
asciiword | Word, all ASCII | server
blank | Space symbols | \
asciiword | Word, all ASCII | archivos
blank | Space symbols | \
asciiword | Word, all ASCII | foo
(6 lignes)
Is this something worth worrying about?
--
Alvaro Herrera                        http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
Index: src/backend/tsearch/wparser_def.c
===
RCS file: /cvsroot/pgsql/src/backend/tsearch/wparser_def.c,v
retrieving revision 1.4
diff -c -p -r1.4 wparser_def.c
*** src/backend/tsearch/wparser_def.c 23 Oct 2007 20:46:12 - 1.4
--- src/backend/tsearch/wparser_def.c 25 Oct 2007 13:45:36 -
*** static TParserStateActionItem actionTPS_
*** 1053,1058
--- 1053,1059
{p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '\\', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
*** static TParserStateActionItem actionTPS_
*** 1063,1068
--- 1064,1070
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '\\', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
*** static TParserStateActionItem actionTPS_
*** 1070,1082
--- 1072,1087
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '\\', A_NEXT, TPS_InFileFirst, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
static TParserStateActionItem actionTPS_InPathSecond[] = {
{p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
{p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '\\', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
+ {p_iseqC, '\\', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
{p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
*** static TParserStateActionItem actionTPS_
*** 1089,1094
--- 1094,1100
{p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
{p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '\\', A_PUSH, TPS_InFileFirst, 0, NULL},
{p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
{NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
};
*** static TParserStateActionItem actionTPS_
*** 1130,1135
--- 1136,1142
static TParserStateActionItem actionTPS_InProtocolFirst[] = {
{p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
{p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
+ {p_iseqC, '\\', A_NEXT, TPS_InFile, 0, NULL},
{NULL, 0, A_POP, TPS_Null, 0, NULL}
};
---(end of broadcast)---
TIP 9: In versions below 8.0, the planner will ignore your desire to
choose an index scan if your joining column's datatypes do not
match