[EMAIL PROTECTED] wrote:
>
> Hello!
>
> I have tried to parse a string using the following command:
>
> link-parts: parse link [ "/" | ":" | none ]
>
> where link is a string.
>
> How do I split a string in parts with a
> rule of different identifiers?
>
Assuming you are referring to hypertext links...
Short answer: Write in BNF the complete grammar for strings you
want split, then use that as the basis for writing your parse
rules.
Longer answer: The code below is an excerpt from something I'll
post later that tries to manipulate general URLs (i.e. not
just the "http" scheme).
The url-to-object function takes a URL (or string for one) and
returns an object containing the component parts.
The object-to-url reassembles the parts of one of the above
objects into a URL.
THIS IS BETA CODE. BE WARNED.
==================================
REBOL []
url-utils: make object! [
dump-it: func [uo [object!] /local dump1] [
dump1: func [attr /local attribute] [
if found? attribute: uo/field attr [
print rejoin [ {^-^-} attr {: } attribute]
]
]
foreach attrib [schm user pass host port path file args frag] [
dump1 attrib
]
]
pack-path: function [
p [string! file!]
][
wasfile? parts delim
][
if wasfile?: file? p [p: to-string p]
parts: copy []
foreach part parse/all p "/" [
switch/default part [
"." []
".." [remove parts]
][
insert parts part
]
]
if #"/" = last p [insert parts ""]
reverse parts
delim: copy p: copy ""
foreach part parts [
append p rejoin [delim part]
delim: "/"
]
either wasfile? [to-file p] [p]
]
digit: charset [#"0" - #"9"]
hex: union digit charset [#"A" - #"F" #"a" - #"f"]
alpha: charset [#"A" - #"Z" #"a" - #"z"]
aldig: union alpha digit
hchar: union aldig charset "-"
uchar: union hchar charset "_.+"
pchar: union aldig charset {$-_@.&+!*"'(),}
digits: [some digit]
hnameseg: [alpha any [hchar]]
hostname: [hnameseg any ["." hnameseg]]
hostquad: [digits "." digits "." digits "." digits]
hostpart: [hostname | hostquad]
escape: ["%" hex hex]
url-object: make object! [
schm:
user:
pass:
host:
port:
path:
file:
args:
frag:
none
field: func [attr [word!] /local attrib] [
either all [
found? attrib: in self attr attrib: get attrib
]
[attrib]
[none]
]
]
url-to-object: function [
u [url! string!]
][
_schm _user _pass _host _port _path _file _args _frag
schemepat portpat userpat hostnamepat pathpat filepat
][
_schm: _user: _pass: _host: _port: _path: _file: _args: _frag:
none
parse/all to-string u [
; special case for mailto
[ copy _schm "mailto" ":"
copy _user some uchar "@"
copy _host hostpart end
|
; scheme (other than mailto)
[ copy _schm hnameseg ":"
| none (_schm: none)
]
; user info, host name or IP address, host port
[ "//"
; optional user ID and password
[ copy _user some uchar ":" copy _pass some uchar
"@"
| copy _user some uchar "@" (_pass: none)
| none (_user: _pass: none)
]
; host name or IP
; (should be optional only for tcp server)
[ copy _host hostpart
| none (_host: none)
]
; optional port number
[ ":" copy _port digits
| none (_port: none)
]
| none (_host: none)
]
; directory path
[ copy _path [["/~" | "/" | none] any [some pchar "/"]]
| none (_path: none)
]
; optional file name and optional arguments or fragment
[ copy _file any pchar
any [ "?" copy _args to end
| "#" copy _frag to end
]
]
]
end
]
make url-object [
schm: _schm
user: _user
pass: _pass
host: _host
port: _port
path: _path
file: _file
args: _args
frag: _frag
]
]
object-to-url: func [o [object!] /local cons u] [
cons: func [
pre [string!] attr [word!] post [string!] /local attrib
][
either found? attrib: o/field attr
[rejoin [pre attrib post]]
[""]
]
u: copy ""
either "mailto" = o/schm [
append u cons "" 'schm ":"
append u cons "" 'user "@"
append u cons "" 'host ""
][
append u cons "" 'schm ":"
if any [o/user o/host o/port] [
append u "//"
if o/user [
append u cons "" 'user ""
append u cons ":" 'pass ""
append u "@"
]
append u cons "" 'host ""
append u cons ":" 'port ""
]
either all [
found? o/host found? o/file none? o/path
][
append u "/"
][
append u cons "" 'path ""
]
append u cons "" 'file ""
append u cons "?" 'args ""
append u cons "#" 'frag ""
]
to-url u
]
context-url: func [co [object!] o [object!] /local ro match? dup] [
change?: func [b [block!] /local f] [
foreach attr b [
if to-logic all [
found? f: o/field attr f <> co/field attr
][
return true
]
]
false
]
dup: func [b [block!] /local copyfield] [
foreach attr b [
either string? copyfield: o/field attr [
copyfield: copy copyfield
][
copyfield: none
]
set in ro attr copyfield
]
]
either change? ['schm 'user 'pass 'host 'port ] [
o
][
ro: make co []
either change? ['path] [
dup ['path 'file 'args 'frag]
if all [
found? o/path
#"/" <> first o/path
][
ro/path: pack-path rejoin [
any [co/path "/"]
o/path
]
]
][
either change? ['file] [
dup ['file 'args 'frag]
][
if change? ['args 'frag] [dup ['args 'frag]]
]
]
ro
]
]
]
test-context: func [/local co uo] [
foreach [c ub] [
http://www.foo.com/~bozo/
[ "hairdos.html"
"hats/"
"/cgi-bin/feedback.html"
"/~waldo/"
"../flubber/home.html"
]
http://www.foo.com/~bozo/buddy/boy.com
[ "hairdos.html"
"hats/"
"/cgi-bin/feedback.html"
"/~waldo/"
"../flubber/home.html"
]
http://www.foo.com
[ "hairdos.html"
"hats/"
"/cgi-bin/feedback.html"
"/~waldo/"
"../flubber/home.html"
]
][
print [c ":"]
co: url-utils/url-to-object c
foreach u ub [
prin ["^-" u]
uo: url-utils/url-to-object u
print [
"^-=>^-"
url-utils/object-to-url url-utils/context-url co uo
]
]
]
]
test-to-from: func [/local obj url2 n nd] [
n: nd: 0
foreach url [
protocol://site/
protocol://login@site/
protocol://login:password@site/
protocol://site:9999/
protocol://login@site:9999/
protocol://login:password@site:9999/
finger:[EMAIL PROTECTED]/
finger:[EMAIL PROTECTED]
ftp://ftp.rebol.com/
ftp://username:[EMAIL PROTECTED]/
ftp://login:[EMAIL PROTECTED]/pub/sound_file.wav
ftp://host.com/directory-path/new-directory
ftp://host.com/directory-path/new-directory/
http://www.rebol.com/
http://www.rebol.com:8000/
http://login:[EMAIL PROTECTED]/sound_file.wav
http://www.bigdocs.com/encyclopedia/p.html#podiatry
http://www.code.net/cgi-bin/test?day=28&month=1&year=2000
pop://username:[EMAIL PROTECTED]
dns://www.rebol.com
dns://207.69.132.8
daytime://everest.cclabs.missouri.edu
whois:[EMAIL PROTECTED]
whois:[EMAIL PROTECTED]
nntp://news.some-isp.net
nntp://news.some-isp.net/some.news.group
tcp://:8000
tcp://127.0.0.1:8080
mailto:somebody@somehost
mailto:[EMAIL PROTECTED]
"rel/path/file.html"
"/abs/path/file.html"
"file.html"
"rel/path/file.html#fragment"
"/abs/path/file.html#fragment"
"file.html#fragment"
"../../assets/images/logo.gif"
http://www.big.box.com/~user/
http://www.big.box.com/~user/path/goes/here/
http://www.big.box.com/~user
"/~waldo/"
"/~waldo/stuff/more.html"
"/~waldo/"
"../flubber/home.html"
][
n: n + 1
url: to-url url
print rejoin ["^/^-" url]
obj: url-utils/url-to-object url
url2: url-utils/object-to-url obj
print rejoin ["^-" url2]
if url <> url2 [
print "**DIFFERENT**"
nd: nd + 1
]
url-utils/dump-it obj
]
print ["^/tested" n "cases, with" nd "difference(s)"]
exit
]