[EMAIL PROTECTED] wrote:
> 
> Hello!
> 
> I have tried to parse a string using the following command:
> 
> link-parts: parse link [ "/" | ":" | none ]
> 
> where link is a string.
> 
> How do I split a string in parts with a
> rule of different identifiers?
> 

Assuming you are referring to hypertext links...

Short answer:  Write in BNF the complete grammar for strings you
   want split, then use that as the basis for writing your parse
   rules.

Longer answer:  The code below is an excerpt from something I'll
   post later that tries to manipulate general URLs (i.e. not
   just the "http" scheme).

The url-to-object function takes a URL (or string for one) and
returns an object containing the component parts.

The object-to-url reassembles the parts of one of the above
objects into a URL.

THIS IS BETA CODE.  BE WARNED.

==================================

REBOL []

url-utils: make object! [

    dump-it: func [uo [object!] /local dump1] [
        dump1: func [attr /local attribute] [
            if found? attribute: uo/field attr [
                print rejoin [ {^-^-} attr {: } attribute]
            ]
        ]
        foreach attrib [schm user pass host port path file args frag] [
            dump1 attrib
        ]
    ]

    pack-path: function [
        p [string! file!]
    ][
        wasfile? parts delim
    ][
        if wasfile?: file? p [p: to-string p]
        parts: copy []
        foreach part parse/all p "/" [
            switch/default part [
                "."  []
                ".." [remove parts]
            ][
                insert parts part
            ]
        ]
        if #"/" = last p [insert parts ""]
        reverse parts
        delim: copy p: copy ""
        foreach part parts [
            append p rejoin [delim part]
            delim: "/"
        ]
        either wasfile? [to-file p] [p]
    ]

    digit: charset [#"0" - #"9"]
    hex:   union digit charset [#"A" - #"F" #"a" - #"f"]
    alpha: charset [#"A" - #"Z" #"a" - #"z"]
    aldig: union alpha digit
    hchar: union aldig charset "-"
    uchar: union hchar charset "_.+"
    pchar: union aldig charset {$-_@.&+!*"'(),}

    digits:   [some digit]

    hnameseg: [alpha    any [hchar]]
    hostname: [hnameseg any ["." hnameseg]]
    hostquad: [digits "." digits "." digits "." digits]
    hostpart: [hostname | hostquad]

    escape: ["%" hex hex]

    url-object: make object! [
        schm:
        user:
        pass:
        host:
        port:
        path:
        file:
        args:
        frag:
            none
        field: func [attr [word!] /local attrib] [
            either all [
                found? attrib: in self attr  attrib: get attrib
            ]
            [attrib]
            [none]
        ]
    ]

    url-to-object: function [
        u [url! string!]
    ][
        _schm _user _pass _host _port _path _file _args _frag
        schemepat portpat userpat hostnamepat pathpat filepat
    ][

        _schm: _user: _pass: _host: _port: _path: _file: _args: _frag:
none

        parse/all to-string u [
            ; special case for mailto
            [   copy _schm "mailto"   ":"
                copy _user some uchar "@"
                copy _host hostpart   end
            |
                ; scheme (other than mailto)
                [ copy _schm hnameseg  ":"
                | none (_schm: none)
                ]
                ; user info, host name or IP address, host port
                [   "//"
                    ; optional user ID and password
                    [ copy _user some uchar ":"  copy _pass some uchar
"@"
                    | copy _user some uchar "@"  (_pass: none)
                    |  none (_user: _pass: none)
                    ]
                    ; host name or IP
                    ; (should be optional only for tcp server)
                    [ copy _host hostpart
                    | none (_host: none)
                    ]
                    ; optional port number
                    [ ":" copy _port digits
                    | none (_port: none)
                    ]
                | none (_host: none)
                ]
                ; directory path
                [ copy _path [["/~" | "/" | none] any [some pchar "/"]]
                | none (_path: none)
                ]
                ; optional file name and optional arguments or fragment
                [ copy _file any pchar
                  any [ "?" copy _args to end
                      | "#" copy _frag to end
                      ]
                ]
            ]
            end
        ]

        make url-object [
            schm: _schm
            user: _user
            pass: _pass
            host: _host
            port: _port
            path: _path
            file: _file
            args: _args
            frag: _frag
        ]
    ]

    object-to-url: func [o [object!] /local cons u] [
        cons: func [
            pre [string!] attr [word!] post [string!] /local attrib
        ][
            either found? attrib: o/field attr
            [rejoin [pre attrib post]]
            [""]
        ]
        u: copy ""
        either "mailto" = o/schm [
            append u cons "" 'schm ":"
            append u cons "" 'user "@"
            append u cons "" 'host ""
        ][
            append u cons "" 'schm ":"
            if any [o/user  o/host  o/port] [
                append u "//"
                if o/user [
                    append u cons ""  'user ""
                    append u cons ":" 'pass ""
                    append u "@"
                ]
                append u cons ""  'host ""
                append u cons ":" 'port ""
            ]
            either all [
                found? o/host  found? o/file  none? o/path
            ][
                append u "/"
            ][
                append u cons ""  'path ""
            ]
            append u cons ""  'file ""
            append u cons "?" 'args ""
            append u cons "#" 'frag ""
        ]
        to-url u
    ]

    context-url: func [co [object!] o [object!] /local ro match? dup] [
        change?: func [b [block!] /local f] [
            foreach attr b [
                if to-logic all [
                    found? f: o/field attr  f <> co/field attr
                ][
                    return true
                ]
            ]
            false
        ]
        dup: func [b [block!] /local copyfield] [
            foreach attr b [
                either string? copyfield: o/field attr [
                    copyfield: copy copyfield
                ][
                    copyfield: none
                ]
                set in ro attr copyfield
            ]
        ]
        either change? ['schm 'user 'pass 'host 'port ] [
            o
        ][
            ro: make co []
            either change? ['path] [
                dup ['path 'file 'args 'frag]
                if all [
                    found? o/path
                    #"/" <> first o/path
                ][
                    ro/path: pack-path rejoin [
                        any [co/path "/"]
                        o/path
                    ]
                ]
            ][
                either change? ['file] [
                    dup ['file 'args 'frag]
                ][
                    if change? ['args 'frag] [dup ['args 'frag]]
                ]
            ]
            ro
        ]
    ]

]

test-context: func [/local co uo] [
    foreach [c ub] [
        http://www.foo.com/~bozo/
            [   "hairdos.html"
                "hats/"
                "/cgi-bin/feedback.html"
                "/~waldo/"
                "../flubber/home.html"
            ]
        http://www.foo.com/~bozo/buddy/boy.com
            [   "hairdos.html"
                "hats/"
                "/cgi-bin/feedback.html"
                "/~waldo/"
                "../flubber/home.html"
            ]
        http://www.foo.com
            [   "hairdos.html"
                "hats/"
                "/cgi-bin/feedback.html"
                "/~waldo/"
                "../flubber/home.html"
            ]
    ][
        print [c ":"]
        co: url-utils/url-to-object c
        foreach u ub [
            prin ["^-" u]
            uo: url-utils/url-to-object u
            print [
                "^-=>^-"
                url-utils/object-to-url url-utils/context-url co uo
            ]
        ]
    ]
]

test-to-from: func [/local obj url2 n nd] [

    n: nd: 0

    foreach url [

        protocol://site/
        protocol://login@site/
        protocol://login:password@site/

        protocol://site:9999/
        protocol://login@site:9999/
        protocol://login:password@site:9999/

        finger:[EMAIL PROTECTED]/
        finger:[EMAIL PROTECTED]

        ftp://ftp.rebol.com/
        ftp://username:[EMAIL PROTECTED]/
        ftp://login:[EMAIL PROTECTED]/pub/sound_file.wav
        ftp://host.com/directory-path/new-directory
        ftp://host.com/directory-path/new-directory/

        http://www.rebol.com/
        http://www.rebol.com:8000/
        http://login:[EMAIL PROTECTED]/sound_file.wav

        http://www.bigdocs.com/encyclopedia/p.html#podiatry
        http://www.code.net/cgi-bin/test?day=28&month=1&year=2000

        pop://username:[EMAIL PROTECTED]

        dns://www.rebol.com
        dns://207.69.132.8

        daytime://everest.cclabs.missouri.edu

        whois:[EMAIL PROTECTED]
        whois:[EMAIL PROTECTED]

        nntp://news.some-isp.net
        nntp://news.some-isp.net/some.news.group

        tcp://:8000
        tcp://127.0.0.1:8080

        mailto:somebody@somehost
        mailto:[EMAIL PROTECTED]

        "rel/path/file.html"
        "/abs/path/file.html"
        "file.html"
        "rel/path/file.html#fragment"
        "/abs/path/file.html#fragment"
        "file.html#fragment"

        "../../assets/images/logo.gif"

        http://www.big.box.com/~user/
        http://www.big.box.com/~user/path/goes/here/
        http://www.big.box.com/~user
        "/~waldo/"
        "/~waldo/stuff/more.html"

        "/~waldo/"
        "../flubber/home.html"

    ][
        n: n + 1
        url: to-url url
        print rejoin ["^/^-" url]
        obj:  url-utils/url-to-object url
        url2: url-utils/object-to-url obj
        print rejoin ["^-" url2]
        if url <> url2 [
            print "**DIFFERENT**"
            nd: nd + 1
        ]
        url-utils/dump-it obj
    ]
    print ["^/tested" n "cases, with" nd "difference(s)"]
    exit
]

Reply via email to