On Sat, May 25, 2013 at 4:35 PM, Henk Langeveld <[email protected]> wrote:
> About three years ago there was a thread on using json with ksh.  This
> provided some pointers to libdss (and the dss wrapper).
>
> I've recently been doing some work on centos where a bit of builtin json
> processing would come in handy but I'm not so desperate yet: python has
> plenty of support for json if I really need that level of integration.
>
> However, a similar question surfaced on Stackoverflow today, so perhaps it's
> time to revisit this theme:
>
> http://stackoverflow.com/questions/16742136/ways-to-parse-json-using-kornshell
>
> So my question would be: What options are there for *builtin* json support?
> As json is quickly gaining traction as
> a data-exchange format over xml, it would be nice to translate compounds to
> json and back.

Long ago I wrote two standalone utilities "json2cpv" and "cpv2json"
which exactly do that (*stream* (!!) conversion between JSON and
Compound variables). Trouble is that they were written for Sun and
it's unlikely Oracle will now release them to the (non-paying) public
(which means: A cleanroom implementation is required... not hard but
requires time (and I'm *bitterly* searching for a job right now so I
don't have much time... sorry)) ... ;-(

... attached (as "jsontest1.sh.txt") is a small demo application
(10min hack) which shows how to parse small chunks of JSON using ksh93
and regular expressions... the output looks more or less like the
output of "json2cpv".

* Random notes:
- Parsing JSON can be tricky since the names in name/value pairs can
contain blanks and other characters with a special meaning for the
shell. Additionally the full range of Unicode characters is allowed.
That's why the demo above returns an array of compound variables
instead of using something like an associative array (the other design
reason is that compound variables can be streamed via pipe and read
one-by-one via read -C... something which is not easily possible with
JSON :-) )
The alternative would be to "encode" names (for example a variant of
the URLencoding scheme) to make sure that they do not contain any
special characters so that they can be used as index names in
associative arrays and/or as plain ksh93 variable names (note that
ksh93 allows Unicode characters as variable names (e.g. this part is
not a big problem... :-) ))
- JSON is almost always sent in UTF-8. Make sure your locale is UTF-8
compatible and/or use the ksh93 builtin "iconv"
- It would be nice if dgk&&I could somehow sit together physically and
work-out a RFC for compound variable streams and send that to the IETF
(not kidding)

----

Bye,
Roland

-- 
  __ .  . __
 (o.\ \/ /.o) [email protected]
  \__\/\/__/  MPEG specialist, C&&JAVA&&Sun&&Unix programmer
  /O /==\ O\  TEL +49 641 3992797
 (;O/ \/ \O;)
#!/usr/bin/ksh93

########################################################################
#                                                                      #
#               This software is part of the ast package               #
#                    Copyright (c) 2013 Roland Mainz                   #
#                      and is licensed under the                       #
#                 Eclipse Public License, Version 1.0                  #
#                    by AT&T Intellectual Property                     #
#                                                                      #
#                A copy of the License is available at                 #
#          http://www.eclipse.org/org/documents/epl-v10.html           #
#         (with md5 checksum b35adb5213ca9657e911e9befb180842)         #
#                                                                      #
#                                                                      #
#                 Roland Mainz <[email protected]>              #
#                                                                      #
########################################################################

#
# Copyright (c) 2013, Roland Mainz. All rights reserved.
#

#
# jsonparse1 - a simple JSON parser 
#

# Sample JSON document used as input for the demo.
# The here-document delimiter is quoted so that the shell never performs
# parameter or command expansion inside the JSON text.
typeset -r jsontext="$(
cat <<'EOF'
{
    "firstName": "John",
    "lastName": "Smith",
    "age": 25,
    "address": {
        "streetAddress": "21 2nd Street",
        "city": "New York",
        "state": "NY",
        "postalCode": 10021
    },
    "phoneNumbers": [
        {
            "type": "home",
            "number": "212 555-1234"
        },
        {
            "type": "fax",
            "number": "646 555-4567"
        }
    ],
    "done": 666
}
EOF
)"

#
# parse_json - split a JSON text into tokens
#
# Arguments:
#   $1 - name of the array variable which receives the tokens
#   $2 - JSON text
#
# The text is run through one global pattern substitution; afterwards
# every element which is set in ".sh.match" is copied to the array
# named by $1, i.e. ar[i][j] = .sh.match[i][j].
# The callers in this file use the first subscript as the number of the
# capture group in the pattern below and the second subscript as the
# position of the token:
#   0 - whole match         5 - array end "]"
#   1 - name                6 - ","
#   2 - object begin "{"    7 - numerical value
#   3 - object end "}"      8 - string value
#   4 - array start "["
#
# Returns: always 0
#
function parse_json
{
        # not called "jsontext", to avoid shadowing the read-only global
        # variable of that name
        typeset text="$2"
        nameref ar="$1"
        typeset dummy
        integer i j

        # fixme:
        # - We want to enforce standard conformance - does ~(Exp) or
        #   ~(Ex-p) do that ?
        # Only the side effect on ".sh.match" is of interest here, the
        # result of the substitution itself is not used.
        dummy="${text//~(Ex-p)(?:
                (?:\"([^\"]+?)\"):|     # name
                (\{)|                   # object begin
                (\})|                   # object end
                (\[)|                   # array start
                (\])|                   # array end
                (,)|                    #
                ([[:digit:]]+)|         # numerical value
                (?:\"([^\"]*?)\")       # string value
                )/D}"

        # debug output
#       print -v .sh.match
#       printf $"#dummy=%q\n" "${dummy}"

        # copy ".sh.match" to array "ar"
        # fixme: Use typeset -c instead
        for i in "${!.sh.match[@]}" ; do
                for j in "${!.sh.match[i][@]}" ; do
                        # unset elements are skipped so that callers can
                        # use [[ -v ar[group][pos] ]] to find the token type
                        if [[ -v .sh.match[i][j] ]] ; then
                                ar[i][j]="${.sh.match[i][j]}"
                        fi
                done
        done

        return 0
}


#
# build_compound_tree - convert a token array (as filled in by
# parse_json) into an array of compound variables
#
# Arguments:
#   $1 - name of the compound array which receives the result
#   $2 - name of the token array
#   $3 - index of the first token to process
#   $4 - index one past the last token to process
#   $5 - first index to use in the result array
#
# Each processed name/value pair becomes one element of the result
# array with a ".name" member and one of
#   .value       - string, number (float) or nested compound array
#                  (for a JSON object)
#   .arrayvalue  - compound array with one entry per array element
#                  (for a JSON array; the element scan below only looks
#                  for object begin/end tokens)
#
# Processing stops at the first "object end" token or at index $4.
#
# Returns: always 0
#
function build_compound_tree
{
        nameref car=$1
        nameref ar=$2
        integer i=$3
        integer ar_max=$4
        integer cari=$5 # car index

        integer j k nested
        bool foundobj

        for (( ; i < ar_max ; )) ; do
                # end of object ? then return...
                if [[ -v ar[3][$i] ]] ; then
                        return 0
                fi

                # the name is empty if the current token is not a name
                car[cari].name="${ar[1][$i]-}"
                ((i++))

                if [[ -v ar[8][$i] ]] ; then
                        # string value
                        typeset car[cari].value="${ar[8][i++]}"
                elif [[ -v ar[7][$i] ]] ; then
                        # numerical value
                        float car[cari].value="${ar[7][i++]}"
                elif [[ -v ar[2][$i] ]] ; then
                        # nested object: move j one past the matching
                        # "object end" token
                        (( nested=0 , foundobj=false ))
                        for (( j=i ; (j < ar_max) && (nested > 0 || !foundobj) ; j++ )) ; do
                                [[ -v ar[2][$j] ]] && ((nested++, foundobj=true))
                                [[ -v ar[3][$j] ]] && ((nested--))
                        done

                        compound -a car[$cari].value
                        # $((i+1)) skips the "object begin" token; the token
                        # array is passed on under the name we received it
                        build_compound_tree car[$cari].value "$2" $((i+1)) $j 0
                        (( i=j ))
                elif [[ -v ar[4][$i] ]] ; then
                        # array: move j one past the matching "array end"
                        # token
                        (( nested=0 , foundobj=false ))
                        for (( j=i ; (j < ar_max) && (nested > 0 || !foundobj) ; j++ )) ; do
                                [[ -v ar[4][$j] ]] && ((nested++, foundobj=true))
                                [[ -v ar[5][$j] ]] && ((nested--))
                        done

                        compound -a car[$cari].arrayvalue
                        nameref js_ar=car[$cari].arrayvalue
                        integer jari=0 # json array index

                        # skip the "array start" token
                        ((i++))

                        for (( ;; )) ; do
                                # move k one past the end of the next object
                                # within the array
                                (( nested=0 , foundobj=false ))
                                for (( k=i ; (k < j) && (nested > 0 || !foundobj) ; k++ )) ; do
                                        [[ -v ar[2][$k] ]] && ((nested++, foundobj=true))
                                        [[ -v ar[3][$k] ]] && ((nested--))
                                done

                                build_compound_tree js_ar[$jari] "$2" $((i+1)) $k 0
                                [[ -v ar[6][$k] ]] && ((k++)) # ',' handling

                                (( i=k, jari++ ))
                                (( i > (j-1) )) && break
                        done

                        (( i=j ))
                fi

                [[ -v ar[6][$i] ]] && ((i++)) # ',' handling

                (( cari++ ))
        done

        return 0
}


# Entry point of the demo: tokenize the sample JSON text, convert the
# tokens into a tree of compound variables and print that tree.
main()
{
        # token array, filled in by parse_json
        typeset -a xar
        parse_json xar "${jsontext}"
        #print -v xar

        # Conversion starts at token 1 (token 0 is presumably the opening
        # brace of the top-level object) and ends at the number of
        # elements in xar[0].
        typeset -C -a xcar
        build_compound_tree xcar xar 1 "${#xar[0][@]}" 0

        print -v xcar

        return 0
}


# program start
builtin cat
# treat references to unset variables as errors
set -u

# run the demo and hand its exit status back to the caller
main
exit "$?"
_______________________________________________
ast-users mailing list
[email protected]
http://lists.research.att.com/mailman/listinfo/ast-users

Reply via email to