thoughts on functional prototype programming

kragen Sat, 01 Oct 2005 00:37:38 -0700

# Some thoughts on prototype programming, transcribed from a notebook

# Notation:
# x.a is the value of the field "a" from the object "x".


# { a = 1, b = 2 } is an object with two fields; equivalent to
# {
#     a = 1
#     b = 2
# }
# and also { self.a = 1, this.b = 2 }.  The field definitions are
# really method bodies; the name before the "." is the name they can
# use to refer to the object they're being called on.  You could write
# another object with the same two fields and values as 
# { a = 1, banana.b = banana.a + 1 }.

# You can "derive" from an object by writing the object followed by 
# { } and some more field definitions.  The resulting object will have
# the specified fields overridden.  So { a = 1, x.b = x.a + 1 } { a = 3 }
# is another way to write { a = 3, x.b = x.a + 1 }.  (In general,
# because of lexical scoping, you can't always evaluate by textual
# substitution in this way.)

# Objects not explicitly derived from anything else are derived from
# "object".

# You can surround something non-wordlike with '' to make it a word
# you can use as a field name or object name.  This way you can define
# fields with funny names.  For example, { '#$%^&*@!' = 3 } has one
# field, named "#$%^&*@!".

# "foo(stuff)" is just a shorthand for "foo{stuff}.'()'".  "{a, b, c}"
# is just a shorthand for "{ arg1 = a, arg2 = b, arg3 = c }".

# You can write strings surrounded by "".  They derive from an object
# known as "string", which derives from "object".

# In various places here I use infix operators, such as "+", "%",
# "==", "*", and "<".  The intent is that "a + b" is shorthand for
# "a.'+'(b)", i.e. "a.'+'{arg1=b}.'()'", but nothing here yet tries to
# override these methods.  Most of the operators would be implemented
# as primitives and are self-explanatory, but the "%" on strings is
# not.  It's inspired by the Python string "%" operator, but works
# differently; it replaces curly-bracketed names in its left operand
# with the named fields in the right operand.

# "a[b]" is intended to be shorthand for "a.'[]'(b)".  Indices are
# zero-based; I've borrowed Python's slice syntax; foo[bar:baz] is a
# subsequence of foo, containing foo[bar], foo[bar+1], foo[bar+2],
# etc., up to foo[baz-1].  foo[bar:] is the same as
# foo[bar:foo.length], and foo[:baz] is the same as foo[0:baz].  I
# don't know how this syntax should work.



# In this file, the names given to self parameters in top-level
# definitions denote the objects on which those names are defined.

# recursive string.split

string.split = {
    delim = arg1 = ","    # syntax for making "delim" an alias for arg 1
    # where the first delimiter starts; null when there is none, which is
    # what the ifNull in '()' tests for
    split.pos = string.index(split.delim)
    # text in front of the first delimiter
    split.before = string[:split.pos]
    # text behind the first delimiter (the delimiter itself is skipped)
    split.after = string[split.pos + split.delim.length:]
    # the pieces of the remainder, split on the same delimiter
    split.recursion = split.after.split(split.delim)
    # No delimiter: the whole string is the only piece.  Otherwise the
    # first piece is prepended to the pieces of the remainder.
    # "recursion" is a field of this object, so it is reached through the
    # self name like every other field here.
    split.'()' = split.pos.ifNull(
        then = [string]
        else = [split.before] + split.recursion
    )
}

# ifNull, ifTrue don't need to be primitives
true.ifTrue = {
    # defaults for the two branches; a caller overrides them by name,
    # as in ifTrue(then=..., else=...)
    then = true
    else = false
    # calling true's ifTrue yields the "then" branch
    self.'()' = self.then
}

# false's ifTrue is just like true's, except that it overrides the
# return value
# (it derives from true.ifTrue, so "then" and "else" keep their defaults
# and only '()' changes, to yield the "else" branch)
false.ifTrue = true.ifTrue { self.'()' = self.else }

# prototype-based functions give us currying, without the mental overhead
# ("object is null" selects the ifTrue of true or of false; that ifTrue
# object becomes ifNull without being called, so the caller of ifNull
# supplies "then" and "else" later)
object.ifNull = (object is null).ifTrue

# Here's something like C's "for" loop.
# for (state; !done; ...) { ... }
loop = {
    # default incoming state; a derivation overrides "state" to supply
    # its own starting values
    state = { i = 0 }
    # the termination test looks at the incoming state
    loop.done = loop.state.i == 10
    # this step's counter is one more than the incoming state's
    loop.i = loop.state.i + 1
    # when done, yield the incoming state; otherwise run the loop again
    # with this object (which carries the new "i") as the next state
    loop.'()' = loop.done.ifTrue(then=loop.state, else=loop(state=loop))
}
# Perhaps it should say "then=loop.state.result" instead of
# "then=loop.state".  That could flatten the following routine into
# just a derivation from "loop".  I'm not sure what object "loop"
# should be defined on.

string.split = {
    delim = arg1 = ","
    # "s" names the split object; "loop" names the object derived from
    # the generic loop, whose fields are overridden inside the parentheses
    s.loop = loop(
        # start with no pieces collected and the whole string remaining
        state = { rv = [], str = string }
        # where the next delimiter starts in the remaining text
        loop.pos = loop.state.str.index(s.delim)
        # stop once no delimiter is left
        loop.done = loop.pos is null
        # the piece in front of the delimiter, appended to the pieces so far
        loop.pre = loop.state.str[:loop.pos]
        loop.rv = loop.state.rv + [loop.pre]
        # what remains behind the delimiter
        loop.str = loop.state.str[loop.pos + s.delim.length:]
    )
    # the value of s.loop supplies "rv" and "str"; the leftover text
    # becomes the last piece
    s.'()' = s.loop.rv + [s.loop.str]
}
# I think there's a bug there when pos is initially null

# often we want some object, or if it's null, some substitute.  People
# often do this in Perl with 'or' or '||'.
object.nullis = {
    # the substitute value; "default" is an alias for the first argument
    default = arg1 = null
    # yield the substitute when the receiver is null, else the receiver
    nullis.'()' = object.ifNull(then=nullis.default, else=object)
}

# Here's a URL manipulation object.  It contains five derivations from
# non-class-like objects, in essentially the places where a
# non-functional OO language would mutate itself.
url = {
    # Default field values; derive from url (url{host=...}) to override.
    scheme = "http"
    host = "www.example.com"
    port = 80
    path = "/"
    query = null
    fragment = null
    username = null
    password = null

    # "host" alone when there is no port, otherwise "host:port"
    url.hostport = url.port.ifNull(then=url.host, else="{host}:{port}" % url)
    # With no password this is just the username (possibly null).
    # Otherwise "username:password", with a null username formatted as ""
    # (derivation 1: url with username replaced).
    url.auth = url.password.ifNull(
        then = url.username
        else = "{username}:{password}" % url{username=url.username.nullis("")}
    )

    # Derivation 2: the format string is applied to a copy of url carrying
    # three extra computed fields, rather than to url itself.
    url.asString = "{scheme}://{netloc}{path}{qpart}{fpart}" % url{
        # "hostport" alone when there are no credentials, otherwise
        # "auth@hostport"
        netloc = url.auth.ifNull(
            then = url.hostport
            else = "{auth}@{hostport}" % url
        )
        # the query and fragment parts vanish entirely when null
        qpart = url.query.ifNull(then="", else="?" + url.query)
        fpart = url.fragment.ifNull(then="", else="#" + url.fragment)
    }

    # Derivation 3: the same url with port 80 replaced by null, so that
    # hostport leaves it out.
    url.omitDefPort = url{
        port = (url.port == 80).ifTrue(then=null, else=url.port)
    }
    # Derivation 4: the same url with a null port replaced by 80.
    url.explicitPort = url{port = url.port.nullis(80)}
    # Derivation 5: a url with host and port taken from a "host:port"
    # string; the port is null when the string contains no ":".
    url.withHostport = {
        arg1 = "example.com:8000"
        my.hostport = my.arg1.split(":")
        my.host = my.hostport[0]
        my.port = (my.hostport.length < 2).ifTrue(
            then = null
            else = my.hostport[1].asInt
        )
        my.'()' = url{host=my.host, port=my.port}
    }
}

# this API allows
# withHostport splits its argument on ":" and converts the second piece
# with asInt
url.withHostport("lula:1000").port   # --> 1000
# omitDefPort turns port 80 into null, and hostport is just the host when
# the port is null
url{host="lula", port=80}.omitDefPort.hostport   # --> "lula"

# When I wrote this, I thought that a non-prototype-based interface
# probably would need a separate "hostport" class for this kind of
# manipulation, but I don't think that's true.

# first thing I scribbled down in this notation on 2005-07-21
# NOTE(review): kept exactly as first scribbled, in an earlier form of the
# notation (arg / equals / times / minus).  The "f(" call is never closed
# with ")" and one closing "}" is missing; the corrected version follows.
{
    fact = {
        arg = 1
        f.val = arg.equals(0).ifTrue{
            then = 1
            else = f.arg.times{
                f(arg = f.arg.minus(1)
            }
        }
}

# corrected and modernized, this is
factorial = {
    # default argument
    arg1 = 1
    # Base case 0 yields 1; otherwise multiply the argument by the
    # factorial of its predecessor.  The test reads the argument through
    # the self name "f", like the recursive branch does, so that it sees
    # the arg1 of the object actually being called.
    f.'()' = (f.arg1 == 0).ifTrue(
        then = 1
        else = f.arg1 * f(f.arg1 - 1)
    )
}


# ok, so here we go again with string.split

object.loop = {
    # default previous iteration; a derivation overrides "prev" to supply
    # its own starting values
    prev = { i = 0 }
    # this iteration's counter is one more than the previous one's
    loop.i = loop.prev.i + 1
    # the termination test looks at this iteration's counter
    loop.done = loop.i == 10
    # when done, yield the "result" field of the previous iteration;
    # otherwise run the loop again with this object as the next "prev"
    loop.'()' = loop.done.ifTrue(then=loop.prev.result, else=loop(prev=loop))
}

string.split = string.loop {
    delim = arg1 = ","
    # starting point: no pieces collected, the whole string still left
    self.prev = self { rv = [], left = string }
    # where the next delimiter starts in the text left over so far
    loop.pos = loop.prev.left.index(loop.delim)
    # stop once no delimiter is left
    loop.done = loop.pos is null
    # pieces so far, plus the piece in front of this delimiter
    loop.rv = loop.prev.rv + [loop.prev.left[:loop.pos]]
    # The text behind this delimiter.  It is taken from the previous
    # iteration's "left"; this version of the loop carries "prev" and
    # "left", not the "state" and "str" of the earlier version.
    loop.left = loop.prev.left[loop.pos + loop.delim.length:]
    # the leftover text is the last piece
    loop.result = loop.rv + [loop.left]
}

# compare Python more-or-less equivalent:
# def splitstring(string, delim=","):
#     rv = []
#     left = string
#     while 1:
#         pos = left.find(delim)
#         if pos == -1: return rv + [left]
#         rv.append(left[:pos])
#         left = left[pos + len(delim):]
# ... not much of an improvement.  

# In "contextual" form, where most of the self-names are elided:

split = loop {
    delim = arg1 = ","
    # starting point: no pieces collected, the whole string still left
    self.prev = self { rv = [], left = string }
    # where the next delimiter starts in the text left over so far
    pos = prev.left.index(delim)
    # stop once no delimiter is left
    done = pos is null
    # pieces so far, plus the piece in front of this delimiter
    rv = prev.rv + [prev.left[:pos]]
    # The text behind this delimiter, taken from the previous iteration's
    # "left"; this loop carries "prev" and "left", not "state" and "str".
    left = prev.left[pos + delim.length:]
    # the leftover text is the last piece
    result = rv + [left]
}

# string.index finds one string in another; the naive algorithm:

string.index = string.loop {
    # the string being searched for
    needle = arg1 = "foo"
    self.prev = self { i = 0 }
    # Inner loop: compares the needle with the string at offset outer.i.
    # "in" and "inner" both name the inner loop object; each field
    # definition picks its own self name.
    # NOTE(review): object.loop's '()' yields prev.result, but no "result"
    # field is defined for the inner loop -- confirm what outer.found is
    # meant to receive (presumably whether every character matched).
    outer.found = string.loop (
        prev = self { i = 0, equal = true }
        in.equal = string[in.i + outer.i] == outer.needle[in.i]
        # stop at the first mismatch, or once the index reaches the
        # needle's length
        inner.done = (!inner.equal) or inner.i >= outer.needle.length
    )
    # stop on a match, or once the needle can no longer fit in what is
    # left of the string
    outer.done = outer.found or outer.i > string.length - outer.needle.length
    # the matching offset, or null when the needle was not found
    outer.result = outer.found.ifTrue(then=outer.i, else=null)
}

# typedef struct { char *s; int length; } string;
# int index(string *haystack, string *needle) {
#     int ii, jj;
#     for (ii = 0; ii <= haystack->length - needle->length; ii++) {
#         for (jj = 0; jj < needle->length; jj++) {
#             if (haystack->s[ii + jj] != needle->s[jj]) goto nextloop;
#         }
#         return ii;
#     nextloop: ;
#     }
#     return -1;
# }

thoughts on functional prototype programming

Reply via email to