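Don't use regular expressions to parse programming languages. But if you have
no better tool at hand, at least use a `multiMatch` in combination with regular
expressions. What's a multi match? Something that considers all the given
patterns in lock-step: at every position of the input the patterns are tried in
the order they are written, the first one that matches runs its action and the
scan continues right after the matched text; if none matches, the `else` branch
runs and the scan advances by one character. Hopefully somebody can give a
better explanation.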
import re, macros
macro multiMatch*(inp: string; sections: untyped): untyped =
  ## "Multi regex match": scans `inp` from left to right and, at every
  ## position, tries the given patterns in the order they are written.
  ##
  ## Usage:
  ##
  ## .. code-block:: nim
  ##   multiMatch inp:
  ##   of pattern1:
  ##     x = matches[0]
  ##   of pattern2:
  ##     ...
  ##   else:
  ##     ...
  ##
  ## * Every `of` pattern must be a string literal; anything else is a
  ##   compile-time error.
  ## * All patterns are compiled once, before the scan starts, and are
  ##   compiled in extended mode (`reExtended`), so whitespace inside a
  ##   pattern is ignored by the regex engine.
  ## * The first pattern that matches a non-empty piece of text at the current
  ##   position runs its action; afterwards the position is advanced by the
  ##   length of the match and the remaining sections are skipped.
  ## * An `else` section runs when none of the patterns written before it
  ##   matched; the position then advances by one character.
  ##
  ## Inside the actions the injected variables `matches` (the captured
  ## sub-patterns) and `mmpos` (the current scan position) are available.

  template branch(inp, p, action) =
    # Code for one `of` section: try pattern number `p` at `mmpos`.
    # Empty matches are ignored so that the scan always makes progress.
    let mmlen = matchLen(inp, mmpatterns[p], matches, mmpos)
    if mmlen > 0:
      action
      inc(mmpos, mmlen)
      break searchSubs

  template searchLoop(inp, actions) {.dirty.} =
    # The scan loop. `inc(mmpos)` lives inside the block on purpose: a
    # successful branch leaves the block via `break` and must skip it.
    var mmpos = 0
    while mmpos < inp.len:
      block searchSubs:
        actions
        inc(mmpos)

  template declPatterns(size) =
    var mmpatterns {.inject.}: array[size, Regex]
    var matches {.inject.}: array[MaxSubpatterns, string]

  template createPattern(i, p) {.dirty.} =
    # The flags are spelled out instead of relying on the default of `re`,
    # so that patterns written with free-form whitespace keep working.
    bind re, reExtended, reStudy
    mmpatterns[i] = re(p, {reExtended, reStudy})

  result = newTree(nnkStmtList)

  # first pass: extract the regexes and validate the shape of the sections:
  var regexes: seq[string] = @[]
  for sec in sections.children:
    if sec.kind != nnkElse:
      expectKind sec, nnkOfBranch
      expectLen sec, 2
      if sec[0].kind in nnkStrLit..nnkTripleStrLit:
        regexes.add sec[0].strVal
      else:
        error("Expected a node of kind nnkStrLit, got " & $sec[0].kind)

  # now generate the regex construction; the compiled regexes are cached in
  # `mmpatterns` for efficiency:
  result.add getAst(declPatterns(regexes.len))
  for i, r in regexes:
    result.add getAst(createPattern(i, r))

  # last pass: generate the matching code, keeping the sections in source
  # order:
  let actions = newTree(nnkStmtList)
  var patternIdx = 0
  for sec in sections.children:
    if sec.kind == nnkElse:
      actions.add sec[0]
    else:
      actions.add getAst(branch(inp, patternIdx, sec[1]))
      inc patternIdx
  result.add getAst(searchLoop(inp, actions))
import sets
proc py2nim(inp: string): string =
  ## Translates a small subset of Python source text into Nim-like text and
  ## returns the result.
  ##
  ## The rewrite rules, tried in this order at every input position:
  ## * `def name(params):` becomes `proc name(params: auto): auto =` and
  ##   starts a fresh scope for the `var` bookkeeping below.
  ## * `name = ` becomes `var name = ` on the first assignment to `name`
  ##   within the current `def`, unless `name` was declared `global`.
  ## * `[:]` is dropped.
  ## * `[a:b]` becomes the half-open Nim slice `[a ..< b]`.
  ## * `print` becomes `echo`.
  ## * `global name` is removed from the output; `name` is remembered so that
  ##   later assignments to it get no `var`.
  ## Everything else is copied to the output one character at a time.
  var locals = initHashSet[string]()
  var globals = initHashSet[string]()
  result = newStringOfCap(inp.len + 1000)

  template emitText(x) =
    result.add x

  multiMatch inp:
  of r"\b def \s+ (\w+) \(([a-zA-Z0-9_, \t]+)\):":
    emitText "proc " & matches[0] & "(" & matches[1] & ": auto): auto ="
    # a new function starts: forget the names seen in the previous one
    locals.clear()
    globals.clear()
  of r"(\w+) \s* = \s+":
    let name = matches[0]
    # `containsOrIncl` also records `name`, so only the first assignment
    # gets a `var`; names declared `global` are never recorded as locals.
    if name notin globals and not locals.containsOrIncl(name):
      emitText "var "
    emitText name & " = "
  of r"\[\:\]":
    discard
  of r"\[(.+?):(.+?)\]":
    # Python's `a[i:j]` excludes `j`, which is what `..<` expresses in Nim.
    emitText "[" & matches[0] & " ..< " & matches[1] & "]"
  of r"\b print \b":
    emitText "echo"
  of r"\s*\b global \s+ (\w+);?":
    globals.incl matches[0]
  else:
    emitText inp[mmpos]
# Demo: run the translator over a small Python sample and print the result.
# Note that the sample's lines carry no leading indentation.
const pythonSample = """
def main(a, b):
global outer
outer = 3
c = a
if a[1:3] == b[:]:
print "the same!"
c = b
main(3, 4)
"""
echo py2nim(pythonSample)
The `multiMatch` macro could be turned into a stdlib module/nimble package.
It's a "regex replace taking a callback that drives the replacement" on
steroids.