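Don't use regular expressions to parse programming languages, but if you have 
no better tool at hand, at least use a `multiMatch` in combination with regular 
expressions. What's a multi match? Something that considers all the given 
patterns in lock-step: at each input position the patterns are tried in the 
order they are written, the first one that matches runs its action and consumes 
the matched text, and if none matches the `else` branch (if present) handles 
that position before the scan moves on by one character. Hopefully somebody can 
give a better explanation. 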
    
    
    import re, macros
    
    macro multiMatch*(inp: string; sections: untyped): untyped =
      ## "Multi regex match": scans `inp` from left to right and, at every
      ## position, tries the given patterns in the order they are written.
      ## The first pattern that matches a non-empty span runs its action and
      ## the scan resumes right after the matched text. If no pattern matches,
      ## the optional `else` action runs and the scan advances by one
      ## character.
      ##
      ## Inside an `of` action `matches` holds the captures of the pattern that
      ## just matched; `mmpos` is the current position within `inp` and is also
      ## visible in the `else` action. Patterns must be string literals. They
      ## are compiled with `reExtended`, so whitespace inside a pattern is not
      ## significant.
      ##
      ## Usage:
      ##
      ## .. code-block:: nim
      ##   multiMatch inp:
      ##   of pattern1:
      ##     x = matches[0]
      ##   of pattern2:
      ##     ...
      ##   else:
      ##     ...
      template branch(inp, p, action) =
        # `matchLen` reports -1 when the pattern does not match at `mmpos`;
        # empty matches are ignored too so the scan always makes progress.
        let mmlen = matchLen(inp, mmpatterns[p], matches, mmpos)
        if mmlen > 0:
          action
          inc(mmpos, mmlen)
          break searchSubs

      template searchLoop(inp, actions) {.dirty.} =
        var mmpos = 0
        while mmpos < inp.len:
          block searchSubs:
            actions
            # Only reached when no branch did `break searchSubs`.
            inc(mmpos)

      result = newTree(nnkStmtList)
      # first pass: extract regexes:
      var regexes: seq[string] = @[]
      for sec in sections.children:
        if sec.kind == nnkElse:
          discard
        else:
          expectKind sec, nnkOfBranch
          expectLen sec, 2
          if sec[0].kind in nnkStrLit..nnkTripleStrLit:
            regexes.add sec[0].strVal
          else:
            error("Expected a node of kind nnkStrLit, got " & $sec[0].kind)
      # now generate re-construction: every pattern is compiled once, before
      # the scan loop starts.
      template declPatterns(size) =
        var mmpatterns{.inject.}: array[size, Regex]
        var matches{.inject.}: array[MaxSubpatterns, string]

      template createPattern(i, p) {.dirty.} =
        # The flags are spelled out instead of relying on the default of `re`,
        # so patterns written with insignificant whitespace keep working
        # regardless of what that default is.
        bind re, reExtended, reStudy
        mmpatterns[i] = re(p, {reExtended, reStudy})

      result.add getAst(declPatterns(regexes.len))
      for i, r in regexes:
        result.add getAst(createPattern(i, r))

      # last pass: generate code:
      let actions = newTree(nnkStmtList)
      var i = 0
      for sec in sections.children:
        if sec.kind == nnkElse:
          actions.add sec[0]
        else:
          actions.add getAst branch(inp, i, sec[1])
          # Only `of` branches own a slot in `mmpatterns`; an `else` section
          # must not advance the pattern index.
          inc i
      result.add getAst searchLoop(inp, actions)
    
    
    import sets
    
    proc py2nim(inp: string): string =
      ## Toy translator that rewrites a few Python constructs found in `inp`
      ## into Nim syntax and returns the resulting text. Everything that is
      ## not recognised by one of the patterns below is copied through
      ## character by character.
      ##
      ## The patterns are written in extended-regex style, i.e. the blanks
      ## inside them are meant to be insignificant.
      var locals = initSet[string]()
      var globals = initSet[string]()
      result = newStringOfCap(inp.len + 1000)
      # Appends a string or a single character to the output.
      template emit(x) =
        result.add x
      multiMatch inp:
      of r"\b def \s+ (\w+) \(([a-zA-Z0-9_, \t]+)\):":
        emit "proc " & matches[0] & "(" & matches[1] & ": auto): auto ="
        # reset locals: a new function starts with fresh scopes.
        locals = initSet[string]()
        globals = initSet[string]()
      of r"(\w+) \s* = \s+":
        # First assignment to a name that was not declared `global` becomes a
        # `var` declaration; later assignments stay plain assignments.
        let name = matches[0]
        if name notin globals and not locals.containsOrIncl(name):
          emit "var "
        emit name & " = "
      of r"\[\:\]":
        # `x[:]` (full slice) is dropped from the output.
        discard
      of r"\[(.+?):(.+?)\]":
        # Python's `a[x:y]` excludes index `y`, which is `a[x..<y]` in Nim.
        emit "[" & matches[0] & "..<" & matches[1] & "]"
      of r"\b print \b":
        emit "echo"
      of r"\s*\b global \s+ (\w+);?":
        # Remember the name so assignments to it are not turned into `var`.
        globals.incl matches[0]
      else:
        emit inp[mmpos]
    
    # Demo: run the translator over a small Python snippet and print the
    # translated text.
    let pythonSample = """
    def main(a, b):
      global outer
      outer = 3
      c = a
      if a[1:3] == b[:]:
        print "the same!"
        c = b
    
    main(3, 4)
    """
    echo py2nim(pythonSample)
    

The `multiMatch` macro could be turned into a stdlib module/nimble package. 
It's a "regex replace taking a callback that drives the replacement" on 
steroids. 

Reply via email to