Hi, I'm building trigrams for text. And not sure how to optimise it.

  1. Nim won't allow to use `addr` and suggest to use `unsafe_addr` instead, 
why?
  2. The code fails with `unsafe_addr`
  3. It seems that the whole idea to store StringSlice in HashTable is wrong. 
The trigram string should be stored in HashTable, and Slice should be used only 
to get the code from the table, and not be stored there. But `Table[string, 
int]` won't allow to be queried by `table[key_of_string_slice_type]`.


    
    
    import std/[tables, hashes, strutils]
    
    type SSlice = object
      s:    ptr string
      i, l: int
    
    proc sslice*(s: ptr string, i: int, l: int): SSlice =
      SSlice(s: s, i: i, l: l)
    
    proc `$`*(s: SSlice): string =
      for j in 0..(s.l - 1): result.add s.s[s.i + j]
    
    proc `==`(a, b: SSlice): bool =
      if a.l != b.l: return false
      for j in 0.int..(a.l - 1):
        if a.s[a.i + j] != b.s[b.i + j]: return false
      true
    
    proc hash*(s: SSlice): Hash =
      result = 0
      for j in 0..(s.l - 1):
        result = result !& s.s[s.i + j].hash
      result = !$result
    
    var codes: Table[SSlice, uint16]
    
    template encode_trigram*(s: SSlice): uint16 =
      var r: uint16
      with_value(codes, s, value):
        r = value[]
      do:
        codes[s] = codes.len.uint16
        r = codes[s]
      r
    
    proc to_trigrams*(text: string): seq[uint16] =
      if text.len < 3: return text.align_left(3).to_trigrams
      for i in 0..(text.len - 3):
        result.add sslice(unsafe_addr text, i, 3).encode_trigram
      let last_two = text[^2..^1] & " "
      result.add sslice(unsafe_addr last_two,  0, 3).encode_trigram
      let last_one = text[^1..^1] & "  "
      result.add sslice(unsafe_addr last_one, 0, 3).encode_trigram
    
    echo "some text".to_trigrams
    echo "some".to_trigrams
    
    
    Run

P.S.

  4. Also, not sure why this code fails?


    
    
    type SSlice = object
      s:    string
      i, l: int
    
    template sslice*(s: string, i: int, l: int): SSlice =
      SSlice(s: s, i: i, l: l)
    
    echo sslice("some", 0, 3)
    
    
    Run
    
    
    .nim(10, 13) Error: identifier expected, but found '"some"'
    
    Run

Reply via email to