Good morning,

I'm noticing that my code slows down the more data I process. I _think_ I've 
isolated the problem to the count table (`result` in `proc get_min_frq`). After 
loading ~10 million data points into the count table, the code slows down and 
likely stalls completely. So what have I tried?

  * I tested the performance of `CountTable` in a stand-alone program, and 
it performs as expected (nice and fast).
  * Instead of counting my data of interest, I threw a random uint64 into the 
count table (`result`). To my surprise, this restored the performance of my 
program. The same number of entries is counted, but the data are different. 
**Why should this matter?** **Is this some sort of worst-case runtime 
scenario?** **How can I fix this?**


    
    
    type
      Min* = uint64 ## Minimizer value, stored as an unsigned 64-bit integer.
      minimizer_t* = object ## One minimizer record: value, `pos` and `strand`.
         minimizer*: Min ## The minimizer value itself.
         pos*: uint32 ## Presumably the position in the source sequence - TODO confirm.
         strand*: Strand ## `Strand` is declared elsewhere (not shown here).
     
     # Fragment: the enclosing proc header and the declarations of `counter`,
     # `window`, `win_min`, `win_min_idx` and `last_min` are not shown.
     # Presumably this is the body of `min_min` - TODO confirm.
     #
     # Walks `mins` while tracking the smallest minimizer seen recently, and
     # appends the current element to `result` whenever that tracked minimum
     # differs from the one recorded at the previous append.
     for i in mins:
         inc(counter)
         # The tracked minimum was recorded more than `window` steps ago:
         # forget it.
         # NOTE(review): after this reset only the current element is
         # considered; earlier elements that may still lie inside the window
         # are not re-examined - confirm this is intended.
         if win_min_idx < (counter - window):
             win_min = high(uint64)
             win_min_idx = -1
         # The current element becomes the tracked minimum if strictly smaller.
         if i.minimizer < win_min:
             win_min = i.minimizer
             win_min_idx = counter
         # Nothing is emitted while `counter` is still below `window`.
         if counter < window:
             continue
         # Emit only when the tracked minimum value changed. Note that the
         # element appended is the current `i`, which is not necessarily the
         # element that holds the minimum.
         if last_min != win_min:
             result.add(i)
             # `lastMin` and `last_min` are the same identifier in Nim
             # (identifiers are style-insensitive).
             lastMin = win_min
    
    
    proc get_min_frq*(db_prefix: string): CountTable[Min] =
      ## Counts adjacent-minimizer pair keys across every database file
      ## matching `<db_prefix>*.min.msgpck`.
      ##
      ## Each file is loaded with `load_min_db`. For every entry, `mins` is
      ## passed through two rounds of `min_min(_, 12)`; each pair of
      ## neighbouring survivors is then packed into a single `Min` key
      ## (`left.minimizer shl 32 or right.minimizer`) whose count is bumped.
      ##
      ## NOTE(review): the left value loses its upper 32 bits to the shift and
      ## the right value is OR-ed in unmasked, so keys are far from uniformly
      ## distributed. If the integer hash in use is weak this may cause heavy
      ## collisions in the CountTable - confirm by profiling.
      let pattern = fmt"{db_prefix}*.min.msgpck"
      for dbPath in walkPattern(pattern):
        let loaded = load_min_db(dbPath)
        for _, entry in loaded:
          let firstPass = min_min(entry.mins, 12)
          let secondPass = min_min(firstPass, 12)
          # Visit every adjacent pair (idx - 1, idx); the range is empty when
          # fewer than two minimizers survive.
          for idx in 1 ..< secondPass.len:
            let left = secondPass[idx - 1].minimizer
            let right = secondPass[idx].minimizer
            let pairKey: Min = (left shl 32) or right
            result.inc(pairKey)
    
    
    Run

**just using a random uint64**
    
    
    proc get_min_frq*(db_prefix: string): CountTable[Min] =
      ## Experimental variant of the counting proc: performs the same
      ## traversal over every file matching `<db_prefix>*.min.msgpck`, but
      ## counts a pseudo-random `uint64` (generator seeded with 2019) in place
      ## of each adjacent-minimizer pair key.
      ##
      ## The number of `inc` calls matches the number of adjacent pairs; only
      ## the keys inserted into the table differ.
      var rng = initRand(2019)
      stderr.writeLine(fmt"[INFO] counting N.M.P.s")
      let pattern = fmt"{db_prefix}*.min.msgpck"
      for dbPath in walkPattern(pattern):
        let loaded = load_min_db(dbPath)
        for _, entry in loaded:
          let firstPass = min_min(entry.mins, 12)
          let secondPass = min_min(firstPass, 12)
          for idx in 1 ..< secondPass.len:
            # The real key is still built so the per-iteration work stays the
            # same, but it is deliberately not counted in this experiment.
            let left = secondPass[idx - 1].minimizer
            let right = secondPass[idx].minimizer
            let pairKey: Min = (left shl 32) or right
            result.inc(next(rng))
    
    
    Run

**stand alone time testing CountTable**
    
    
    import random
    import times
    import tables
    import strformat
    
    when isMainModule:
      # Stand-alone timing check: insert pseudo-random uint64 keys into a
      # CountTable and print the wall-clock time of every single `inc`
      # together with the current table size.
      var ct = initCountTable[uint64]()
      var r = initRand(2019) # fixed seed so runs are reproducible

      for i in 0..50000000:
        let before = now()
        ct.inc(r.next())
        let after = now()
        # The message is assembled from two single-line literals: a Nim
        # "..." string literal cannot span several source lines.
        echo(fmt"iteration i:{i} time = {after - before} ; " &
             fmt"size of ct = {ct.len}")
    
    
    Run

Reply via email to