It has improved a bit. Regarding the Nim filter:
* 308fps by turning seq[seq[int]] into array[9,int32]
* 442fps by changing the bounds checks into min( max(val, 0), max_val)
Still slow but getting better.
import ../vapoursynth
import math
template clamp(val: int, maxVal: int): untyped =
  ## Restricts `val` to the range `0..maxVal`: values below zero become 0
  ## and values above `maxVal` become `maxVal`.
  min(max(0, val), maxVal)
proc apply_kernel*(src: ptr VSFrameRef, dst: ptr VSFrameRef,
                   kernel: array[9, int32], mul: int, den: int) =
  ## Applies a 3x3 convolution to every plane of `src` and stores the
  ## result in the matching plane of `dst`.
  ##
  ## * `kernel` - the nine weights, read in row-major order: indices 0..2
  ##   weight the row above the pixel, 3..5 the pixel's own row and 6..8
  ##   the row below.
  ## * `mul`, `den` - the weighted sum is multiplied by `mul` and then
  ##   integer-divided by `den` (truncating toward zero). `den` must not
  ##   be zero.
  ##
  ## Neighbours that would fall outside the plane are replaced by the
  ## nearest edge pixel. The scaled result is limited to 0..255 with this
  ## file's two-argument `clamp` template before being stored as `uint8`.
  doAssert den != 0, "apply_kernel: den must not be zero"
  let fi = API.getFrameFormat(src) # Format information
  for i in 0..<fi.numPlanes:
    var srcPlane = src[i]
    var dstPlane = dst[i]
    let height = srcPlane.height
    let width = srcPlane.width
    for row in 0..<height:
      # The neighbouring rows depend only on `row`, so resolve them once
      # per scanline instead of once per pixel.
      let row0 = clamp(row - 1, height - 1)
      let row2 = clamp(row + 1, height - 1)
      for col in 0..<width:
        let col0 = clamp(col - 1, width - 1)
        let col2 = clamp(col + 1, width - 1)
        # Weighted sum of the 3x3 neighbourhood, fully unrolled.
        let value: int32 =
          kernel[0] * int32(srcPlane[row0, col0]) +
          kernel[1] * int32(srcPlane[row0, col]) +
          kernel[2] * int32(srcPlane[row0, col2]) +
          kernel[3] * int32(srcPlane[row, col0]) +
          kernel[4] * int32(srcPlane[row, col]) +
          kernel[5] * int32(srcPlane[row, col2]) +
          kernel[6] * int32(srcPlane[row2, col0]) +
          kernel[7] * int32(srcPlane[row2, col]) +
          kernel[8] * int32(srcPlane[row2, col2])
        # Integer `div` avoids a float round-trip per pixel; saturating
        # keeps the uint8 conversion inside its valid range.
        dstPlane[row, col] = clamp(int(value) * mul div den, 255).uint8
Run
(I know I am not using kernel or n)