I observed that function invocation is the performance bottleneck in this 
function:

https://github.com/dhconnelly/rtreego/blob/v1.1.0/rtree.go#L631

In assembly, this translates into

```
CALL github.com/dhconnelly/rtreego.(*Rtree).searchIntersect(SB)
```

Is there room for improvement with respect to Go's recursive function 
invocation?

You can reproduce this using the source code at the bottom.
The pprof listings are:

```
$ go tool pprof main main.prof 
File: main
Type: cpu
Time: Sep 1, 2023 at 12:24am (CST)
Duration: 1.63s, Total samples = 1.42s (86.90%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) disasm searchIntersect
Total: 1.42s
ROUTINE ======================== 
github.com/dhconnelly/rtreego.(*Rtree).searchIntersect
     820ms      1.93s (flat, cum) 135.92% of Total
         .          .    10b9760: LEAQ 0xffffff50(SP), R12               
 ;rtree.go:653
         .          .    10b9768: CMPQ 0x10(R14), R12
         .          .    10b976c: JBE 0x10b9ac1
         .          .    10b9772: SUBQ $0x130, SP
         .          .    10b9779: MOVQ BP, 0x128(SP)
         .          .    10b9781: LEAQ 0x128(SP), BP
         .          .    10b9789: MOVQ BX, 0x160(SP)
         .          .    10b9791: MOVQ SI, 0x178(SP)
         .          .    10b9799: MOVQ R9, 0x188(SP)
         .          .    10b97a1: MOVQ R8, 0x180(SP)
         .          .    10b97a9: MOVQ AX, 0xe8(SP)
         .          .    10b97b1: MOVQ R10, 0x190(SP)
         .          .    10b97b9: MOVQ 0x8(SI), DX                       
 ;rtree.go:654
      20ms       20ms    10b97bd: MOVQ 0x10(SI), R11                     
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654
         .          .    10b97c1: MOVQ R11, 0x88(SP)                     
 ;rtree.go:654
         .          .    10b97c9: XORL R12, R12
         .          .    10b97cc: JMP 0x10b97d5
      50ms       50ms    10b97ce: ADDQ $0x38, DX                         
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654
         .          .    10b97d2: INCQ R12                               
 ;rtree.go:654
         .          .    10b97d5: CMPQ R11, R12
         .          .    10b97d8: JGE 0x10b9aa5
         .          .    10b97de: MOVQ 0(DX), R13
      30ms       30ms    10b97e1: MOVQ R13, 0xf0(SP)                     
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654
         .          .    10b97e9: MOVUPS 0x8(DX), X0                     
 ;rtree.go:654
      30ms       30ms    10b97ed: MOVUPS X0, 0xf8(SP)                     
;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654
         .          .    10b97f5: MOVUPS 0x18(DX), X0                     
;rtree.go:654
      10ms       10ms    10b97f9: MOVUPS X0, 0x108(SP)                   
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654
      10ms       10ms    10b9801: MOVUPS 0x28(DX), X0
         .          .    10b9805: MOVUPS X0, 0x118(SP)                   
 ;rtree.go:654
         .          .    10b980d: MOVUPS 0xf0(SP), X0                     
;rtree.go:655
     340ms      340ms    10b9815: MOVUPS X0, 0xb0(SP)                     
;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:655
      30ms       30ms    10b981d: MOVUPS 0x100(SP), X0
      10ms       10ms    10b9825: MOVUPS X0, 0xc0(SP)
      10ms       10ms    10b982d: MOVUPS 0x138(SP), X0
         .          .    10b9835: MOVUPS X0, 0x90(SP)                     
;rtree.go:655
         .          .    10b983d: MOVUPS 0x148(SP), X0
         .          .    10b9845: MOVUPS X0, 0xa0(SP)
         .          .    10b984d: MOVSD_XMM 0xa0(SP), X0                 
 ;geom.go:317
         .          .    10b9856: MOVSD_XMM 0xb0(SP), X1
      80ms       80ms    10b985f: UCOMISD X0, X1                         
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:317
      40ms       40ms    10b9863: JAE 0x10b97ce
      20ms       20ms    10b9869: MOVSD_XMM 0xc0(SP), X0
         .          .    10b9872: MOVSD_XMM 0x90(SP), X1                 
 ;geom.go:317
         .          .    10b987b: UCOMISD X0, X1
         .          .    10b987f: NOPL
         .          .    10b9880: JAE 0x10b97ce
         .          .    10b9886: MOVSD_XMM 0xa8(SP), X0                 
 ;geom.go:320
      10ms       10ms    10b988f: MOVSD_XMM 0xb8(SP), X1                 
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:320
         .          .    10b9898: UCOMISD X0, X1                         
 ;geom.go:320
         .          .    10b989c: NOPL 0(AX)
         .          .    10b98a0: JAE 0x10b97ce
      30ms       30ms    10b98a6: MOVSD_XMM 0xc8(SP), X0                 
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:320
      20ms       20ms    10b98af: MOVSD_XMM 0x98(SP), X1
         .          .    10b98b8: UCOMISD X0, X1                         
 ;geom.go:320
      20ms       20ms    10b98bc: NOPL 0(AX)                             
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:320
         .          .    10b98c0: JAE 0x10b97ce                           
;geom.go:320
      10ms       10ms    10b98c6: MOVQ R12, 0x80(SP)                     
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654
      10ms       10ms    10b98ce: MOVQ DX, 0xe0(SP)
         .          .    10b98d6: CMPB $0x0, 0x28(SI)                     
;rtree.go:659
      10ms       10ms    10b98da: NOPW 0(AX)(AX*1)                       
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:659
         .          .    10b98e0: JE 0x10b9a2d                           
 ;rtree.go:659
         .          .    10b98e6: MOVQ DI, 0x78(SP)                       
;rtree.go:660
         .          .    10b98eb: MOVQ CX, 0x70(SP)
         .          .    10b98f0: MOVQ BX, 0xd8(SP)
         .          .    10b98f8: MOVQ 0x118(SP), DX                     
 ;rtree.go:664
         .          .    10b9900: MOVQ 0x120(SP), SI
         .          .    10b9908: MOVQ BX, AX
         .          .    10b990b: MOVQ CX, BX
         .          .    10b990e: MOVQ DI, CX
         .          .    10b9911: MOVQ DX, DI
                                                                          
;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:664
         .       30ms    10b9914: CALL 
github.com/dhconnelly/rtreego.applyFilters(SB)
         .          .    10b9919: TESTL AL, AL                           
 ;rtree.go:664
         .          .    10b991b: JE 0x10b9934                           
 ;rtree.go:665
         .          .    10b991d: MOVQ 0x70(SP), DX                       
;rtree.go:660
         .          .    10b9922: MOVQ 0x78(SP), CX
         .          .    10b9927: MOVQ 0xd8(SP), AX
         .          .    10b992f: JMP 0x10b99d7                           
;rtree.go:665
         .          .    10b9934: MOVQ 0x70(SP), DX                       
;rtree.go:666
         .          .    10b9939: INCQ DX
         .          .    10b993c: MOVQ 0x120(SP), R11
         .          .    10b9944: MOVQ 0x118(SP), R12
         .          .    10b994c: MOVQ 0x78(SP), CX
         .          .    10b9951: CMPQ DX, CX
         .          .    10b9954: JB 0x10b9962
         .          .    10b9956: MOVQ 0xd8(SP), AX
         .          .    10b995e: NOPW
         .          .    10b9960: JMP 0x10b99a4
      10ms       10ms    10b9962: MOVQ R11, 0xd0(SP)                     
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:666
      10ms       10ms    10b996a: MOVQ R12, 0x68(SP)
         .          .    10b996f: MOVB BL, 0x67(SP)                       
;rtree.go:664
         .          .    10b9973: MOVQ 0xd8(SP), AX                       
;rtree.go:666
         .          .    10b997b: MOVQ DX, BX
         .          .    10b997e: MOVL $0x1, DI
         .          .    10b9983: LEAQ runtime.types+64192(SB), SI
         .      140ms    10b998a: CALL runtime.growslice(SB)             
 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:666
         .          .    10b998f: MOVQ 0xd0(SP), R11                     
 ;rtree.go:666
         .          .    10b9997: MOVQ 0x68(SP), R12
         .          .    10b999c: MOVQ BX, DX
         .          .    10b999f: MOVZX 0x67(SP), BX                     
 ;rtree.go:664
         .          .    10b99a4: LEAQ -0x1(DX), R13                     
 ;rtree.go:666
         .          .    10b99a8: SHLQ $0x4, R13
         .          .    10b99ac: MOVQ R12, 0(AX)(R13*1)
         .          .    10b99b0: LEAQ 0x8(R13), R12
         .          .    10b99b4: LEAQ 0(AX)(R12*1), DI
         .          .    10b99b8: CMPL $0x0, runtime.writeBarrier(SB)
         .          .    10b99bf: NOPL
         .          .    10b99c0: JNE 0x10b99c9
         .          .    10b99c2: MOVQ R11, 0x8(AX)(R13*1)
         .          .    10b99c7: JMP 0x10b99d7
         .          .    10b99c9: MOVQ DX, SI
         .          .    10b99cc: MOVQ R11, DX
         .          .    10b99cf: CALL runtime.gcWriteBarrierDX(SB)
         .          .    10b99d4: MOVQ SI, DX                             
;rtree.go:660
         .          .    10b99d7: TESTL BL, BL                           
 ;rtree.go:664
         .          .    10b99d9: JNE 0x10b9aae                           
;rtree.go:669
         .          .    10b99df: MOVQ 0x178(SP), SI                     
 ;rtree.go:659
         .          .    10b99e7: MOVQ 0x180(SP), R8                     
 ;rtree.go:664
         .          .    10b99ef: MOVQ 0x188(SP), R9
      10ms       10ms    10b99f7: MOVQ 0x190(SP), R10                     
;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:664
         .          .    10b99ff: MOVQ 0x88(SP), R11                     
 ;rtree.go:654
         .          .    10b9a07: MOVQ 0x80(SP), R12
         .          .    10b9a0f: MOVQ CX, DI                             
;rtree.go:660
         .          .    10b9a12: MOVQ DX, CX
         .          .    10b9a15: MOVQ AX, BX
         .          .    10b9a18: MOVQ 0xe8(SP), AX
         .          .    10b9a20: MOVQ 0xe0(SP), DX                       
;rtree.go:654
         .          .    10b9a28: JMP 0x10b97ce                           
;rtree.go:669
         .          .    10b9a2d: MOVUPS 0x138(SP), X0                   
 ;rtree.go:660
         .          .    10b9a35: MOVQ 0x110(SP), SI
         .          .    10b9a3d: MOVUPS X0, 0(SP)
         .          .    10b9a41: MOVUPS 0x148(SP), X0
         .          .    10b9a49: MOVUPS X0, 0x10(SP)
                                                                          
;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:660
         .      940ms    10b9a4e: CALL 
github.com/dhconnelly/rtreego.(*Rtree).searchIntersect(SB)
         .          .    10b9a53: MOVQ 0xe0(SP), DX                       
;rtree.go:654
         .          .    10b9a5b: MOVQ 0x178(SP), SI                     
 ;rtree.go:659
         .          .    10b9a63: MOVQ 0x180(SP), R8                     
 ;rtree.go:664
         .          .    10b9a6b: MOVQ 0x188(SP), R9
         .          .    10b9a73: MOVQ 0x190(SP), R10
         .          .    10b9a7b: MOVQ 0x88(SP), R11                     
 ;rtree.go:654
         .          .    10b9a83: MOVQ 0x80(SP), R12
         .          .    10b9a8b: MOVQ CX, DI                             
;rtree.go:660
         .          .    10b9a8e: MOVQ BX, CX
         .          .    10b9a91: MOVQ AX, BX
         .          .    10b9a94: MOVQ 0xe8(SP), AX
         .          .    10b9a9c: NOPL 0(AX)
         .          .    10b9aa0: JMP 0x10b97ce                           
;rtree.go:661
         .          .    10b9aa5: MOVQ CX, DX                             
;rtree.go:673
         .          .    10b9aa8: MOVQ BX, AX
         .          .    10b9aab: MOVQ DI, CX
         .          .    10b9aae: MOVQ DX, BX
         .          .    10b9ab1: MOVQ 0x128(SP), BP
         .          .    10b9ab9: ADDQ $0x130, SP
         .          .    10b9ac0: RET
         .          .    10b9ac1: MOVQ AX, 0x28(SP)                       
;rtree.go:653
         .          .    10b9ac6: MOVQ BX, 0x30(SP)
         .          .    10b9acb: MOVQ CX, 0x38(SP)
         .          .    10b9ad0: MOVQ DI, 0x40(SP)
         .          .    10b9ad5: MOVQ SI, 0x48(SP)
         .          .    10b9ada: MOVQ R8, 0x50(SP)
         .          .    10b9adf: MOVQ R9, 0x58(SP)
         .          .    10b9ae4: MOVQ R10, 0x60(SP)
         .          .    10b9ae9: CALL runtime.morestack_noctxt.abi0(SB)
         .          .    10b9aee: MOVQ 0x28(SP), AX
         .          .    10b9af3: MOVQ 0x30(SP), BX
         .          .    10b9af8: MOVQ 0x38(SP), CX
         .          .    10b9afd: MOVQ 0x40(SP), DI
         .          .    10b9b02: MOVQ 0x48(SP), SI
         .          .    10b9b07: MOVQ 0x50(SP), R8
         .          .    10b9b0c: MOVQ 0x58(SP), R9
         .          .    10b9b11: MOVQ 0x60(SP), R10
         .          .    10b9b16: JMP 
github.com/dhconnelly/rtreego.(*Rtree).searchIntersect(SB)
         .          .    10b9b1b: INT $0x3
         .          .    10b9b1c: INT $0x3
         .          .    10b9b1d: INT $0x3
         .          .    10b9b1e: INT $0x3
(pprof) list searchIntersect  
Total: 1.42s
ROUTINE ======================== 
github.com/dhconnelly/rtreego.(*Rtree).searchIntersect in 
/Users/shaoyu/go/src/github.com/dhconnelly/rtreego/rtree.go
     600ms      1.93s (flat, cum) 135.92% of Total
         .          .    653:func (tree *Rtree) searchIntersect(results 
[]Spatial, n *node, bb Rect, filters []Filter) []Spatial {
     170ms      170ms    654: for _, e := range n.entries {
     390ms      610ms    655: if !intersect(e.bb, bb) {
         .          .    656: continue
         .          .    657: }
         .          .    658:
      10ms       10ms    659: if !n.leaf {
         .      940ms    660: results = tree.searchIntersect(results, 
e.child, bb, filters)
         .          .    661: continue
         .          .    662: }
         .          .    663:
      10ms       40ms    664: refuse, abort := applyFilters(results, e.obj, 
filters)
         .          .    665: if !refuse {
      20ms      160ms    666: results = append(results, e.obj)
         .          .    667: }
         .          .    668:
         .          .    669: if abort {
         .          .    670: break
         .          .    671: }
(pprof) 
```

```
package main

import (
"flag"
"log"
"math/rand"
"os"
"runtime/pprof"

"github.com/dhconnelly/rtreego"
)

type point struct {
rtreego.Point
}

func (p point) Bounds() rtreego.Rect {
r := p.ToRect(0)
return r
}

func (p point) dist(q point) float64 {
sum := 0.0
for i := range p.Point {
dx := p.Point[i] - q.Point[i]
sum += dx * dx
}
return sum
}

func main() {
flag.Parse()
log.SetFlags(log.Flags() | log.Llongfile)

cpuprofile := "main.prof"
f, err := os.Create(cpuprofile)
if err != nil {
log.Fatal("could not create CPU profile: ", err)
}
defer f.Close() // error handling omitted for example
if err := pprof.StartCPUProfile(f); err != nil {
log.Fatal("could not start CPU profile: ", err)
}
defer pprof.StopCPUProfile()

if err := mainWithErr(); err != nil {
log.Fatalf("%+v", err)
}
}

func mainWithErr() error {
points := make([]rtreego.Spatial, 0, 1e5)
for i := 0; i < cap(points); i++ {
p := point{}
p.Point[0] = rand.Float64()
p.Point[1] = rand.Float64()
points = append(points, p)
}
tree := rtreego.NewTree(2, 25, 50, points...)

neighborCnt := 0
for i, tgt := range points {
target := tgt.(point)
neighbors := tree.SearchIntersect(target.Point.ToRect(0.01))
neighborCnt += len(neighbors)

if i % 1000 == 0 {
log.Printf("%d", i)
}
}
neighborCnt /= len(points)

log.Printf("%d", neighborCnt)

return nil
}
```

-- 
You received this message because you are subscribed to the Google Groups 
"golang-nuts" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to golang-nuts+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/golang-nuts/c9a5556d-df10-4ace-8504-ce8272f5c4dfn%40googlegroups.com.

Reply via email to