I observed that function invocation is the performance bottleneck in this function:
https://github.com/dhconnelly/rtreego/blob/v1.1.0/rtree.go#L631 In assembly, this translates into ``` CALL github.com/dhconnelly/rtreego.(*Rtree).searchIntersect(SB) ``` Is there room for improvement with respect to Go's recursive function invocation? You can reproduce this using the source code at the bottom. The pprof listings are: ``` $ go tool pprof main main.prof File: main Type: cpu Time: Sep 1, 2023 at 12:24am (CST) Duration: 1.63s, Total samples = 1.42s (86.90%) Entering interactive mode (type "help" for commands, "o" for options) (pprof) disasm searchIntersect Total: 1.42s ROUTINE ======================== github.com/dhconnelly/rtreego.(*Rtree).searchIntersect 820ms 1.93s (flat, cum) 135.92% of Total . . 10b9760: LEAQ 0xffffff50(SP), R12 ;rtree.go:653 . . 10b9768: CMPQ 0x10(R14), R12 . . 10b976c: JBE 0x10b9ac1 . . 10b9772: SUBQ $0x130, SP . . 10b9779: MOVQ BP, 0x128(SP) . . 10b9781: LEAQ 0x128(SP), BP . . 10b9789: MOVQ BX, 0x160(SP) . . 10b9791: MOVQ SI, 0x178(SP) . . 10b9799: MOVQ R9, 0x188(SP) . . 10b97a1: MOVQ R8, 0x180(SP) . . 10b97a9: MOVQ AX, 0xe8(SP) . . 10b97b1: MOVQ R10, 0x190(SP) . . 10b97b9: MOVQ 0x8(SI), DX ;rtree.go:654 20ms 20ms 10b97bd: MOVQ 0x10(SI), R11 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654 . . 10b97c1: MOVQ R11, 0x88(SP) ;rtree.go:654 . . 10b97c9: XORL R12, R12 . . 10b97cc: JMP 0x10b97d5 50ms 50ms 10b97ce: ADDQ $0x38, DX ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654 . . 10b97d2: INCQ R12 ;rtree.go:654 . . 10b97d5: CMPQ R11, R12 . . 10b97d8: JGE 0x10b9aa5 . . 10b97de: MOVQ 0(DX), R13 30ms 30ms 10b97e1: MOVQ R13, 0xf0(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654 . . 10b97e9: MOVUPS 0x8(DX), X0 ;rtree.go:654 30ms 30ms 10b97ed: MOVUPS X0, 0xf8(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654 . . 10b97f5: MOVUPS 0x18(DX), X0 ;rtree.go:654 10ms 10ms 10b97f9: MOVUPS X0, 0x108(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654 10ms 10ms 10b9801: MOVUPS 0x28(DX), X0 . . 10b9805: MOVUPS X0, 0x118(SP) ;rtree.go:654 . . 10b980d: MOVUPS 0xf0(SP), X0 ;rtree.go:655 340ms 340ms 10b9815: MOVUPS X0, 0xb0(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:655 30ms 30ms 10b981d: MOVUPS 0x100(SP), X0 10ms 10ms 10b9825: MOVUPS X0, 0xc0(SP) 10ms 10ms 10b982d: MOVUPS 0x138(SP), X0 . . 10b9835: MOVUPS X0, 0x90(SP) ;rtree.go:655 . . 10b983d: MOVUPS 0x148(SP), X0 . . 10b9845: MOVUPS X0, 0xa0(SP) . . 10b984d: MOVSD_XMM 0xa0(SP), X0 ;geom.go:317 . . 10b9856: MOVSD_XMM 0xb0(SP), X1 80ms 80ms 10b985f: UCOMISD X0, X1 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:317 40ms 40ms 10b9863: JAE 0x10b97ce 20ms 20ms 10b9869: MOVSD_XMM 0xc0(SP), X0 . . 10b9872: MOVSD_XMM 0x90(SP), X1 ;geom.go:317 . . 10b987b: UCOMISD X0, X1 . . 10b987f: NOPL . . 10b9880: JAE 0x10b97ce . . 10b9886: MOVSD_XMM 0xa8(SP), X0 ;geom.go:320 10ms 10ms 10b988f: MOVSD_XMM 0xb8(SP), X1 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:320 . . 10b9898: UCOMISD X0, X1 ;geom.go:320 . . 10b989c: NOPL 0(AX) . . 10b98a0: JAE 0x10b97ce 30ms 30ms 10b98a6: MOVSD_XMM 0xc8(SP), X0 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:320 20ms 20ms 10b98af: MOVSD_XMM 0x98(SP), X1 . . 10b98b8: UCOMISD X0, X1 ;geom.go:320 20ms 20ms 10b98bc: NOPL 0(AX) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect geom.go:320 . . 10b98c0: JAE 0x10b97ce ;geom.go:320 10ms 10ms 10b98c6: MOVQ R12, 0x80(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:654 10ms 10ms 10b98ce: MOVQ DX, 0xe0(SP) . . 10b98d6: CMPB $0x0, 0x28(SI) ;rtree.go:659 10ms 10ms 10b98da: NOPW 0(AX)(AX*1) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:659 . . 10b98e0: JE 0x10b9a2d ;rtree.go:659 . . 10b98e6: MOVQ DI, 0x78(SP) ;rtree.go:660 . . 10b98eb: MOVQ CX, 0x70(SP) . . 10b98f0: MOVQ BX, 0xd8(SP) . . 10b98f8: MOVQ 0x118(SP), DX ;rtree.go:664 . . 10b9900: MOVQ 0x120(SP), SI . . 10b9908: MOVQ BX, AX . . 10b990b: MOVQ CX, BX . . 10b990e: MOVQ DI, CX . . 10b9911: MOVQ DX, DI ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:664 . 30ms 10b9914: CALL github.com/dhconnelly/rtreego.applyFilters(SB) . . 10b9919: TESTL AL, AL ;rtree.go:664 . . 10b991b: JE 0x10b9934 ;rtree.go:665 . . 10b991d: MOVQ 0x70(SP), DX ;rtree.go:660 . . 10b9922: MOVQ 0x78(SP), CX . . 10b9927: MOVQ 0xd8(SP), AX . . 10b992f: JMP 0x10b99d7 ;rtree.go:665 . . 10b9934: MOVQ 0x70(SP), DX ;rtree.go:666 . . 10b9939: INCQ DX . . 10b993c: MOVQ 0x120(SP), R11 . . 10b9944: MOVQ 0x118(SP), R12 . . 10b994c: MOVQ 0x78(SP), CX . . 10b9951: CMPQ DX, CX . . 10b9954: JB 0x10b9962 . . 10b9956: MOVQ 0xd8(SP), AX . . 10b995e: NOPW . . 10b9960: JMP 0x10b99a4 10ms 10ms 10b9962: MOVQ R11, 0xd0(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:666 10ms 10ms 10b996a: MOVQ R12, 0x68(SP) . . 10b996f: MOVB BL, 0x67(SP) ;rtree.go:664 . . 10b9973: MOVQ 0xd8(SP), AX ;rtree.go:666 . . 10b997b: MOVQ DX, BX . . 10b997e: MOVL $0x1, DI . . 10b9983: LEAQ runtime.types+64192(SB), SI . 140ms 10b998a: CALL runtime.growslice(SB) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:666 . . 10b998f: MOVQ 0xd0(SP), R11 ;rtree.go:666 . . 10b9997: MOVQ 0x68(SP), R12 . . 10b999c: MOVQ BX, DX . . 10b999f: MOVZX 0x67(SP), BX ;rtree.go:664 . . 10b99a4: LEAQ -0x1(DX), R13 ;rtree.go:666 . . 10b99a8: SHLQ $0x4, R13 . . 10b99ac: MOVQ R12, 0(AX)(R13*1) . . 10b99b0: LEAQ 0x8(R13), R12 . . 10b99b4: LEAQ 0(AX)(R12*1), DI . . 10b99b8: CMPL $0x0, runtime.writeBarrier(SB) . . 10b99bf: NOPL . . 10b99c0: JNE 0x10b99c9 . . 10b99c2: MOVQ R11, 0x8(AX)(R13*1) . . 10b99c7: JMP 0x10b99d7 . . 10b99c9: MOVQ DX, SI . . 10b99cc: MOVQ R11, DX . . 10b99cf: CALL runtime.gcWriteBarrierDX(SB) . . 10b99d4: MOVQ SI, DX ;rtree.go:660 . . 10b99d7: TESTL BL, BL ;rtree.go:664 . . 10b99d9: JNE 0x10b9aae ;rtree.go:669 . . 10b99df: MOVQ 0x178(SP), SI ;rtree.go:659 . . 10b99e7: MOVQ 0x180(SP), R8 ;rtree.go:664 . . 10b99ef: MOVQ 0x188(SP), R9 10ms 10ms 10b99f7: MOVQ 0x190(SP), R10 ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:664 . . 10b99ff: MOVQ 0x88(SP), R11 ;rtree.go:654 . . 10b9a07: MOVQ 0x80(SP), R12 . . 10b9a0f: MOVQ CX, DI ;rtree.go:660 . . 10b9a12: MOVQ DX, CX . . 10b9a15: MOVQ AX, BX . . 10b9a18: MOVQ 0xe8(SP), AX . . 10b9a20: MOVQ 0xe0(SP), DX ;rtree.go:654 . . 10b9a28: JMP 0x10b97ce ;rtree.go:669 . . 10b9a2d: MOVUPS 0x138(SP), X0 ;rtree.go:660 . . 10b9a35: MOVQ 0x110(SP), SI . . 10b9a3d: MOVUPS X0, 0(SP) . . 10b9a41: MOVUPS 0x148(SP), X0 . . 10b9a49: MOVUPS X0, 0x10(SP) ;github.com/dhconnelly/rtreego.(*Rtree).searchIntersect rtree.go:660 . 940ms 10b9a4e: CALL github.com/dhconnelly/rtreego.(*Rtree).searchIntersect(SB) . . 10b9a53: MOVQ 0xe0(SP), DX ;rtree.go:654 . . 10b9a5b: MOVQ 0x178(SP), SI ;rtree.go:659 . . 10b9a63: MOVQ 0x180(SP), R8 ;rtree.go:664 . . 10b9a6b: MOVQ 0x188(SP), R9 . . 10b9a73: MOVQ 0x190(SP), R10 . . 10b9a7b: MOVQ 0x88(SP), R11 ;rtree.go:654 . . 10b9a83: MOVQ 0x80(SP), R12 . . 10b9a8b: MOVQ CX, DI ;rtree.go:660 . . 10b9a8e: MOVQ BX, CX . . 10b9a91: MOVQ AX, BX . . 10b9a94: MOVQ 0xe8(SP), AX . . 10b9a9c: NOPL 0(AX) . . 10b9aa0: JMP 0x10b97ce ;rtree.go:661 . . 10b9aa5: MOVQ CX, DX ;rtree.go:673 . . 10b9aa8: MOVQ BX, AX . . 10b9aab: MOVQ DI, CX . . 10b9aae: MOVQ DX, BX . . 10b9ab1: MOVQ 0x128(SP), BP . . 10b9ab9: ADDQ $0x130, SP . . 10b9ac0: RET . . 10b9ac1: MOVQ AX, 0x28(SP) ;rtree.go:653 . . 10b9ac6: MOVQ BX, 0x30(SP) . . 10b9acb: MOVQ CX, 0x38(SP) . . 10b9ad0: MOVQ DI, 0x40(SP) . . 10b9ad5: MOVQ SI, 0x48(SP) . . 10b9ada: MOVQ R8, 0x50(SP) . . 10b9adf: MOVQ R9, 0x58(SP) . . 10b9ae4: MOVQ R10, 0x60(SP) . . 10b9ae9: CALL runtime.morestack_noctxt.abi0(SB) . . 10b9aee: MOVQ 0x28(SP), AX . . 10b9af3: MOVQ 0x30(SP), BX . . 10b9af8: MOVQ 0x38(SP), CX . . 10b9afd: MOVQ 0x40(SP), DI . . 10b9b02: MOVQ 0x48(SP), SI . . 10b9b07: MOVQ 0x50(SP), R8 . . 10b9b0c: MOVQ 0x58(SP), R9 . . 10b9b11: MOVQ 0x60(SP), R10 . . 10b9b16: JMP github.com/dhconnelly/rtreego.(*Rtree).searchIntersect(SB) . . 10b9b1b: INT $0x3 . . 10b9b1c: INT $0x3 . . 10b9b1d: INT $0x3 . . 10b9b1e: INT $0x3 (pprof) list searchIntersect Total: 1.42s ROUTINE ======================== github.com/dhconnelly/rtreego.(*Rtree).searchIntersect in /Users/shaoyu/go/src/github.com/dhconnelly/rtreego/rtree.go 600ms 1.93s (flat, cum) 135.92% of Total . . 653:func (tree *Rtree) searchIntersect(results []Spatial, n *node, bb Rect, filters []Filter) []Spatial { 170ms 170ms 654: for _, e := range n.entries { 390ms 610ms 655: if !intersect(e.bb, bb) { . . 656: continue . . 657: } . . 658: 10ms 10ms 659: if !n.leaf { . 940ms 660: results = tree.searchIntersect(results, e.child, bb, filters) . . 661: continue . . 662: } . . 663: 10ms 40ms 664: refuse, abort := applyFilters(results, e.obj, filters) . . 665: if !refuse { 20ms 160ms 666: results = append(results, e.obj) . . 667: } . . 668: . . 669: if abort { . . 670: break . . 671: } (pprof) ``` ``` package main import ( "flag" "log" "math/rand" "os" "runtime/pprof" "github.com/dhconnelly/rtreego" ) type point struct { rtreego.Point } func (p point) Bounds() rtreego.Rect { r := p.ToRect(0) return r } func (p point) dist(q point) float64 { sum := 0.0 for i := range p.Point { dx := p.Point[i] - q.Point[i] sum += dx * dx } return sum } func main() { flag.Parse() log.SetFlags(log.Flags() | log.Llongfile) cpuprofile := "main.prof" f, err := os.Create(cpuprofile) if err != nil { log.Fatal("could not create CPU profile: ", err) } defer f.Close() // error handling omitted for example if err := pprof.StartCPUProfile(f); err != nil { log.Fatal("could not start CPU profile: ", err) } defer pprof.StopCPUProfile() if err := mainWithErr(); err != nil { log.Fatalf("%+v", err) } } func mainWithErr() error { points := make([]rtreego.Spatial, 0, 1e5) for i := 0; i < cap(points); i++ { p := point{} p.Point[0] = rand.Float64() p.Point[1] = rand.Float64() points = append(points, p) } tree := rtreego.NewTree(2, 25, 50, points...) neighborCnt := 0 for i, tgt := range points { target := tgt.(point) neighbors := tree.SearchIntersect(target.Point.ToRect(0.01)) neighborCnt += len(neighbors) if i % 1000 == 0 { log.Printf("%d", i) } } neighborCnt /= len(points) log.Printf("%d", neighborCnt) return nil } ``` -- You received this message because you are subscribed to the Google Groups "golang-nuts" group. To unsubscribe from this group and stop receiving emails from it, send an email to golang-nuts+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/golang-nuts/c9a5556d-df10-4ace-8504-ce8272f5c4dfn%40googlegroups.com.