This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 19bd313  fix: don't clobber saved frame pointer in arm64 assembly 
functions (#170)
19bd313 is described below

commit 19bd31356781cd10f304d87ca08f37e127494bd8
Author: Nick Ripley <[email protected]>
AuthorDate: Fri Oct 25 16:43:28 2024 -0400

    fix: don't clobber saved frame pointer in arm64 assembly functions (#170)
    
    The arm64 neon assembly functions in this repository overwrite the frame
    pointer saved by their callers, leading to crashes from the Go runtime
    execution tracer and profilers which use frame pointer unwinding. For
    historical reasons, on arm64 Go functions save the caller's frame
    pointer register (x29) one word below their stack frame. See
    go.dev/s/regabi#arm64-architecture. The assembly functions here,
    translated from C compiler output, save values at the top of their
    frame, and overwrite the frame pointer saved by the caller. We can fix
    this by decrementing the stack pointer past where that frame pointer is
    saved before saving anything on the stack.
    
    Fixed with this sed script on my macos laptop + manual cleanup to match
    indentation:
    
    ```
    /stp[\t ]*x29/i\
            // The Go ABI saves the frame pointer register one word below the \
            // caller's frame. Make room so we don't overwrite it. Needs to 
stay \
            // 16-byte aligned \
            SUB $16, RSP
    
    /ldp[\t ]*x29/a\
            // Put the stack pointer back where it was \
            ADD $16, RSP
    
    ```
    
    Ran the script from the root of this repository with
    
            find . -name '*_arm64.s' -exec sed -f fix.sed -i '' {} +
    
    Then manually inspected the assembly for missing SUBs/ADDs at the
    beginning of functions and prior to returns.
    
    Fixes #150
---
 .../internal/kernels/cast_numeric_neon_arm64.s     |  6 ++++
 arrow/internal/testing/tools/fpunwind.go           | 23 ++++++++++++++++
 arrow/internal/testing/tools/fpunwind_arm64.s      | 25 +++++++++++++++++
 arrow/internal/testing/tools/fpunwind_default.go   | 24 ++++++++++++++++
 arrow/math/int64_neon_arm64.s                      |  8 ++++++
 arrow/math/uint64_neon_arm64.s                     |  8 ++++++
 arrow/memory/memory_neon_arm64.s                   |  6 ++++
 arrow/memory/memory_test.go                        | 10 +++++++
 internal/utils/min_max_neon_arm64.s                | 32 ++++++++++++++++++++++
 parquet/internal/bmi/bitmap_neon_arm64.s           |  6 ++++
 parquet/internal/utils/bit_packing_neon_arm64.s    |  6 ++++
 parquet/internal/utils/unpack_bool_neon_arm64.s    |  6 ++++
 12 files changed, 160 insertions(+)

diff --git a/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s 
b/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
index c54eac4..3d56efc 100644
--- a/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
+++ b/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
@@ -10,6 +10,10 @@ TEXT ·_cast_type_numeric_neon(SB), $0-40
     MOVD len+32(FP), R4
 
 
+    // The Go ABI saves the frame pointer register one word below the 
+    // caller's frame. Make room so we don't overwrite it. Needs to stay 
+    // 16-byte aligned 
+    SUB $16, RSP
     WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
     WORD $0x7100181f // cmp    w0, #6
     WORD $0x910003fd // mov    x29, sp
@@ -4447,6 +4451,8 @@ LBB0_892:
     BNE LBB0_892
 LBB0_893:
     WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+    // Put the stack pointer back where it was 
+    ADD $16, RSP
     RET
 LBB0_894:
     WORD $0x927b6909 // and x9, x8, #0xffffffe0
diff --git a/arrow/internal/testing/tools/fpunwind.go 
b/arrow/internal/testing/tools/fpunwind.go
new file mode 100644
index 0000000..9b10493
--- /dev/null
+++ b/arrow/internal/testing/tools/fpunwind.go
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build arm64
+
+package tools
+
+// FPUnwind does frame pointer unwinding. It is implemented in assembly.
+// If frame pointers are broken, it will crash.
+func FPUnwind()
diff --git a/arrow/internal/testing/tools/fpunwind_arm64.s 
b/arrow/internal/testing/tools/fpunwind_arm64.s
new file mode 100644
index 0000000..113092c
--- /dev/null
+++ b/arrow/internal/testing/tools/fpunwind_arm64.s
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+TEXT ·FPUnwind(SB), $0-0
+       MOVD R29, R19
+loop:
+       CMP $0, R19
+       BEQ exit
+       MOVD (R19), R19
+       B loop
+exit:
+       RET
diff --git a/arrow/internal/testing/tools/fpunwind_default.go 
b/arrow/internal/testing/tools/fpunwind_default.go
new file mode 100644
index 0000000..2a07c63
--- /dev/null
+++ b/arrow/internal/testing/tools/fpunwind_default.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !arm64
+
+package tools
+
+// FPUnwind does frame pointer unwinding. It is implemented in assembly.
+// If frame pointers are broken, it will crash.
+// It is currently only implemented for arm64
+func FPUnwind() {}
diff --git a/arrow/math/int64_neon_arm64.s b/arrow/math/int64_neon_arm64.s
index 4f55163..c0b639b 100755
--- a/arrow/math/int64_neon_arm64.s
+++ b/arrow/math/int64_neon_arm64.s
@@ -11,6 +11,10 @@ TEXT ·_sum_int64_neon(SB), $0-24
        MOVD    len+8(FP), R1
        MOVD    res+16(FP), R2
     
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
        WORD $0x910003fd // mov x29, sp
        CBZ R1, LBB0_3
@@ -23,6 +27,8 @@ LBB0_3:
        WORD $0xaa1f03e9 // mov     x9, xzr
        WORD $0xf9000049 // str     x9, [x2]
        WORD $0xa8c17bfd // ldp     x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 LBB0_4:
        WORD $0x927ef428 // and     x8, x1, #0xfffffffffffffffc
@@ -54,5 +60,7 @@ LBB0_8:
 LBB0_9:
        WORD $0xf9000049 // str     x9, [x2]
        WORD $0xa8c17bfd // ldp     x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 
diff --git a/arrow/math/uint64_neon_arm64.s b/arrow/math/uint64_neon_arm64.s
index edbc1a6..0f8d66a 100755
--- a/arrow/math/uint64_neon_arm64.s
+++ b/arrow/math/uint64_neon_arm64.s
@@ -11,6 +11,10 @@ TEXT ·_sum_uint64_neon(SB), $0-24
        MOVD    len+8(FP), R1
        MOVD    res+16(FP), R2
     
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
        WORD $0x910003fd // mov x29, sp
        CBZ R1, LBB0_3
@@ -23,6 +27,8 @@ LBB0_3:
        WORD $0xaa1f03e9 // mov     x9, xzr
        WORD $0xf9000049 // str     x9, [x2]
        WORD $0xa8c17bfd // ldp     x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 LBB0_4:
        WORD $0x927ef428 // and     x8, x1, #0xfffffffffffffffc
@@ -54,5 +60,7 @@ LBB0_8:
 LBB0_9:
        WORD $0xf9000049 // str     x9, [x2]
        WORD $0xa8c17bfd // ldp     x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 
diff --git a/arrow/memory/memory_neon_arm64.s b/arrow/memory/memory_neon_arm64.s
index 18655cc..18b0af5 100755
--- a/arrow/memory/memory_neon_arm64.s
+++ b/arrow/memory/memory_neon_arm64.s
@@ -11,6 +11,10 @@ TEXT ·_memset_neon(SB), $0-24
        MOVD    len+8(FP), R1
        MOVD    c+16(FP), R2
 
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
        WORD $0x8b010008 // add    x8, x0, x1
        WORD $0xeb00011f // cmp    x8, x0
@@ -40,4 +44,6 @@ LBB0_6:
        BNE     LBB0_6
 LBB0_7:
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
diff --git a/arrow/memory/memory_test.go b/arrow/memory/memory_test.go
index 4a97531..ca1ad53 100644
--- a/arrow/memory/memory_test.go
+++ b/arrow/memory/memory_test.go
@@ -19,6 +19,7 @@ package memory_test
 import (
        "testing"
 
+       "github.com/apache/arrow-go/v18/arrow/internal/testing/tools"
        "github.com/apache/arrow-go/v18/arrow/memory"
        "github.com/stretchr/testify/assert"
 )
@@ -123,3 +124,12 @@ func BenchmarkSet_8000(b *testing.B) {
 func BenchmarkSet_8192(b *testing.B) {
        benchmarkSet(b, 8192)
 }
+
+func TestSetNoClobberFramePointer(t *testing.T) {
+       var b [8]byte
+       memory.Set(b[:], 0xff)
+       // We should be able to safely frame pointer unwind after calling this
+       // function. The arm64 assembly implementation used to clobber the
+       // frame pointer. See GH-150
+       tools.FPUnwind()
+}
diff --git a/internal/utils/min_max_neon_arm64.s 
b/internal/utils/min_max_neon_arm64.s
index b679bb6..a31c5d2 100755
--- a/internal/utils/min_max_neon_arm64.s
+++ b/internal/utils/min_max_neon_arm64.s
@@ -13,6 +13,10 @@ TEXT ·_int32_max_min_neon(SB), $0-32
        MOVD    minout+16(FP), R2
        MOVD    maxout+24(FP), R3
 
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
        WORD $0x7100043f // cmp    w1, #1
        WORD $0x910003fd // mov    x29, sp
@@ -32,6 +36,8 @@ LBB0_3:
        WORD $0xb900006b // str    w11, [x3]
        WORD $0xb900004a // str    w10, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 LBB0_4:
        WORD $0x927e7509 // and    x9, x8, #0xfffffffc
@@ -76,6 +82,8 @@ LBB0_9:
        WORD $0xb900006b // str    w11, [x3]
        WORD $0xb900004a // str    w10, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 
 // func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout 
unsafe.Pointer)
@@ -86,6 +94,10 @@ TEXT ·_uint32_max_min_neon(SB), $0-32
        MOVD    minout+16(FP), R2
        MOVD    maxout+24(FP), R3
     
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
        WORD $0x7100043f // cmp    w1, #1
        WORD $0x910003fd // mov    x29, sp
@@ -105,6 +117,8 @@ LBB1_3:
        WORD $0xb900006a // str    w10, [x3]
        WORD $0xb900004b // str    w11, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 LBB1_4:
        WORD $0x927e7509 // and    x9, x8, #0xfffffffc
@@ -149,6 +163,8 @@ LBB1_9:
        WORD $0xb900006a // str    w10, [x3]
        WORD $0xb900004b // str    w11, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 
 // func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout 
unsafe.Pointer)
@@ -159,6 +175,10 @@ TEXT ·_int64_max_min_neon(SB), $0-32
         MOVD    minout+16(FP), R2
         MOVD    maxout+24(FP), R3
 
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
        WORD $0x7100043f // cmp    w1, #1
        WORD $0x910003fd // mov    x29, sp
@@ -178,6 +198,8 @@ LBB2_3:
        WORD $0xf900006b // str    x11, [x3]
        WORD $0xf900004a // str    x10, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 LBB2_4:
        WORD $0x927e7509 // and    x9, x8, #0xfffffffc
@@ -234,6 +256,8 @@ LBB2_9:
        WORD $0xf900006b // str    x11, [x3]
        WORD $0xf900004a // str    x10, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 
 
@@ -245,6 +269,10 @@ TEXT ·_uint64_max_min_neon(SB), $0-32
         MOVD    minout+16(FP), R2
         MOVD    maxout+24(FP), R3
 
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
        WORD $0x7100043f // cmp    w1, #1
        WORD $0x910003fd // mov    x29, sp
@@ -264,6 +292,8 @@ LBB3_3:
        WORD $0xf900006a // str    x10, [x3]
        WORD $0xf900004b // str    x11, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 LBB3_4:
        WORD $0x927e7509 // and    x9, x8, #0xfffffffc
@@ -320,5 +350,7 @@ LBB3_9:
        WORD $0xf900006a // str    x10, [x3]
        WORD $0xf900004b // str    x11, [x2]
        WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        RET
 
diff --git a/parquet/internal/bmi/bitmap_neon_arm64.s 
b/parquet/internal/bmi/bitmap_neon_arm64.s
index abde584..f711e49 100755
--- a/parquet/internal/bmi/bitmap_neon_arm64.s
+++ b/parquet/internal/bmi/bitmap_neon_arm64.s
@@ -8,6 +8,10 @@ TEXT ·_levels_to_bitmap_neon(SB), $0-32
     MOVD numLevels+8(FP), R1
     MOVD rhs+16(FP), R2
 
+    // The Go ABI saves the frame pointer register one word below the 
+    // caller's frame. Make room so we don't overwrite it. Needs to stay 
+    // 16-byte aligned 
+    SUB $16, RSP
     WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
     WORD $0x7100043f // cmp    w1, #1
     WORD $0x910003fd // mov    x29, sp
@@ -79,6 +83,8 @@ LBB1_7:
     BNE LBB1_7
 LBB1_8:
     WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+    // Put the stack pointer back where it was 
+    ADD $16, RSP
     MOVD R8, res+24(FP)
     RET
 
diff --git a/parquet/internal/utils/bit_packing_neon_arm64.s 
b/parquet/internal/utils/bit_packing_neon_arm64.s
index 2d18dcc..b3a02f4 100644
--- a/parquet/internal/utils/bit_packing_neon_arm64.s
+++ b/parquet/internal/utils/bit_packing_neon_arm64.s
@@ -281,6 +281,10 @@ TEXT ·_unpack32_neon(SB), $0-40
        // LEAQ LCDATA1<>(SB), BP
 
        // %bb.0:
+       // The Go ABI saves the frame pointer register one word below the 
+       // caller's frame. Make room so we don't overwrite it. Needs to stay 
+       // 16-byte aligned 
+       SUB $16, RSP
        WORD $0xa9ba7bfd // stp    x29, x30, [sp, #-96]!
        WORD $0xd10643e9 // sub    x9, sp, #400
        WORD $0xa9016ffc // stp    x28, x27, [sp, #16]
@@ -6922,5 +6926,7 @@ LBB0_156:
        WORD $0xa94267fa    // ldp    x26, x25, [sp, #32]
        WORD $0xa9416ffc    // ldp    x28, x27, [sp, #16]
        WORD $0xa8c67bfd    // ldp    x29, x30, [sp], #96
+       // Put the stack pointer back where it was 
+       ADD $16, RSP
        MOVD R0, num+32(FP)
        RET
diff --git a/parquet/internal/utils/unpack_bool_neon_arm64.s 
b/parquet/internal/utils/unpack_bool_neon_arm64.s
index 2427895..3d1edac 100755
--- a/parquet/internal/utils/unpack_bool_neon_arm64.s
+++ b/parquet/internal/utils/unpack_bool_neon_arm64.s
@@ -12,6 +12,10 @@ TEXT ·_bytes_to_bools_neon(SB), $0-32
     MOVD out+16(FP), R2
     MOVD outlen+24(FP), R3
 
+    // The Go ABI saves the frame pointer register one word below the 
+    // caller's frame. Make room so we don't overwrite it. Needs to stay 
+    // 16-byte aligned 
+    SUB $16, RSP
     WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
     WORD $0x7100043f // cmp    w1, #1
     WORD $0x910003fd // mov    x29, sp
@@ -78,4 +82,6 @@ LBB0_3:
     JMP LBB0_2
 LBB0_12:
     WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
+    // Put the stack pointer back where it was 
+    ADD $16, RSP
     RET

Reply via email to