This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 19bd313 fix: don't clobber saved frame pointer in arm64 assembly
functions (#170)
19bd313 is described below
commit 19bd31356781cd10f304d87ca08f37e127494bd8
Author: Nick Ripley <[email protected]>
AuthorDate: Fri Oct 25 16:43:28 2024 -0400
fix: don't clobber saved frame pointer in arm64 assembly functions (#170)
The arm64 neon assembly functions in this repository overwrite the frame
pointer saved by their callers, leading to crashes from the Go runtime
execution tracer and profilers which use frame pointer unwinding. For
historical reasons, on arm64 Go functions save the caller's frame
pointer register (x29) one word below their stack frame. See
go.dev/s/regabi#arm64-architecture. The assembly functions here,
translated from C compiler output, save values at the top of their
frame, and overwrite the frame pointer saved by the caller. We can fix
this by decrementing the stack pointer past where that frame pointer is
saved before saving anything on the stack.
Fixed with this sed script on my macos laptop + manual cleanup to match
indentation:
```
/stp[\t ]*x29/i\
// The Go ABI saves the frame pointer register one word below the \
// caller's frame. Make room so we don't overwrite it. Needs to
stay \
// 16-byte aligned \
SUB $16, RSP
/ldp[\t ]*x29/a\
// Put the stack pointer back where it was \
ADD $16, RSP
```
Ran the script from the root of this repository with
find . -name '*_arm64.s' -exec sed -f fix.sed -i '' {} +
Then manually inspected the assembly for missing SUBs/ADDs at the
beginning of functions and prior to returns.
Fixes #150
---
.../internal/kernels/cast_numeric_neon_arm64.s | 6 ++++
arrow/internal/testing/tools/fpunwind.go | 23 ++++++++++++++++
arrow/internal/testing/tools/fpunwind_arm64.s | 25 +++++++++++++++++
arrow/internal/testing/tools/fpunwind_default.go | 24 ++++++++++++++++
arrow/math/int64_neon_arm64.s | 8 ++++++
arrow/math/uint64_neon_arm64.s | 8 ++++++
arrow/memory/memory_neon_arm64.s | 6 ++++
arrow/memory/memory_test.go | 10 +++++++
internal/utils/min_max_neon_arm64.s | 32 ++++++++++++++++++++++
parquet/internal/bmi/bitmap_neon_arm64.s | 6 ++++
parquet/internal/utils/bit_packing_neon_arm64.s | 6 ++++
parquet/internal/utils/unpack_bool_neon_arm64.s | 6 ++++
12 files changed, 160 insertions(+)
diff --git a/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
b/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
index c54eac4..3d56efc 100644
--- a/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
+++ b/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s
@@ -10,6 +10,10 @@ TEXT ·_cast_type_numeric_neon(SB), $0-40
MOVD len+32(FP), R4
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100181f // cmp w0, #6
WORD $0x910003fd // mov x29, sp
@@ -4447,6 +4451,8 @@ LBB0_892:
BNE LBB0_892
LBB0_893:
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB0_894:
WORD $0x927b6909 // and x9, x8, #0xffffffe0
diff --git a/arrow/internal/testing/tools/fpunwind.go
b/arrow/internal/testing/tools/fpunwind.go
new file mode 100644
index 0000000..9b10493
--- /dev/null
+++ b/arrow/internal/testing/tools/fpunwind.go
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build arm64
+
+package tools
+
+// FPUnwind does frame pointer unwinding. It is implemented in assembly.
+// If frame pointers are broken, it will crash.
+func FPUnwind()
diff --git a/arrow/internal/testing/tools/fpunwind_arm64.s
b/arrow/internal/testing/tools/fpunwind_arm64.s
new file mode 100644
index 0000000..113092c
--- /dev/null
+++ b/arrow/internal/testing/tools/fpunwind_arm64.s
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+TEXT ·FPUnwind(SB), $0-0
+ MOVD R29, R19
+loop:
+ CMP $0, R19
+ BEQ exit
+ MOVD (R19), R19
+ B loop
+exit:
+ RET
diff --git a/arrow/internal/testing/tools/fpunwind_default.go
b/arrow/internal/testing/tools/fpunwind_default.go
new file mode 100644
index 0000000..2a07c63
--- /dev/null
+++ b/arrow/internal/testing/tools/fpunwind_default.go
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !arm64
+
+package tools
+
+// FPUnwind does frame pointer unwinding. It is implemented in assembly.
+// If frame pointers are broken, it will crash.
+// It is currently only implemented for arm64
+func FPUnwind() {}
diff --git a/arrow/math/int64_neon_arm64.s b/arrow/math/int64_neon_arm64.s
index 4f55163..c0b639b 100755
--- a/arrow/math/int64_neon_arm64.s
+++ b/arrow/math/int64_neon_arm64.s
@@ -11,6 +11,10 @@ TEXT ·_sum_int64_neon(SB), $0-24
MOVD len+8(FP), R1
MOVD res+16(FP), R2
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x910003fd // mov x29, sp
CBZ R1, LBB0_3
@@ -23,6 +27,8 @@ LBB0_3:
WORD $0xaa1f03e9 // mov x9, xzr
WORD $0xf9000049 // str x9, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB0_4:
WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc
@@ -54,5 +60,7 @@ LBB0_8:
LBB0_9:
WORD $0xf9000049 // str x9, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
diff --git a/arrow/math/uint64_neon_arm64.s b/arrow/math/uint64_neon_arm64.s
index edbc1a6..0f8d66a 100755
--- a/arrow/math/uint64_neon_arm64.s
+++ b/arrow/math/uint64_neon_arm64.s
@@ -11,6 +11,10 @@ TEXT ·_sum_uint64_neon(SB), $0-24
MOVD len+8(FP), R1
MOVD res+16(FP), R2
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x910003fd // mov x29, sp
CBZ R1, LBB0_3
@@ -23,6 +27,8 @@ LBB0_3:
WORD $0xaa1f03e9 // mov x9, xzr
WORD $0xf9000049 // str x9, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB0_4:
WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc
@@ -54,5 +60,7 @@ LBB0_8:
LBB0_9:
WORD $0xf9000049 // str x9, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
diff --git a/arrow/memory/memory_neon_arm64.s b/arrow/memory/memory_neon_arm64.s
index 18655cc..18b0af5 100755
--- a/arrow/memory/memory_neon_arm64.s
+++ b/arrow/memory/memory_neon_arm64.s
@@ -11,6 +11,10 @@ TEXT ·_memset_neon(SB), $0-24
MOVD len+8(FP), R1
MOVD c+16(FP), R2
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x8b010008 // add x8, x0, x1
WORD $0xeb00011f // cmp x8, x0
@@ -40,4 +44,6 @@ LBB0_6:
BNE LBB0_6
LBB0_7:
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
diff --git a/arrow/memory/memory_test.go b/arrow/memory/memory_test.go
index 4a97531..ca1ad53 100644
--- a/arrow/memory/memory_test.go
+++ b/arrow/memory/memory_test.go
@@ -19,6 +19,7 @@ package memory_test
import (
"testing"
+ "github.com/apache/arrow-go/v18/arrow/internal/testing/tools"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/stretchr/testify/assert"
)
@@ -123,3 +124,12 @@ func BenchmarkSet_8000(b *testing.B) {
func BenchmarkSet_8192(b *testing.B) {
benchmarkSet(b, 8192)
}
+
+func TestSetNoClobberFramePointer(t *testing.T) {
+ var b [8]byte
+ memory.Set(b[:], 0xff)
+ // We should be able to safely frame pointer unwind after calling this
+ // function. The arm64 assembly implementation used to clobber the
+ // frame pointer. See GH-150
+ tools.FPUnwind()
+}
diff --git a/internal/utils/min_max_neon_arm64.s
b/internal/utils/min_max_neon_arm64.s
index b679bb6..a31c5d2 100755
--- a/internal/utils/min_max_neon_arm64.s
+++ b/internal/utils/min_max_neon_arm64.s
@@ -13,6 +13,10 @@ TEXT ·_int32_max_min_neon(SB), $0-32
MOVD minout+16(FP), R2
MOVD maxout+24(FP), R3
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100043f // cmp w1, #1
WORD $0x910003fd // mov x29, sp
@@ -32,6 +36,8 @@ LBB0_3:
WORD $0xb900006b // str w11, [x3]
WORD $0xb900004a // str w10, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB0_4:
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -76,6 +82,8 @@ LBB0_9:
WORD $0xb900006b // str w11, [x3]
WORD $0xb900004a // str w10, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
// func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout
unsafe.Pointer)
@@ -86,6 +94,10 @@ TEXT ·_uint32_max_min_neon(SB), $0-32
MOVD minout+16(FP), R2
MOVD maxout+24(FP), R3
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100043f // cmp w1, #1
WORD $0x910003fd // mov x29, sp
@@ -105,6 +117,8 @@ LBB1_3:
WORD $0xb900006a // str w10, [x3]
WORD $0xb900004b // str w11, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB1_4:
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -149,6 +163,8 @@ LBB1_9:
WORD $0xb900006a // str w10, [x3]
WORD $0xb900004b // str w11, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
// func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout
unsafe.Pointer)
@@ -159,6 +175,10 @@ TEXT ·_int64_max_min_neon(SB), $0-32
MOVD minout+16(FP), R2
MOVD maxout+24(FP), R3
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100043f // cmp w1, #1
WORD $0x910003fd // mov x29, sp
@@ -178,6 +198,8 @@ LBB2_3:
WORD $0xf900006b // str x11, [x3]
WORD $0xf900004a // str x10, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB2_4:
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -234,6 +256,8 @@ LBB2_9:
WORD $0xf900006b // str x11, [x3]
WORD $0xf900004a // str x10, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
@@ -245,6 +269,10 @@ TEXT ·_uint64_max_min_neon(SB), $0-32
MOVD minout+16(FP), R2
MOVD maxout+24(FP), R3
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100043f // cmp w1, #1
WORD $0x910003fd // mov x29, sp
@@ -264,6 +292,8 @@ LBB3_3:
WORD $0xf900006a // str x10, [x3]
WORD $0xf900004b // str x11, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
LBB3_4:
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -320,5 +350,7 @@ LBB3_9:
WORD $0xf900006a // str x10, [x3]
WORD $0xf900004b // str x11, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET
diff --git a/parquet/internal/bmi/bitmap_neon_arm64.s
b/parquet/internal/bmi/bitmap_neon_arm64.s
index abde584..f711e49 100755
--- a/parquet/internal/bmi/bitmap_neon_arm64.s
+++ b/parquet/internal/bmi/bitmap_neon_arm64.s
@@ -8,6 +8,10 @@ TEXT ·_levels_to_bitmap_neon(SB), $0-32
MOVD numLevels+8(FP), R1
MOVD rhs+16(FP), R2
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100043f // cmp w1, #1
WORD $0x910003fd // mov x29, sp
@@ -79,6 +83,8 @@ LBB1_7:
BNE LBB1_7
LBB1_8:
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
MOVD R8, res+24(FP)
RET
diff --git a/parquet/internal/utils/bit_packing_neon_arm64.s
b/parquet/internal/utils/bit_packing_neon_arm64.s
index 2d18dcc..b3a02f4 100644
--- a/parquet/internal/utils/bit_packing_neon_arm64.s
+++ b/parquet/internal/utils/bit_packing_neon_arm64.s
@@ -281,6 +281,10 @@ TEXT ·_unpack32_neon(SB), $0-40
// LEAQ LCDATA1<>(SB), BP
// %bb.0:
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9ba7bfd // stp x29, x30, [sp, #-96]!
WORD $0xd10643e9 // sub x9, sp, #400
WORD $0xa9016ffc // stp x28, x27, [sp, #16]
@@ -6922,5 +6926,7 @@ LBB0_156:
WORD $0xa94267fa // ldp x26, x25, [sp, #32]
WORD $0xa9416ffc // ldp x28, x27, [sp, #16]
WORD $0xa8c67bfd // ldp x29, x30, [sp], #96
+ // Put the stack pointer back where it was
+ ADD $16, RSP
MOVD R0, num+32(FP)
RET
diff --git a/parquet/internal/utils/unpack_bool_neon_arm64.s
b/parquet/internal/utils/unpack_bool_neon_arm64.s
index 2427895..3d1edac 100755
--- a/parquet/internal/utils/unpack_bool_neon_arm64.s
+++ b/parquet/internal/utils/unpack_bool_neon_arm64.s
@@ -12,6 +12,10 @@ TEXT ·_bytes_to_bools_neon(SB), $0-32
MOVD out+16(FP), R2
MOVD outlen+24(FP), R3
+ // The Go ABI saves the frame pointer register one word below the
+ // caller's frame. Make room so we don't overwrite it. Needs to stay
+ // 16-byte aligned
+ SUB $16, RSP
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x7100043f // cmp w1, #1
WORD $0x910003fd // mov x29, sp
@@ -78,4 +82,6 @@ LBB0_3:
JMP LBB0_2
LBB0_12:
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
+ // Put the stack pointer back where it was
+ ADD $16, RSP
RET