================
@@ -0,0 +1,525 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+; Test that address space inference works correctly for inttoptr/ptrtoint
+; patterns when the pointer manipulation is within the preserved mask.
+; For local memory, AMDGPU uses 2^32-aligned apertures, so only the lower
+; 32 bits are safe to modify.
+; For global memory, flat and global addresses are identical, so all 64
+; bits are preserved.
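+;
+; Sketch (hypothetical, not part of the autogenerated checks): for an in-mask
+; operation on a local pointer, the inferred form could stay in addrspace(3),
+; where pointers are 32-bit, instead of going through the flat address space:
+;   %a3  = ptrtoint ptr addrspace(3) %sp to i32
+;   %b3  = xor i32 %a3, 4095
+;   %sp2 = inttoptr i32 %b3 to ptr addrspace(3)
+;   store i16 0, ptr addrspace(3) %sp2, align 2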
+
+; Local (shared) memory tests - addrspace(3)
+
+define void @test_xor_local(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_xor_local(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4095
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(3) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ %b = xor i64 %a, 4095
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+define void @test_xor_local_max32bit(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_xor_local_max32bit(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967295
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(3) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ ; 0xFFFFFFFF - maximum 32-bit value, should still be optimized
+ %b = xor i64 %a, 4294967295
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+; 0x100000000 tests bit 32 (0-indexed), not bit 33
+define void @test_xor_local_fail_bit32(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_xor_local_fail_bit32(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967296
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(3) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ ; 0x100000000 - bit 32 set, should NOT be optimized
+ %b = xor i64 %a, 4294967296
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+define void @test_or_local(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_or_local(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = or i64 [[A]], 255
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(3) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ %b = or i64 %a, 255
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+define void @test_and_local(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_and_local(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = and i64 [[A]], -4096
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(3) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ ; -4096 = 0xFFFFFFFFFFFFF000 - clears low 12 bits, should be optimized
+ %b = and i64 %a, -4096
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+define void @test_and_local_fail(ptr addrspace(3) %sp) {
+; CHECK-LABEL: define void @test_and_local_fail(
+; CHECK-SAME: ptr addrspace(3) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(3) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = and i64 [[A]], -4294967297
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(3) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ ; -4294967297 = 0xFFFFFFFEFFFFFFFF - clears bit 32, should NOT be optimized
+ %b = and i64 %a, -4294967297
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+; Global memory tests - addrspace(1)
+
+define void @test_xor_global(ptr addrspace(1) %sp) {
+; CHECK-LABEL: define void @test_xor_global(
+; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 7
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(1) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ %b = xor i64 %a, 7
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+define void @test_xor_global_max32bit(ptr addrspace(1) %sp) {
+; CHECK-LABEL: define void @test_xor_global_max32bit(
+; CHECK-SAME: ptr addrspace(1) [[SP:%.*]]) {
+; CHECK-NEXT: [[GP:%.*]] = addrspacecast ptr addrspace(1) [[SP]] to ptr
+; CHECK-NEXT: [[A:%.*]] = ptrtoint ptr [[GP]] to i64
+; CHECK-NEXT: [[B:%.*]] = xor i64 [[A]], 4294967295
+; CHECK-NEXT: [[GP2:%.*]] = inttoptr i64 [[B]] to ptr
+; CHECK-NEXT: store i16 0, ptr [[GP2]], align 2
+; CHECK-NEXT: ret void
+;
+ %gp = addrspacecast ptr addrspace(1) %sp to ptr
+ %a = ptrtoint ptr %gp to i64
+ ; 0xFFFFFFFF - maximum 32-bit value, should still be optimized
+ %b = xor i64 %a, 4294967295
+ %gp2 = inttoptr i64 %b to ptr
+ store i16 0, ptr %gp2, align 2
+ ret void
+}
+
+; For global address space, all 64 bits are preserved because global addresses
+; are bit-identical in both global and flat address spaces. Global memory regions
+; are architecturally separate from the private/local aperture regions, which use
+; specific high-address ranges that don't overlap with valid global addresses.
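+;
+; Sketch (hypothetical, not from the autogenerated checks): since addrspace(1)
+; pointers are also 64-bit and share the flat representation, an in-mask
+; operation could in principle be folded back onto the global pointer:
+;   %a1  = ptrtoint ptr addrspace(1) %sp to i64
+;   %b1  = xor i64 %a1, 7
+;   %sp2 = inttoptr i64 %b1 to ptr addrspace(1)
+;   store i16 0, ptr addrspace(1) %sp2, align 2
+;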
+; 0x100000000 tests bit 32 (0-indexed), not bit 33
----------------
addmisol wrote:
Apologies for that if they shouldn't be there; I have made changes here:
https://github.com/llvm/llvm-project/pull/195875/changes
https://github.com/llvm/llvm-project/pull/195277