rmaprath created this revision.
rmaprath added reviewers: rjmccall, jmolloy, rengolin, olista01.
rmaprath added a subscriber: cfe-commits.
Herald added a subscriber: aemerson.

Let's consider the following plain struct:
```
  struct S1 {
    char a;
    short b : 8;
  };
```
According to the AAPCS, the bitfield 'b' _can_ be accessed by loading its
"container" (a short in this case) and then using bit-shifting operations to
extract the actual bitfield value. However, for plain (non-volatile) bitfields,
the AAPCS does not mandate this, so compilers are free to use whatever kind of
access they think is best. armcc tends to use wider accesses even when narrower
accesses are possible (as in the example above, where 'b' could be byte-loaded),
whereas clang and gcc tend to issue narrow loads/stores where available.
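
To make the difference concrete, here is a rough C sketch (not from the patch;
helper names are illustrative) of the two access strategies for 'b', assuming a
little-endian target where 'b' occupies byte 1, i.e. the high byte of the
half-word container:
```
  #include <string.h>

  int read_b_narrow(const struct S1 *s) {
    /* clang/gcc style: a single byte-sized load of just the bitfield. */
    signed char byte;
    memcpy(&byte, (const char *)s + 1, 1);
    return byte;                    /* sign-extends the 8-bit field */
  }

  int read_b_wide(const struct S1 *s) {
    /* armcc style: load the whole half-word container, then shift 'b' out. */
    short container;
    memcpy(&container, s, sizeof container);
    return container >> 8;          /* 'b' sits in the high byte on LE */
  }
```
Both routines compute the same value; for non-volatile bitfields the AAPCS
leaves the choice between them to the compiler.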

But things get tricky when volatile bitfields are involved:
```
  struct S2 {
    char a;
    volatile short b : 8;
  };
```
Now the AAPCS mandates that 'b' must be accessed through loads/stores of the
width appropriate to its container type (Section 7.1.7.5). To complicate matters
further, the AAPCS allows bitfields to overlap with regular fields
(Section 7.1.7.4). This means the storage units of 'a' and 'b' overlap: 'a'
occupies the first byte of the struct, while 'b' lies in the first half-word.
Loading 'b' will therefore load 'a' as well, since 'b' is loaded as a short.
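
A compliant read of 'b' therefore has to look roughly like the following C
sketch (function name illustrative; little-endian layout assumed):
```
  int read_b_aapcs(volatile struct S2 *s) {
    /* A single volatile half-word load of the container, which unavoidably
       reads 'a' as well; the bitfield is then shifted/masked out. */
    volatile short *container = (volatile short *)s;
    short v = *container;
    return v >> 8;                  /* 'b' occupies the high byte on LE */
  }
```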

Currently, clang byte-loads 'b' and is therefore not AAPCS compliant. The
purpose of this patch is to make clang respect the mandated access widths for
volatile bitfields.

One way to fix this problem is to teach clang the concept of overlapping storage
units, but that would require an overhaul of clang's structure layout code, as
the idea of distinct storage units is deeply embedded in clang's structure
layout routines. In this patch, I take the view that this confusion is only a
matter of abstraction: whether the above struct's storage is viewed as two
consecutive bytes (how clang lays out the struct) or as a single half-word (how
armcc lays it out) is irrelevant, because the actual data of each field is
always at the same offset from the base of the struct. The difference of
abstraction only matters when we generate code to access the relevant fields; we
can therefore intercept those loads/stores in clang and adjust the accesses so
that they are AAPCS compliant.
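
To make the adjustment concrete, here is a model of the recomputation as a
standalone C program (illustrative only, though the variable names mirror the
patch), using struct st5 from the new test file,
{ int a : 12; volatile char c : 5; }:
```
  #include <assert.h>
  #include <stdio.h>

  int main(void) {
    /* clang gives 'c' a byte-sized storage unit at byte offset 2. */
    unsigned StorageOffset = 16; /* storage unit start, in bits         */
    unsigned Offset        = 0;  /* bit offset of 'c' within that unit  */
    unsigned Size          = 5;  /* bitfield width in bits              */
    unsigned ContainerSize = 8;  /* width of the declared type, 'char'  */

    unsigned AbsoluteOffset = StorageOffset + Offset;               /* 16 */
    unsigned MemberOffset = AbsoluteOffset & (ContainerSize - 1);   /*  0 */

    /* Keep clang's own access unless one naturally aligned load of the
       container covers the whole bitfield (e.g. packed structs fail this). */
    assert(MemberOffset + Size <= ContainerSize);

    printf("container index %u, bit offset %u within it\n",
           AbsoluteOffset / ContainerSize, MemberOffset);
    return 0;
  }
```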

http://reviews.llvm.org/D16586

Files:
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CGRecordLayoutBuilder.cpp
  lib/CodeGen/CodeGenFunction.h
  test/CodeGen/aapcs-bitfield.c
  test/CodeGen/arm-bitfield-alignment.c

Index: test/CodeGen/arm-bitfield-alignment.c
===================================================================
--- test/CodeGen/arm-bitfield-alignment.c
+++ test/CodeGen/arm-bitfield-alignment.c
@@ -12,4 +12,4 @@
 }
 
 // CHECK: @g = external global %struct.T, align 4
-// CHECK: %{{.*}} = load i64, i64* bitcast (%struct.T* @g to i64*), align 4
+// CHECK: %{{.*}} = load i32, i32* bitcast (%struct.T* @g to i32*), align 4
Index: test/CodeGen/aapcs-bitfield.c
===================================================================
--- /dev/null
+++ test/CodeGen/aapcs-bitfield.c
@@ -0,0 +1,467 @@
+// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 \
+// RUN:   | FileCheck %s -check-prefix=LE -check-prefix=CHECK
+// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 \
+// RUN:   | FileCheck %s -check-prefix=BE -check-prefix=CHECK
+
+// CHECK: %struct.st0 = type { i8, i8 }
+// CHECK: %struct.st1 = type { i16, [2 x i8] }
+// CHECK: %struct.st2 = type { i16, i8 }
+// CHECK: %struct.st3 = type { i8, i8 }
+// CHECK: %struct.st4 = type { i16, [2 x i8] }
+// CHECK: %struct.st5 = type { i16, i8 }
+// CHECK: %struct.st6 = type { i16, i8, i8 }
+// CHECK: %struct.st7b = type { i8, [3 x i8], %struct.st7a }
+// CHECK: %struct.st7a = type { i8, i8, [2 x i8] }
+// CHECK: %struct.st8 = type { i16, [2 x i8] }
+
+// A simple plain bit-field
+struct st0 {
+  // Expected masks (0s mark the bit-field):
+  // le: 0xff80 (-128)
+  // be: 0x01ff (511)
+  short c : 7;
+};
+
+// CHECK-LABEL: st0_check_load
+int st0_check_load(struct st0 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st0_check_store
+void st0_check_store(struct st0 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+
+  // BE: %[[PTR:.*]] = bitcast %struct.st0* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+  m->c = 1;
+}
+
+// The bitfield 'c' should come straight after 'a'.
+struct st1 {
+  int a : 10;
+  // Expected masks (0s mark the bit-field 'c'):
+  // le: 0x03ff (1023)
+  // be: 0xffc0 (-64)
+  short c : 6;
+};
+
+// CHECK-LABEL: st1_check_load
+int st1_check_load(struct st1 *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 10
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 10
+  // BE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 10
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st1_check_store
+void st1_check_store(struct st1 *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 1023
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1024
+  // LE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st1, %struct.st1* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -64
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // BE-NEXT: store i16 %[[SET]], i16* %[[PTR]], align 2
+  m->c = 1;
+}
+
+// The bit-field 'c' should begin at the third byte (the alignment of its
+// container, 'short', forces this).
+struct st2 {
+  int a : 10;
+  // Expected masks (0s mark the bit-field):
+  // le: 0xff80 (-128)
+  // be: 0x01ff (511)
+  short c : 7;
+};
+
+// CHECK-LABEL: st2_check_load
+int st2_check_load(struct st2 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st2_check_store
+void st2_check_store(struct st2 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store i16 %[[SET]], i16* %[[PTR2]], align 2
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st2, %struct.st2* %m, i32 0, i32 0
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i16, i16* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load i16, i16* %[[PTR2]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store i16 %[[SET]], i16* %[[PTR2]], align 2
+  m->c = 1;
+}
+
+// Simple volatile bitfield
+struct st3 {
+  // Expected masks (0s mark the bit-field):
+  // le: 0xff80 (-128)
+  // be: 0x01ff (511)
+  volatile short c : 7;
+};
+
+// CHECK-LABEL: st3_check_load
+int st3_check_load(struct st3 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // LE-NEXT: %[[CLR1:.*]] = shl i16 %[[LD]], 9
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i16 %[[CLR1]], 9
+  // LE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // BE-NEXT: %[[CLR:.*]] = ashr i16 %[[LD]], 9
+  // BE-NEXT: %[[SEXT:.*]] = sext i16 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st3_check_store
+void st3_check_store(struct st3 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // LE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // LE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], -128
+  // LE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 1
+  // LE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR1]], align 2
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st3* %m to i16*
+  // BE-NEXT: %[[LD:.*]] = load volatile i16, i16* %[[PTR1]], align 2
+  // BE-NEXT: %[[CLR:.*]] = and i16 %[[LD]], 511
+  // BE-NEXT: %[[SET:.*]] = or i16 %[[CLR]], 512
+  // BE-NEXT: store volatile i16 %[[SET]], i16* %[[PTR1]], align 2
+  m->c = 1;
+}
+
+// Similar to st1, but 'c' is volatile; it should come straight after 'b'.
+struct st4 {
+  int b : 9;
+  // Expected masks (0s mark the bit-field):
+  // le: 0xc1 (-63)
+  // be: 0x83 (-125)
+  volatile char c : 5;
+};
+
+// CHECK-LABEL: st4_check_load
+int st4_check_load(struct st4 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // LE-NEXT: %[[CLR1:.*]] = shl i8 %[[LD]], 2
+  // LE-NEXT: %[[CLR2:.*]] = ashr i8 %[[CLR1]], 3
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // BE-NEXT: %[[CLR1:.*]] = shl i8 %[[LD]], 1
+  // BE-NEXT: %[[CLR2:.*]] = ashr i8 %[[CLR1]], 3
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR2]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st4_check_store
+void st4_check_store(struct st4 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // LE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // LE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], -63
+  // LE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 2
+  // LE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR2]], align 1
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st4* %m to i8*
+  // BE-NEXT: %[[PTR2:.*]] = getelementptr inbounds i8, i8* %[[PTR1]], i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR2]], align 1
+  // BE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], -125
+  // BE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 4
+  // BE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR2]], align 1
+  m->c = 1;
+}
+
+// Similar to st2, but 'c' is volatile; it should land in the third byte.
+struct st5 {
+  int a : 12;
+  // Expected masks (0s mark the bit-field):
+  // le: 0xe0 (-32)
+  // be: 0x07 (7)
+  volatile char c : 5;
+};
+
+// CHECK-LABEL: st5_check_load
+int st5_check_load(struct st5 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // LE-NEXT: %[[CLR1:.*]] = shl i8 %[[LD]], 3
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i8 %[[CLR1]], 3
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR2]] to i32
+  // LE-NEXT: ret i32 %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // BE-NEXT: %[[CLR:.*]] = ashr i8 %[[LD]], 3
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[CLR]] to i32
+  // BE-NEXT: ret i32 %[[SEXT]]
+  return m->c;
+}
+
+// CHECK-LABEL: st5_check_store
+void st5_check_store(struct st5 *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // LE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], -32
+  // LE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 1
+  // LE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR1]], align 1
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st5, %struct.st5* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD:.*]] = load volatile i8, i8* %[[PTR1]], align 1
+  // BE-NEXT: %[[CLR:.*]] = and i8 %[[LD]], 7
+  // BE-NEXT: %[[SET:.*]] = or i8 %[[CLR]], 8
+  // BE-NEXT: store volatile i8 %[[SET]], i8* %[[PTR1]], align 1
+  m->c = 1;
+}
+
+// Overlapping accesses of bitfields and normal fields
+struct st6 {
+  // Occupies the first two bytes
+  // le: 0xfffff000 (-4096)
+  // be: 0x000fffff (1048575)
+  int a : 12;
+  // Occupies the third byte
+  char b;
+  // Occupies the last byte
+  // le: 0xe0ffffff (-520093697)
+  // be: 0xffffff07 (-249)
+  int c : 5;
+};
+
+// CHECK-LABEL: st6_check_load
+int st6_check_load(struct st6 *m) {
+  // LE: %[[PTR1:.*]] = bitcast %struct.st6* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR1]], align 4
+  // LE-NEXT: %[[CLR1:.*]] = shl i32 %[[LD]], 20
+  // LE-NEXT: %[[CLR2:.*]] = ashr exact i32 %[[CLR1]], 20
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st6* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR1:.*]] = ashr i32 %[[LD]], 20
+  int x = m->a;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // LE-NEXT: %[[LD2:.*]] = load i8, i8* %[[PTR2]], align 2{{.*}}
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD2]] to i32
+  // LE-NEXT: %[[RES1:.*]] = add nsw i32 %[[CLR2]], %[[SEXT]]
+
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // BE-NEXT: %[[LD2:.*]] = load i8, i8* %[[PTR2]], align 2{{.*}}
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD2]] to i32
+  // BE-NEXT: %[[RES1:.*]] = add nsw i32 %[[SEXT]], %[[CLR1]]
+  x += m->b;
+
+  // LE: %[[CLR3:.*]] = shl i32 %[[LD]], 3
+  // LE-NEXT: %[[CLR4:.*]] = ashr i32 %[[CLR3]], 27
+  // LE-NEXT: %[[RES2:.*]] = add nsw i32 %[[RES1]], %[[CLR4]]
+
+  // BE: %[[CLR3:.*]] = shl i32 %[[LD]], 24
+  // BE-NEXT: %[[CLR4:.*]] = ashr i32 %[[CLR3]], 27
+  // BE-NEXT: %[[RES2:.*]] = add nsw i32 %[[RES1]], %[[CLR4]]
+  x += m->c;
+
+  // LE: ret i32 %[[RES2]]
+  // BE: ret i32 %[[RES2]]
+  return x;
+}
+
+// CHECK-LABEL: st6_check_store
+void st6_check_store(struct st6 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st6* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -4096
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1
+  // LE-NEXT: store i32 %[[SET]], i32* %[[PTR]], align 4
+
+  // BE: %[[PTR1:.*]] = bitcast %struct.st6* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], 1048575
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1048576
+  // BE-NEXT: store i32 %[[SET]], i32* %[[PTR1]], align 4
+  m->a = 1;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // LE-NEXT: store i8 2, i8* %[[PTR2]], align 2{{.*}}
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st6, %struct.st6* %m, i32 0, i32 1
+  // BE-NEXT: store i8 2, i8* %[[PTR2]], align 2{{.*}}
+  m->b = 2;
+
+  // LE: %[[LD2:.*]] = load i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[CLR2:.*]] = and i32 %[[LD2]], -520093697
+  // LE-NEXT: %[[SET2:.*]] = or i32 %[[CLR2]], 50331648
+  // LE-NEXT: store i32 %[[SET2]], i32* %[[PTR]], align 4
+
+  // BE: %[[LD2:.*]] = load i32, i32* %[[PTR1]], align 4
+  // BE-NEXT: %[[CLR2:.*]] = and i32 %[[LD2]], -249
+  // BE-NEXT: %[[SET2:.*]] = or i32 %[[CLR2]], 24
+  // BE-NEXT: store i32 %[[SET2]], i32* %[[PTR1]], align 4
+  m->c = 3;
+}
+
+// Nested structs and bitfields.
+struct st7a {
+  // Occupies the first byte.
+  char a;
+  // Occupies the second byte.
+  // le: 0xffffe0ff (-7937)
+  // be: 0xff07ffff (-16252929)
+  int b : 5;
+};
+
+struct st7b {
+  // Occupies the first byte.
+  char x;
+  // Comes after 3 bytes of padding.
+  struct st7a y;
+};
+
+// CHECK-LABEL: st7_check_load
+int st7_check_load(struct st7b *m) {
+  // LE: %[[PTR:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // LE-NEXT: %[[LD:.*]] = load i8, i8* %[[PTR]], align 4{{.*}}
+  // LE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD]] to i32
+
+  // BE: %[[PTR:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // BE-NEXT: %[[LD:.*]] = load i8, i8* %[[PTR]], align 4{{.*}}
+  // BE-NEXT: %[[SEXT:.*]] = sext i8 %[[LD]] to i32
+  int r = m->x;
+
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0
+  // LE-NEXT: %[[LD1:.*]] = load i8, i8* %[[PTR1]], align 4{{.*}}
+  // LE-NEXT: %[[SEXT1:.*]] = sext i8 %[[LD1]] to i32
+  // LE-NEXT: %[[RES:.*]] = add nsw i32 %[[SEXT1]], %[[SEXT]]
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0
+  // BE-NEXT: %[[LD1:.*]] = load i8, i8* %[[PTR1]], align 4{{.*}}
+  // BE-NEXT: %[[SEXT1:.*]] = sext i8 %[[LD1]] to i32
+  // BE-NEXT: %[[RES:.*]] = add nsw i32 %[[SEXT1]], %[[SEXT]]
+  r += m->y.a;
+
+  // LE: %[[PTR2:.*]] = bitcast i8* %[[PTR1]] to i32*
+  // LE-NEXT: %[[LD2:.*]] = load i32, i32* %[[PTR2]], align 4
+  // LE-NEXT: %[[CLR:.*]] = shl i32 %[[LD2]], 19
+  // LE-NEXT: %[[CLR1:.*]] = ashr i32 %[[CLR]], 27
+  // LE-NEXT: %[[RES1:.*]] = add nsw i32 %[[RES]], %[[CLR1]]
+
+  // BE: %[[PTR2:.*]] = bitcast i8* %[[PTR1]] to i32*
+  // BE-NEXT: %[[LD2:.*]] = load i32, i32* %[[PTR2]], align 4
+  // BE-NEXT: %[[CLR:.*]] = shl i32 %[[LD2]], 8
+  // BE-NEXT: %[[CLR1:.*]] = ashr i32 %[[CLR]], 27
+  // BE-NEXT: %[[RES1:.*]] = add nsw i32 %[[RES]], %[[CLR1]]
+  r += m->y.b;
+
+  // LE: ret i32 %[[RES1]]
+  // BE: ret i32 %[[RES1]]
+  return r;
+}
+
+// CHECK-LABEL: st7_check_store
+void st7_check_store(struct st7b *m) {
+  // LE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // LE-NEXT: store i8 1, i8* %[[PTR1]], align 4{{.*}}
+
+  // BE: %[[PTR1:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 0
+  // BE-NEXT: store i8 1, i8* %[[PTR1]], align 4{{.*}}
+  m->x = 1;
+
+  // LE: %[[PTR2:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0
+  // LE-NEXT: store i8 2, i8* %[[PTR2]], align 4{{.*}}
+
+  // BE: %[[PTR2:.*]] = getelementptr inbounds %struct.st7b, %struct.st7b* %m, i32 0, i32 2, i32 0
+  // BE-NEXT: store i8 2, i8* %[[PTR2]], align 4{{.*}}
+  m->y.a = 2;
+
+  // LE: %[[PTR3:.*]] = bitcast i8* %[[PTR2]] to i32*
+  // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR3]], align 4
+  // LE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -7937
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 768
+  // LE-NEXT: store i32 %[[SET]], i32* %[[PTR3]], align 4
+
+  // BE: %[[PTR3:.*]] = bitcast i8* %[[PTR2]] to i32*
+  // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR3]], align 4
+  // BE-NEXT: %[[CLR:.*]] = and i32 %[[LD]], -16252929
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[CLR]], 1572864
+  // BE-NEXT: store i32 %[[SET]], i32* %[[PTR3]], align 4
+  m->y.b = 3;
+}
+
+// Check overflowing assignments to bitfields.
+struct st8 {
+  unsigned f : 16;
+};
+
+// CHECK-LABEL: st8_check_assignment
+int st8_check_assignment(struct st8 *m) {
+  // LE: %[[PTR:.*]] = bitcast %struct.st8* %m to i32*
+  // LE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4
+  // LE-NEXT: %[[SET:.*]] = or i32 %[[LD]], 65535
+  // LE-NEXT: store i32 %[[SET]], i32* %[[PTR]], align 4
+  // LE-NEXT: ret i32 65535
+
+  // BE: %[[PTR:.*]] = bitcast %struct.st8* %m to i32*
+  // BE-NEXT: %[[LD:.*]] = load i32, i32* %[[PTR]], align 4
+  // BE-NEXT: %[[SET:.*]] = or i32 %[[LD]], -65536
+  // BE-NEXT: store i32 %[[SET]], i32* %[[PTR]], align 4
+  // BE-NEXT: ret i32 65535
+  return m->f = 0xffffffff;
+}
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2384,6 +2384,10 @@
   /// \brief Emit code for sections directive.
   OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S);
 
+  /// \brief Perform AAPCS-specific tweaks on bitfield loads / stores.
+  llvm::Value *AdjustAAPCSBitfieldAccess(const LValue &LV, CGBitFieldInfo &Info,
+                                         bool IsLoad = true);
+
 public:
 
   //===--------------------------------------------------------------------===//
Index: lib/CodeGen/CGRecordLayoutBuilder.cpp
===================================================================
--- lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -234,8 +234,11 @@
   // Reverse the bit offsets for big endian machines. Because we represent
   // a bitfield as a single large integer load, we can imagine the bits
   // counting from the most-significant-bit instead of the
-  // least-significant-bit.
-  if (DataLayout.isBigEndian())
+  // least-significant-bit. For AAPCS we delay this offset reversal until
+  // the IR generation phase (CodeGenFunction::AdjustAAPCSBitfieldAccess)
+  // where the access parameters are re-calculated.
+  if (DataLayout.isBigEndian() &&
+      !Context.getTargetInfo().getABI().startswith("aapcs"))
     Info.Offset = Info.StorageSize - (Info.Offset + Info.Size);
 }
 
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -24,12 +24,14 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -1411,6 +1413,96 @@
                     lvalue.getTBAAOffset(), lvalue.isNontemporal());
 }
 
+// AAPCS requires bitfield accesses to be performed using the natural alignment /
+// width of the container type. This requirement is at odds with how clang works
+// with record types. For example, consider:
+//   struct s {
+//     char a;
+//     short b : 8;
+//   };
+// Here, clang treats 'a' and 'b' as two separate (consecutive) storage units
+// with each having a size of 8-bits. This means an access to the bitfield 'b'
+// will use an i8* instead of an i16* (type of the container) as required by the
+// AAPCS. In other words, AAPCS allows the storage of 'a' and 'b' to overlap
+// (7.1.7.4); a load of 'b' will load 'a' as well, which is then masked out as
+// necessary. Modifying clang's abstraction of structures to allow overlapping
+// fields is quite obtrusive; we have therefore resorted to the following tweak,
+// where we intercept bitfield loads / stores and adjust them so that they are
+// AAPCS compliant. Note that both clang and AAPCS already agree on the layout
+// of structs; it's only the access width / alignment that needs fixing.
+llvm::Value *CodeGenFunction::AdjustAAPCSBitfieldAccess(const LValue &LV,
+                                                        CGBitFieldInfo &Info,
+                                                        bool IsLoad) {
+  llvm::Type *ResLTy = IsLoad ? ConvertType(LV.getType()) :
+                                ConvertTypeForMem(LV.getType());
+  llvm::Value *Ptr = LV.getBitFieldAddress().getPointer();
+
+  // Offset to the bitfield from the beginning of the struct
+  uint32_t AbsoluteOffset = getContext().toBits(Info.StorageOffset) +
+    Info.Offset;
+
+  // Container size is the width of the bitfield type
+  uint32_t ContainerSize = ResLTy->getPrimitiveSizeInBits();
+
+  // Offset within the container
+  uint32_t MemberOffset = AbsoluteOffset & (ContainerSize - 1);
+
+  // Bail out if an aligned load of the container cannot cover the entire
+// bitfield. This can happen, for example, if the bitfield is part of a packed
+// struct. AAPCS does not define access rules for such cases, so we let clang
+// follow its own rules.
+  if (MemberOffset + Info.Size > ContainerSize)
+    return Ptr;
+
+  // Adjust offsets for big-endian targets
+  if (CGM.getTypes().getDataLayout().isBigEndian())
+    MemberOffset = ContainerSize - (MemberOffset + Info.Size);
+
+  // Get rid of any bitcasts (we are going to change the cast anyways)
+  if (isa<llvm::BitCastOperator>(Ptr))
+    Ptr = Ptr->stripPointerCasts();
+
+  // If the result is a GEP, we need to reset it so that it points to the
+  // beginning of the struct.
+  if (isa<llvm::GEPOperator>(Ptr)) {
+    llvm::User *GEPUser = cast<llvm::User>(Ptr);
+    if (GEPUser->getNumOperands() > 1 &&
+        (AbsoluteOffset > Info.Offset)) {
+      // This means we are indexing into an element in the middle of a record.
+      // So we dissect the original GEP and assemble a new one that indexes into
+      // the beginning of that record.
+      llvm::User::op_iterator it = GEPUser->op_begin();
+      // All the indices are given relative to the first operand. We use this
+      // operand but overwrite the final index to zero, which gives us a pointer
+      // to the inner-most struct (in the case of nested structs) holding the
+      // bitfield.
+      Ptr = *it++;
+      std::vector<llvm::Value*> IdxVec;
+      while (it != GEPUser->op_end()) {
+        IdxVec.push_back(*it++);
+      }
+      IdxVec.pop_back();
+      IdxVec.push_back(llvm::ConstantInt::get(CGM.IntTy, 0));
+      Ptr = Builder.CreateInBoundsGEP(Ptr,
+                                      llvm::ArrayRef<llvm::Value*>(IdxVec));
+    }
+  }
+
+  // Here we essentially treat the resulting record as an array of containers
+  // and index into it appropriately.
+  Ptr = Builder.CreateBitCast(Ptr, ResLTy->getPointerTo());
+  Ptr = Builder.CreateInBoundsGEP(Ptr,
+    llvm::ConstantInt::get(CGM.IntTy, AbsoluteOffset / ContainerSize));
+
+  // Set the new bitfield access parameters.
+  Info.StorageOffset =
+    CharUnits::fromQuantity(AbsoluteOffset & ~(ContainerSize - 1));
+  Info.StorageSize = ContainerSize;
+  Info.Offset = MemberOffset;
+
+  return Ptr;
+}
+
 /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
 /// method emits the address of the lvalue, then loads the result as an rvalue,
 /// returning the rvalue.
@@ -1461,12 +1553,14 @@
 }
 
 RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) {
-  const CGBitFieldInfo &Info = LV.getBitFieldInfo();
-
-  // Get the output type.
+  CGBitFieldInfo Info = LV.getBitFieldInfo();
   llvm::Type *ResLTy = ConvertType(LV.getType());
-
   Address Ptr = LV.getBitFieldAddress();
+
+  if (CGM.getTarget().getABI().startswith("aapcs"))
+    Ptr = Address(AdjustAAPCSBitfieldAccess(LV, Info),
+                  getContext().getTypeAlignInChars(LV.getType()));
+
   llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load");
 
   if (Info.IsSigned) {
@@ -1658,16 +1752,19 @@
 
 void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
                                                      llvm::Value **Result) {
-  const CGBitFieldInfo &Info = Dst.getBitFieldInfo();
+  CGBitFieldInfo Info = Dst.getBitFieldInfo();
   llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType());
   Address Ptr = Dst.getBitFieldAddress();
 
+  if (CGM.getTarget().getABI().startswith("aapcs"))
+    Ptr = Address(AdjustAAPCSBitfieldAccess(Dst, Info, false),
+                  getContext().getTypeAlignInChars(Dst.getType()));
+
   // Get the source value, truncated to the width of the bit-field.
   llvm::Value *SrcVal = Src.getScalarVal();
-
-  // Cast the source to the storage type and shift it into place.
+  // Cast the source to the access type and shift it into place.
   SrcVal = Builder.CreateIntCast(SrcVal, Ptr.getElementType(),
-                                 /*IsSigned=*/false);
+                                /*IsSigned=*/false);
   llvm::Value *MaskedVal = SrcVal;
 
   // See if there are other bits in the bitfield's storage we'll need to load