kosarev created this revision.
kosarev added a project: clang.

The basic idea behind this patch is that since in strict aliasing mode all 
accesses to union members require their outermost enclosing union objects to be 
specified explicitly, then for a couple given accesses to union members of the 
form

  p->a.b.c...
  q->x.y.z...

it is known they can only alias if both p and q point to the same union type 
and offset ranges of members ##a.b.c...## and ##x.y.z..## overlap. Note that 
the actual types of the members do not matter.

Specifically, in this patch we do the following:

- Make unions to be valid TBAA base access types. This enables generation of 
TBAA type descriptors for unions.
- Encode union types as structures with a single member of a special "union 
member" type. Currently we do not encode information about sizes of types, but 
conceptually such union members are considered to be of the size of the whole 
union.
- Encode accesses to direct and indirect union members, including member 
arrays, as accesses to these special members. All accesses to members of a 
union thus get the same offset, which is the offset of the union they are part 
of. This means the existing LLVM TBAA machinery is able to handle such accesses 
with no changes.

While this is already an improvement comparing to the current situation, that 
is, representing all union accesses as may-alias ones, there are further 
changes planned to complete the support for unions. One of them is storing 
information about access sizes so we can distinct accesses to non-overlapping 
union members, including accesses to different elements of member arrays. 
Another change is encoding type sizes in order to make it possible to compute 
offsets within constant-indexed array elements. These enhancements will be 
addressed with separate patches.


Repository:
  rL LLVM

https://reviews.llvm.org/D39455

Files:
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenModule.h
  lib/CodeGen/CodeGenTBAA.cpp
  lib/CodeGen/CodeGenTBAA.h
  test/CodeGen/tbaa-union.cpp
  test/CodeGen/union-tbaa1.c

Index: test/CodeGen/union-tbaa1.c
===================================================================
--- test/CodeGen/union-tbaa1.c
+++ test/CodeGen/union-tbaa1.c
@@ -15,30 +15,32 @@
 // But no tbaa for the two stores:
 // CHECK: %uw[[UW1:[0-9]*]] = getelementptr
 // CHECK: store{{.*}}%uw[[UW1]]
-// CHECK: tbaa ![[OCPATH:[0-9]+]]
+// CHECK: tbaa [[TAG_vect32_union_member:![0-9]+]]
 // There will be a load after the store, and it will use tbaa. Make sure
 // the check-not above doesn't find it:
 // CHECK: load
   Tmp[*Index][0].uw = Arr[*Index][0] * Num;
 // CHECK: %uw[[UW2:[0-9]*]] = getelementptr
 // CHECK: store{{.*}}%uw[[UW2]]
-// CHECK: tbaa ![[OCPATH]]
+// CHECK: tbaa [[TAG_vect32_union_member]]
   Tmp[*Index][1].uw = Arr[*Index][1] * Num;
 // Same here, don't generate tbaa for the loads:
 // CHECK: %uh[[UH1:[0-9]*]] = bitcast %union.vect32
 // CHECK: %arrayidx[[AX1:[0-9]*]] = getelementptr{{.*}}%uh[[UH1]]
 // CHECK: load i16, i16* %arrayidx[[AX1]]
-// CHECK: tbaa ![[OCPATH]]
+// CHECK: tbaa [[TAG_vect32_union_member]]
 // CHECK: store
   Vec[0] = Tmp[*Index][0].uh[1];
 // CHECK: %uh[[UH2:[0-9]*]] = bitcast %union.vect32
 // CHECK: %arrayidx[[AX2:[0-9]*]] = getelementptr{{.*}}%uh[[UH2]]
 // CHECK: load i16, i16* %arrayidx[[AX2]]
-// CHECK: tbaa ![[OCPATH]]
+// CHECK: tbaa [[TAG_vect32_union_member]]
 // CHECK: store
   Vec[1] = Tmp[*Index][1].uh[1];
   bar(Tmp);
 }
 
-// CHECK-DAG: ![[CHAR:[0-9]+]] = !{!"omnipotent char"
-// CHECK-DAG: ![[OCPATH]] = !{![[CHAR]], ![[CHAR]], i64 0}
+// CHECK-DAG: [[TAG_vect32_union_member]] = !{[[TYPE_vect32:!.*]], [[TYPE_union_member:!.*]], i64 0}
+// CHECK-DAG: [[TYPE_vect32]] = !{!"", [[TYPE_union_member]], i64 0}
+// CHECK-DAG: [[TYPE_union_member]] = !{!"union member", [[TYPE_char:!.*]], i64 0}
+// CHECK-DAG: [[TYPE_char]] = !{!"omnipotent char", {{.*}}}
Index: test/CodeGen/tbaa-union.cpp
===================================================================
--- test/CodeGen/tbaa-union.cpp
+++ test/CodeGen/tbaa-union.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
+//
+// Check that we generate correct TBAA information for accesses to union
+// members.
+
+struct X {
+  int a, b;
+  int arr[3];
+  int c, d;
+};
+
+union U {
+  int i;
+  X x;
+  int j;
+};
+
+struct S {
+  U u, v;
+};
+
+int f1(U *p) {
+// CHECK-LABEL: _Z2f1P1U
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_U_j:!.*]]
+  return p->j;
+}
+
+int f2(S *p) {
+// CHECK-LABEL: _Z2f2P1S
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_S_u_i:!.*]]
+  return p->u.i;
+}
+
+int f3(S *p) {
+// CHECK-LABEL: _Z2f3P1S
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_S_v_j:!.*]]
+  return p->v.j;
+}
+
+int f4(S *p) {
+// CHECK-LABEL: _Z2f4P1S
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_S_u_x_b:!.*]]
+  return p->u.x.b;
+}
+
+int f5(S *p) {
+// CHECK-LABEL: _Z2f5P1S
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_S_v_x_b:!.*]]
+  return p->v.x.b;
+}
+
+int f6(S *p) {
+// CHECK-LABEL: _Z2f6P1S
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_S_u_x_arr:!.*]]
+  return p->u.x.arr[1];
+}
+
+int f7(S *p) {
+// CHECK-LABEL: _Z2f7P1S
+// CHECK: load i32, i32* {{.*}}, !tbaa [[TAG_S_v_x_arr:!.*]]
+  return p->v.x.arr[1];
+}
+
+// CHECK-DAG: [[TAG_U_j]] = !{[[TYPE_U:!.*]], [[TYPE_union_member:!.*]], i64 0}
+// CHECK-DAG: [[TAG_S_u_i]] = !{[[TYPE_S:!.*]], [[TYPE_union_member]], i64 0}
+// CHECK-DAG: [[TAG_S_u_x_b]] = !{[[TYPE_S:!.*]], [[TYPE_union_member]], i64 0}
+// CHECK-DAG: [[TAG_S_u_x_arr]] = !{[[TYPE_S:!.*]], [[TYPE_union_member]], i64 0}
+// CHECK-DAG: [[TAG_S_v_j]] = !{[[TYPE_S:!.*]], [[TYPE_union_member]], i64 28}
+// CHECK-DAG: [[TAG_S_v_x_b]] = !{[[TYPE_S:!.*]], [[TYPE_union_member]], i64 28}
+// CHECK-DAG: [[TAG_S_v_x_arr]] = !{[[TYPE_S:!.*]], [[TYPE_union_member]], i64 28}
+// CHECK-DAG: [[TYPE_U]] = !{!"_ZTS1U", [[TYPE_union_member]], i64 0}
+// CHECK-DAG: [[TYPE_S]] = !{!"_ZTS1S", [[TYPE_U]], i64 0, [[TYPE_U]], i64 28}
+// CHECK-DAG: [[TYPE_union_member]] = !{!"union member", [[TYPE_char:!.*]], i64 0}
+// CHECK-DAG: [[TYPE_char]] = !{!"omnipotent char", {{.*}}, i64 0}
Index: lib/CodeGen/CodeGenTBAA.h
===================================================================
--- lib/CodeGen/CodeGenTBAA.h
+++ lib/CodeGen/CodeGenTBAA.h
@@ -34,8 +34,9 @@
 
 // TBAAAccessKind - A kind of TBAA memory access descriptor.
 enum class TBAAAccessKind : unsigned {
-  Ordinary,
-  MayAlias,
+  Ordinary,     // An ordinary memory access.
+  MayAlias,     // An access that may alias with any other accesses.
+  UnionMember,  // An access to a direct or indirect union member.
 };
 
 // TBAAAccessInfo - Describes a memory access in terms of TBAA.
@@ -65,6 +66,14 @@
 
   bool isMayAlias() const { return Kind == TBAAAccessKind::MayAlias; }
 
+  static TBAAAccessInfo getUnionMemberInfo(llvm::MDNode *BaseType,
+                                           uint64_t Offset) {
+    return TBAAAccessInfo(TBAAAccessKind::UnionMember, BaseType,
+                          /* AccessType= */ nullptr, Offset);
+  }
+
+  bool isUnionMember() const { return Kind == TBAAAccessKind::UnionMember; }
+
   bool operator==(const TBAAAccessInfo &Other) const {
     return Kind == Other.Kind &&
            BaseType == Other.BaseType &&
@@ -131,6 +140,10 @@
   /// considered to be equivalent to it.
   llvm::MDNode *getChar();
 
+  /// getUnionMemberType - Get metadata that represents the type of union
+  /// members.
+  llvm::MDNode *getUnionMemberType();
+
   /// CollectFields - Collect information about the fields of a type for
   /// !tbaa.struct metadata formation. Return false for an unsupported type.
   bool CollectFields(uint64_t BaseOffset,
Index: lib/CodeGen/CodeGenTBAA.cpp
===================================================================
--- lib/CodeGen/CodeGenTBAA.cpp
+++ lib/CodeGen/CodeGenTBAA.cpp
@@ -72,6 +72,10 @@
   return Char;
 }
 
+llvm::MDNode *CodeGenTBAA::getUnionMemberType() {
+  return createTBAAScalarType("union member", getChar());
+}
+
 static bool TypeHasMayAlias(QualType QTy) {
   // Tagged types have declarations, and therefore may have attributes.
   if (const TagType *TTy = dyn_cast<TagType>(QTy))
@@ -99,9 +103,8 @@
       return false;
     if (RD->hasFlexibleArrayMember())
       return false;
-    // RD can be struct, union, class, interface or enum.
-    // For now, we only handle struct and class.
-    if (RD->isStruct() || RD->isClass())
+    // For now, we do not allow interface classes to be base access types.
+    if (RD->isStruct() || RD->isClass() || RD->isUnion())
       return true;
   }
   return false;
@@ -269,19 +272,26 @@
 
   if (const RecordType *TTy = QTy->getAs<RecordType>()) {
     const RecordDecl *RD = TTy->getDecl()->getDefinition();
-
     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
     SmallVector <std::pair<llvm::MDNode*, uint64_t>, 4> Fields;
-    unsigned idx = 0;
-    for (RecordDecl::field_iterator i = RD->field_begin(),
-         e = RD->field_end(); i != e; ++i, ++idx) {
-      QualType FieldQTy = i->getType();
-      llvm::MDNode *FieldNode = isValidBaseType(FieldQTy) ?
-          getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
-      if (!FieldNode)
-        return BaseTypeMetadataCache[Ty] = nullptr;
-      Fields.push_back(std::make_pair(
-          FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth()));
+
+    if (RD->isUnion()) {
+      // Unions are represented as structures with a single member that has a
+      // special type and occupies the whole object.
+      llvm::MDNode *FieldType = getUnionMemberType();
+      Fields.push_back(std::make_pair(FieldType, /* Offset= */ 0));
+    } else {
+      for (RecordDecl::field_iterator i = RD->field_begin(),
+           e = RD->field_end(); i != e; ++i) {
+        QualType FieldQTy = i->getType();
+        llvm::MDNode *FieldType = isValidBaseType(FieldQTy) ?
+            getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
+        if (!FieldType)
+          return BaseTypeMetadataCache[Ty] = nullptr;
+        unsigned Offset = Layout.getFieldOffset(i->getFieldIndex()) /
+                              Context.getCharWidth();
+        Fields.push_back(std::make_pair(FieldType, Offset));
+      }
     }
 
     SmallString<256> OutName;
@@ -303,6 +313,8 @@
 llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
   if (Info.isMayAlias())
     Info = TBAAAccessInfo(getChar());
+  else if (Info.isUnionMember())
+    Info.AccessType = getUnionMemberType();
 
   if (!Info.AccessType)
     return nullptr;
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -686,8 +686,9 @@
   /// getTBAAInfoForSubobject - Get TBAA information for an access with a given
   /// base lvalue.
   TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) {
-    if (Base.getTBAAInfo().isMayAlias())
-      return TBAAAccessInfo::getMayAliasInfo();
+    TBAAAccessInfo TBAAInfo = Base.getTBAAInfo();
+    if (TBAAInfo.isMayAlias() || TBAAInfo.isUnionMember())
+      return TBAAInfo;
     return getTBAAAccessInfo(AccessType);
   }
 
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -3723,9 +3723,6 @@
   if (base.getTBAAInfo().isMayAlias() ||
           rec->hasAttr<MayAliasAttr>() || FieldType->isVectorType()) {
     FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
-  } else if (rec->isUnion()) {
-    // TODO: Support TBAA for unions.
-    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
   } else {
     // If no base type been assigned for the base access, then try to generate
     // one for this base lvalue.
@@ -3736,16 +3733,25 @@
                "Nonzero offset for an access with no base type!");
     }
 
-    // Adjust offset to be relative to the base type.
-    const ASTRecordLayout &Layout =
-        getContext().getASTRecordLayout(field->getParent());
-    unsigned CharWidth = getContext().getCharWidth();
-    if (FieldTBAAInfo.BaseType)
-      FieldTBAAInfo.Offset +=
-          Layout.getFieldOffset(field->getFieldIndex()) / CharWidth;
+    // All union members are encoded to be of the same special type.
+    if (FieldTBAAInfo.BaseType && rec->isUnion())
+      FieldTBAAInfo = TBAAAccessInfo::getUnionMemberInfo(FieldTBAAInfo.BaseType,
+                                                         FieldTBAAInfo.Offset);
+
+    // For now we describe accesses to direct and indirect union members as if
+    // they were at the offset of their outermost enclosing union.
+    if (!FieldTBAAInfo.isUnionMember()) {
+      // Adjust offset to be relative to the base type.
+      const ASTRecordLayout &Layout =
+          getContext().getASTRecordLayout(field->getParent());
+      unsigned CharWidth = getContext().getCharWidth();
+      if (FieldTBAAInfo.BaseType)
+        FieldTBAAInfo.Offset +=
+            Layout.getFieldOffset(field->getFieldIndex()) / CharWidth;
 
-    // Update the final access type.
-    FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType);
+      // Update the final access type.
+      FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType);
+    }
   }
 
   Address addr = base.getAddress();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to