[clang] [BoundsSafety] Allow 'counted_by' attribute on pointers in structs in C (PR #90786)

2024-05-14 Thread Kees Cook via cfe-commits

https://github.com/kees approved this pull request.

Thanks for the updates! Let's get this in and continue with the rest of the 
support. :)

https://github.com/llvm/llvm-project/pull/90786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [BoundsSafety] Allow 'counted_by' attribute on pointers in structs in C (PR #90786)

2024-05-11 Thread Kees Cook via cfe-commits


@@ -0,0 +1,187 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __counted_by(f)  __attribute__((counted_by(f)))
+
+struct bar;
+
+struct not_found {
+  int count;
+  struct bar *fam[] __counted_by(bork); // expected-error {{use of undeclared 
identifier 'bork'}}
+};
+
+struct no_found_count_not_in_substruct {
+  unsigned long flags;
+  unsigned char count; // expected-note {{'count' declared here}}
+  struct A {
+int dummy;
+int array[] __counted_by(count); // expected-error {{'counted_by' field 
'count' isn't within the same struct as the flexible array}}
+  } a;
+};
+
+struct not_found_count_not_in_unnamed_substruct {
+  unsigned char count; // expected-note {{'count' declared here}}
+  struct {
+int dummy;
+int array[] __counted_by(count); // expected-error {{'counted_by' field 
'count' isn't within the same struct as the flexible array}}
+  } a;
+};
+
+struct not_found_count_not_in_unnamed_substruct_2 {
+  struct {
+unsigned char count; // expected-note {{'count' declared here}}
+  };
+  struct {
+int dummy;
+int array[] __counted_by(count); // expected-error {{'counted_by' field 
'count' isn't within the same struct as the flexible array}}
+  } a;
+};
+
+struct not_found_count_in_other_unnamed_substruct {
+  struct {
+unsigned char count;
+  } a1;
+
+  struct {
+int dummy;
+int array[] __counted_by(count); // expected-error {{use of undeclared 
identifier 'count'}}
+  };
+};
+
+struct not_found_count_in_other_substruct {
+  struct _a1 {
+unsigned char count;
+  } a1;
+
+  struct {
+int dummy;
+int array[] __counted_by(count); // expected-error {{use of undeclared 
identifier 'count'}}
+  };
+};
+
+struct not_found_count_in_other_substruct_2 {
+  struct _a2 {
+unsigned char count;
+  } a2;
+
+  int array[] __counted_by(count); // expected-error {{use of undeclared 
identifier 'count'}}
+};
+
+struct not_found_suggest {
+  int bork;
+  struct bar *fam[] __counted_by(blork); // expected-error {{use of undeclared 
identifier 'blork'}}
+};
+
+int global; // expected-note {{'global' declared here}}
+
+struct found_outside_of_struct {
+  int bork;
+  struct bar *fam[] __counted_by(global); // expected-error {{field 'global' 
in 'counted_by' not inside structure}}
+};
+
+struct self_referrential {
+  int bork;
+  struct bar *self[] __counted_by(self); // expected-error {{use of undeclared 
identifier 'self'}}
+};
+
+struct non_int_count {
+  double dbl_count;
+  struct bar *fam[] __counted_by(dbl_count); // expected-error {{'counted_by' 
requires a non-boolean integer type argument}}
+};
+
+struct array_of_ints_count {
+  int integers[2];
+  struct bar *fam[] __counted_by(integers); // expected-error {{'counted_by' 
requires a non-boolean integer type argument}}
+};
+
+struct not_a_fam {
+  int count;
+  // expected-error@+1{{'counted_by' cannot be applied to a pointer with 
pointee of unknown size because 'struct bar' is an incomplete type}}
+  struct bar *non_fam __counted_by(count);
+};
+
+struct not_a_c99_fam {
+  int count;
+  struct bar *non_c99_fam[0] __counted_by(count); // expected-error 
{{'counted_by' on arrays only applies to C99 flexible array members}}
+};
+
+struct annotated_with_anon_struct {
+  unsigned long flags;
+  struct {
+unsigned char count;
+int array[] __counted_by(crount); // expected-error {{use of undeclared 
identifier 'crount'}}
+  };
+};
+
+//==
+// __counted_by on a struct VLA with element type that has unknown size
+//==
+
+struct size_unknown; // expected-note 2{{forward declaration of 'struct 
size_unknown'}}
+struct on_member_arr_incomplete_ty_ty_pos {
+  int count;
+  // expected-error@+2{{'counted_by' only applies to pointers or C99 flexible 
array members}}
+  // expected-error@+1{{array has incomplete element type 'struct 
size_unknown'}}
+  struct size_unknown buf[] __counted_by(count);
+};
+
+struct on_member_arr_incomplete_const_ty_ty_pos {
+  int count;
+  // expected-error@+2{{'counted_by' only applies to pointers or C99 flexible 
array members}}
+  // expected-error@+1{{array has incomplete element type 'const struct 
size_unknown'}}
+  const struct size_unknown buf[] __counted_by(count);
+};
+
+struct on_member_arr_void_ty_ty_pos {
+  int count;
+  // expected-error@+2{{'counted_by' only applies to pointers or C99 flexible 
array members}}
+  // expected-error@+1{{array has incomplete element type 'void'}}
+  void buf[] __counted_by(count);
+};
+
+typedef void(fn_ty)(int);
+
+struct on_member_arr_fn_ty {
+  int count;
+  // expected-error@+2{{'counted_by' only applies to pointers or C99 flexible 
array members}}
+  // expected-error@+1{{'buf' declared as array of functions of type 'fn_ty' 
(aka 'void (int)')}}
+  fn_ty buf[] __counted_by(count);
+};

kees wrote:

Please add a positive test for `typedef 

[clang] [BoundsSafety] Allow 'counted_by' attribute on pointers in structs in C (PR #90786)

2024-05-11 Thread Kees Cook via cfe-commits

kees wrote:

> Consider this example. It tries to illustrate why putting `__counted_by()` on 
> a pointer to a structs containing flexible array members doesn't make sense.
> 
> ```c
> struct HasFAM {
> int count;
> char buffer[] __counted_by(count); // This is OK
> };
> 
> struct BufferOfFAMS {
> int count;
> struct HasFAM* fams __counted_by(count); // This is invalid
> };
> ```

Agreed: structs with flexible array members must be considered to be 
singletons. This property is actually important for  being able to have 
`__builtin_dynamic_object_size()` work on pointers to flexible array structs. 
i.e.:

```
size_t func(struct foo *p)
{
return__builtin_dynamic_object_size(p, 0);
}
```

This must always return `SIZE_MAX` for fixed-size arrays since the pointer may 
be in the middle of a larger array of `struct foo`s, but if it is a struct with 
a flexible array marked with `counted_by`, then we know deterministically what 
the size is, since it must be a single complete object.

https://github.com/llvm/llvm-project/pull/90786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [BoundsSafety] Allow 'counted_by' attribute on pointers in structs in C (PR #90786)

2024-05-10 Thread Kees Cook via cfe-commits

kees wrote:


> As @apple-fcloutier @rapidsna noted this is how `-fbounds-safety` is 
> currently implemented (because its much simpler) but it is a restriction that 
> could be lifted in future by only requiring `struct bar` to be defined at the 
> point that `foo::bar` is used rather than when the `__counted_by` attribute 
> is applied. Given that this PR is allowing `__counted_by` in a new place 
> (pointers in structs) I think its reasonable for us to place restrictions on 
> that until we've proved its actually necessary to have the more complicated 
> implementation.

The main concern I have with delaying support for this is that header files 
could find themselves in a state where they could not be refactored without 
removing counted_by attributes that refer to now-incomplete structs.

For example, in Linux we've been separating structs from implementations, and 
that usually means using incomplete structs as much as possible to avoid lots 
of #includes.

So, no objection on this PR, but I think the more complete behavior needs to 
follow soonish. :)


https://github.com/llvm/llvm-project/pull/90786
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-29 Thread Kees Cook via cfe-commits

https://github.com/kees closed https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Add wraps attribute (for granular integer overflow handling) (PR #86618)

2024-04-29 Thread Kees Cook via cfe-commits

kees wrote:

My thinking about this attribute tends to follow from my desire not to change 
the C type system, but rather to adjust the behavior of the sanitizers. This 
means that it is possible to still build the Linux kernel without the 
sanitizers (the build just ignores the attribute), or with (where the attribute 
now has meaning). I feel like adding new types is a much larger/different 
thing, as it ends up requiring that the core language has to do something 
specific about overflow, etc. Under the sanitizers, there is a well-defined 
behavioral modification for overflow. Having the attribute augment the 
sanitizer means the semantics of regular C remain unchanged. We could even go 
so far as making it an error to encounter the attribute when the sanitizers 
aren't enabled.

https://github.com/llvm/llvm-project/pull/86618
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-26 Thread Kees Cook via cfe-commits


@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);

kees wrote:

Ah-ha, thanks! Okay, I've updated the comments with just a minor tweak.

https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-26 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/89707

>From c061c8f49f2b916bb5e60ec35d3e448ac13f2b72 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Mon, 22 Apr 2024 17:53:32 -0700
Subject: [PATCH 1/4] [CodeGen][i386] Move -mregparm storage earlier and fix
 Runtime calls

When building the Linux kernel for i386, the -mregparm=3 option is
enabled. Crashes were observed in the sanitizer handler functions,
and the problem was found to be mismatched calling convention.

As was fixed in commit c167c0a4dcdb ("[BuildLibCalls] infer inreg param
attrs from NumRegisterParameters"), call arguments need to be marked as
"in register" when -mregparm is set. Use the same helper developed there
to update the function arguments.

Since CreateRuntimeFunction() is actually part of CodeGenModule, storage
of the -mregparm value is also moved to the constructor, as doing this
in Release() is too late.

Fixes: https://github.com/llvm/llvm-project/issues/89670
---
 clang/lib/CodeGen/CodeGenModule.cpp| 12 +++-
 llvm/include/llvm/Transforms/Utils/BuildLibCalls.h |  3 +++
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp|  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 0c447b20cef40d..c314cd9e2e9f4c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include 
 
 using namespace clang;
@@ -442,6 +443,11 @@ CodeGenModule::CodeGenModule(ASTContext ,
   }
 ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path);
   }
+
+  // Record mregparm value now so it is visible through all of codegen.
+  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
+getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
+  CodeGenOpts.NumRegisterParameters);
 }
 
 CodeGenModule::~CodeGenModule() {}
@@ -980,11 +986,6 @@ void CodeGenModule::Release() {
   NMD->addOperand(MD);
   }
 
-  // Record mregparm value now so it is visible through rest of codegen.
-  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
-getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
-  CodeGenOpts.NumRegisterParameters);
-
   if (CodeGenOpts.DwarfVersion) {
 getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version",
   CodeGenOpts.DwarfVersion);
@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);
 }
   }
 
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h 
b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 9ebb9500777421..a0f2f43e287c71 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -62,6 +62,9 @@ namespace llvm {
  LibFunc TheLibFunc, AttributeList AttributeList,
  FunctionType *Invalid, ArgsTy... Args) = delete;
 
+  // Handle -mregparm for the given function.
+  void markRegisterParameterAttributes(Function *F);
+
   /// Check whether the library function is available on target and also that
   /// it in the current Module is a Function with the right type.
   bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp 
b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index ed0ed345435c45..e97506b4bbd95d 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1255,7 +1255,7 @@ static void setRetExtAttr(Function ,
 }
 
 // Modeled after X86TargetLowering::markLibCallAttributes.
-static void markRegisterParameterAttributes(Function *F) {
+void llvm::markRegisterParameterAttributes(Function *F) {
   if (!F->arg_size() || F->isVarArg())
 return;
 

>From b7abf7c63e7169096401b958b767520a5301e417 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Tue, 23 Apr 2024 17:16:36 -0700
Subject: [PATCH 2/4] add tests for -mregparm vs runtime calls

---
 clang/test/CodeGen/regparm-flag.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/clang/test/CodeGen/regparm-flag.c 
b/clang/test/CodeGen/regparm-flag.c
index c35b53cd4e1990..d888c1e344c03b 100644
--- a/clang/test/CodeGen/regparm-flag.c
+++ b/clang/test/CodeGen/regparm-flag.c
@@ -1,4 +1,8 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -mregparm 4 %s -emit-llvm -o - 
| FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fsanitize=array-bounds %s 
-emit-llvm -o - | FileCheck %s --check-prefix=RUNTIME0
+// RUN: %clang_cc1 -triple 

[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-24 Thread Kees Cook via cfe-commits


@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);

kees wrote:

Comment added. Is this what you had in mind?

https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-24 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/89707

>From c061c8f49f2b916bb5e60ec35d3e448ac13f2b72 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Mon, 22 Apr 2024 17:53:32 -0700
Subject: [PATCH 1/3] [CodeGen][i386] Move -mregparm storage earlier and fix
 Runtime calls

When building the Linux kernel for i386, the -mregparm=3 option is
enabled. Crashes were observed in the sanitizer handler functions,
and the problem was found to be mismatched calling convention.

As was fixed in commit c167c0a4dcdb ("[BuildLibCalls] infer inreg param
attrs from NumRegisterParameters"), call arguments need to be marked as
"in register" when -mregparm is set. Use the same helper developed there
to update the function arguments.

Since CreateRuntimeFunction() is actually part of CodeGenModule, storage
of the -mregparm value is also moved to the constructor, as doing this
in Release() is too late.

Fixes: https://github.com/llvm/llvm-project/issues/89670
---
 clang/lib/CodeGen/CodeGenModule.cpp| 12 +++-
 llvm/include/llvm/Transforms/Utils/BuildLibCalls.h |  3 +++
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp|  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 0c447b20cef40d..c314cd9e2e9f4c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include 
 
 using namespace clang;
@@ -442,6 +443,11 @@ CodeGenModule::CodeGenModule(ASTContext ,
   }
 ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path);
   }
+
+  // Record mregparm value now so it is visible through all of codegen.
+  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
+getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
+  CodeGenOpts.NumRegisterParameters);
 }
 
 CodeGenModule::~CodeGenModule() {}
@@ -980,11 +986,6 @@ void CodeGenModule::Release() {
   NMD->addOperand(MD);
   }
 
-  // Record mregparm value now so it is visible through rest of codegen.
-  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
-getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
-  CodeGenOpts.NumRegisterParameters);
-
   if (CodeGenOpts.DwarfVersion) {
 getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version",
   CodeGenOpts.DwarfVersion);
@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);
 }
   }
 
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h 
b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 9ebb9500777421..a0f2f43e287c71 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -62,6 +62,9 @@ namespace llvm {
  LibFunc TheLibFunc, AttributeList AttributeList,
  FunctionType *Invalid, ArgsTy... Args) = delete;
 
+  // Handle -mregparm for the given function.
+  void markRegisterParameterAttributes(Function *F);
+
   /// Check whether the library function is available on target and also that
   /// it in the current Module is a Function with the right type.
   bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp 
b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index ed0ed345435c45..e97506b4bbd95d 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1255,7 +1255,7 @@ static void setRetExtAttr(Function ,
 }
 
 // Modeled after X86TargetLowering::markLibCallAttributes.
-static void markRegisterParameterAttributes(Function *F) {
+void llvm::markRegisterParameterAttributes(Function *F) {
   if (!F->arg_size() || F->isVarArg())
 return;
 

>From b7abf7c63e7169096401b958b767520a5301e417 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Tue, 23 Apr 2024 17:16:36 -0700
Subject: [PATCH 2/3] add tests for -mregparm vs runtime calls

---
 clang/test/CodeGen/regparm-flag.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/clang/test/CodeGen/regparm-flag.c 
b/clang/test/CodeGen/regparm-flag.c
index c35b53cd4e1990..d888c1e344c03b 100644
--- a/clang/test/CodeGen/regparm-flag.c
+++ b/clang/test/CodeGen/regparm-flag.c
@@ -1,4 +1,8 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -mregparm 4 %s -emit-llvm -o - 
| FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fsanitize=array-bounds %s 
-emit-llvm -o - | FileCheck %s --check-prefix=RUNTIME0
+// RUN: %clang_cc1 -triple 

[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-24 Thread Kees Cook via cfe-commits


@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);

kees wrote:

This seems like a large proposed change; is it worth it for this corner case? I 
think I would prefer to fix this as I have it now.

https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-23 Thread Kees Cook via cfe-commits


@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);

kees wrote:

Oh, I think I see what you mean -- this is the common place where all functions 
should get their attributes set. Can the libcall code use this? (As in, can we 
remove `markRegisterParameterAttributes()` entirely and move the logic into 
`SetLLVMFunctionAttributes()` or is that just fantastic overkill?

https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-23 Thread Kees Cook via cfe-commits


@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);

kees wrote:

I was trying to basically duplicate what was already done for the libcall 
functions. What is gained by using CGFunctionInfo? (Things already work as-is 
-- though generally it does looks like -mregparm support is kinda ad-hoc.)

https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-23 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/89707

>From c061c8f49f2b916bb5e60ec35d3e448ac13f2b72 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Mon, 22 Apr 2024 17:53:32 -0700
Subject: [PATCH 1/2] [CodeGen][i386] Move -mregparm storage earlier and fix
 Runtime calls

When building the Linux kernel for i386, the -mregparm=3 option is
enabled. Crashes were observed in the sanitizer handler functions,
and the problem was found to be mismatched calling convention.

As was fixed in commit c167c0a4dcdb ("[BuildLibCalls] infer inreg param
attrs from NumRegisterParameters"), call arguments need to be marked as
"in register" when -mregparm is set. Use the same helper developed there
to update the function arguments.

Since CreateRuntimeFunction() is actually part of CodeGenModule, storage
of the -mregparm value is also moved to the constructor, as doing this
in Release() is too late.

Fixes: https://github.com/llvm/llvm-project/issues/89670
---
 clang/lib/CodeGen/CodeGenModule.cpp| 12 +++-
 llvm/include/llvm/Transforms/Utils/BuildLibCalls.h |  3 +++
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp|  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 0c447b20cef40d..c314cd9e2e9f4c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include 
 
 using namespace clang;
@@ -442,6 +443,11 @@ CodeGenModule::CodeGenModule(ASTContext ,
   }
 ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path);
   }
+
+  // Record mregparm value now so it is visible through all of codegen.
+  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
+getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
+  CodeGenOpts.NumRegisterParameters);
 }
 
 CodeGenModule::~CodeGenModule() {}
@@ -980,11 +986,6 @@ void CodeGenModule::Release() {
   NMD->addOperand(MD);
   }
 
-  // Record mregparm value now so it is visible through rest of codegen.
-  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
-getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
-  CodeGenOpts.NumRegisterParameters);
-
   if (CodeGenOpts.DwarfVersion) {
 getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version",
   CodeGenOpts.DwarfVersion);
@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);
 }
   }
 
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h 
b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 9ebb9500777421..a0f2f43e287c71 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -62,6 +62,9 @@ namespace llvm {
  LibFunc TheLibFunc, AttributeList AttributeList,
  FunctionType *Invalid, ArgsTy... Args) = delete;
 
+  // Handle -mregparm for the given function.
+  void markRegisterParameterAttributes(Function *F);
+
   /// Check whether the library function is available on target and also that
   /// it in the current Module is a Function with the right type.
   bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp 
b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index ed0ed345435c45..e97506b4bbd95d 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1255,7 +1255,7 @@ static void setRetExtAttr(Function ,
 }
 
 // Modeled after X86TargetLowering::markLibCallAttributes.
-static void markRegisterParameterAttributes(Function *F) {
+void llvm::markRegisterParameterAttributes(Function *F) {
   if (!F->arg_size() || F->isVarArg())
 return;
 

>From b7abf7c63e7169096401b958b767520a5301e417 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Tue, 23 Apr 2024 17:16:36 -0700
Subject: [PATCH 2/2] add tests for -mregparm vs runtime calls

---
 clang/test/CodeGen/regparm-flag.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/clang/test/CodeGen/regparm-flag.c 
b/clang/test/CodeGen/regparm-flag.c
index c35b53cd4e1990..d888c1e344c03b 100644
--- a/clang/test/CodeGen/regparm-flag.c
+++ b/clang/test/CodeGen/regparm-flag.c
@@ -1,4 +1,8 @@
 // RUN: %clang_cc1 -triple i386-unknown-unknown -mregparm 4 %s -emit-llvm -o - 
| FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fsanitize=array-bounds %s 
-emit-llvm -o - | FileCheck %s --check-prefix=RUNTIME0
+// RUN: %clang_cc1 -triple 

[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-22 Thread Kees Cook via cfe-commits

kees wrote:

This needs test cases, which I'll add tomorrow. I just wanted to get the core 
logic up for review before I hit EOD...

https://github.com/llvm/llvm-project/pull/89707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [CodeGen][i386] Move -mregparm storage earlier and fix Runtime calls (PR #89707)

2024-04-22 Thread Kees Cook via cfe-commits

https://github.com/kees created https://github.com/llvm/llvm-project/pull/89707

When building the Linux kernel for i386, the -mregparm=3 option is enabled. 
Crashes were observed in the sanitizer handler functions, and the problem was 
found to be mismatched calling convention.

As was fixed in commit c167c0a4dcdb ("[BuildLibCalls] infer inreg param attrs 
from NumRegisterParameters"), call arguments need to be marked as "in register" 
when -mregparm is set. Use the same helper developed there to update the 
function arguments.

Since CreateRuntimeFunction() is actually part of CodeGenModule, storage of the 
-mregparm value is also moved to the constructor, as doing this in Release() is 
too late.

Fixes: https://github.com/llvm/llvm-project/issues/89670

>From c061c8f49f2b916bb5e60ec35d3e448ac13f2b72 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Mon, 22 Apr 2024 17:53:32 -0700
Subject: [PATCH] [CodeGen][i386] Move -mregparm storage earlier and fix
 Runtime calls

When building the Linux kernel for i386, the -mregparm=3 option is
enabled. Crashes were observed in the sanitizer handler functions,
and the problem was found to be mismatched calling convention.

As was fixed in commit c167c0a4dcdb ("[BuildLibCalls] infer inreg param
attrs from NumRegisterParameters"), call arguments need to be marked as
"in register" when -mregparm is set. Use the same helper developed there
to update the function arguments.

Since CreateRuntimeFunction() is actually part of CodeGenModule, storage
of the -mregparm value is also moved to the constructor, as doing this
in Release() is too late.

Fixes: https://github.com/llvm/llvm-project/issues/89670
---
 clang/lib/CodeGen/CodeGenModule.cpp| 12 +++-
 llvm/include/llvm/Transforms/Utils/BuildLibCalls.h |  3 +++
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp|  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 0c447b20cef40d..c314cd9e2e9f4c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/TargetParser/X86TargetParser.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include 
 
 using namespace clang;
@@ -442,6 +443,11 @@ CodeGenModule::CodeGenModule(ASTContext ,
   }
 ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path);
   }
+
+  // Record mregparm value now so it is visible through all of codegen.
+  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
+getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
+  CodeGenOpts.NumRegisterParameters);
 }
 
 CodeGenModule::~CodeGenModule() {}
@@ -980,11 +986,6 @@ void CodeGenModule::Release() {
   NMD->addOperand(MD);
   }
 
-  // Record mregparm value now so it is visible through rest of codegen.
-  if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
-getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
-  CodeGenOpts.NumRegisterParameters);
-
   if (CodeGenOpts.DwarfVersion) {
 getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version",
   CodeGenOpts.DwarfVersion);
@@ -4781,6 +4782,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType 
*FTy, StringRef Name,
 }
   }
   setDSOLocal(F);
+  markRegisterParameterAttributes(F);
 }
   }
 
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h 
b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 9ebb9500777421..a0f2f43e287c71 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -62,6 +62,9 @@ namespace llvm {
  LibFunc TheLibFunc, AttributeList AttributeList,
  FunctionType *Invalid, ArgsTy... Args) = delete;
 
+  // Handle -mregparm for the given function.
+  void markRegisterParameterAttributes(Function *F);
+
   /// Check whether the library function is available on target and also that
   /// it in the current Module is a Function with the right type.
   bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI,
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp 
b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index ed0ed345435c45..e97506b4bbd95d 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1255,7 +1255,7 @@ static void setRetExtAttr(Function ,
 }
 
 // Modeled after X86TargetLowering::markLibCallAttributes.
-static void markRegisterParameterAttributes(Function *F) {
+void llvm::markRegisterParameterAttributes(Function *F) {
   if (!F->arg_size() || F->isVarArg())
 return;
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org

[clang] [Clang][NFC] Improve testing for the flexible array member (PR #89462)

2024-04-19 Thread Kees Cook via cfe-commits

kees wrote:

Does this still work for cases where there are multiple flexible arrays? e.g.

```
struct weird_protocol {
unsigned int cmd_type;
unsigned int data_len;
union {
struct cmd_one one[];
struct cmd_two two[];
struct cmd_three three[];
unsigned char raw_bytes[];
};
};
```

https://github.com/llvm/llvm-project/pull/89462
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Handle structs with inner structs and no fields (PR #89126)

2024-04-17 Thread Kees Cook via cfe-commits

https://github.com/kees approved this pull request.

Tests and logic adjustment looks good to me.

https://github.com/llvm/llvm-project/pull/89126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Add wraps attribute (for granular integer overflow handling) (PR #86618)

2024-04-09 Thread Kees Cook via cfe-commits

kees wrote:

This now passes my behavioral testing suite for wrapping; yay! (The earlier 
version didn't cover truncate, so this is very nice now.)

https://github.com/llvm/llvm-project/pull/86618
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Fix __is_array returning true for zero-sized arrays (PR #86652)

2024-03-28 Thread Kees Cook via cfe-commits

kees wrote:

I guess I don't have a strong opinion here, since these helpers are specific to 
C++, and I've been generally trying to remove fixed-size 0-sized arrays in C 
projects (i.e. the Linux kernel). I do care about C flexible arrays (and their 
associated extensions), though. I suspect there is some existing weirdness for 
C++ when using static initializers, though. See https://godbolt.org/z/PnYMcxhfM

```
struct S { int a; int b[]; };
static struct S s = { .a = 42, .b = { 10, 20, 30, 40 }, };
static struct S z = { .a = 13, };
```

Specifically `z.b` is a 0-sized flexible array, but `s.b` isn't. Should they 
have different results?

https://github.com/llvm/llvm-project/pull/86652
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][objectsize] Generate object size calculation for sub-objects (PR #86858)

2024-03-28 Thread Kees Cook via cfe-commits

https://github.com/kees commented:

I can't speak to the implementation details, but this passes my PoC tests that 
examine subobjects.

https://github.com/llvm/llvm-project/pull/86858
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Fix __is_array returning true for zero-sized arrays (PR #86652)

2024-03-28 Thread Kees Cook via cfe-commits

kees wrote:

> My natural inclination is that it is array-like, but... that just makes me 
> want `__is_array` to return `true` for it all the more.

Yes. An array is an array, regardless of its size. The size is just a storage 
characteristic. It'd almost be like arguing that `NaN` isn't a float.

> I wonder if we should be considering making zero-length arrays into a 
> non-conforming extension behind a flag?

It was never as simple as zero-length arrays. It was also _trailing arrays of 
any size_ in structures. Those extensions create huge problems with being able 
to reason about the characteristics of arrays, and we do have a flag for this: 
`-fstrict-flex-arrays=3`. It is designed to disable all of the "treat it like a 
flexible array" logic for the old "fake flexible array" objects. But a flexible 
array is _still an array_. And also note that its size can _change at runtime_ 
(see the `counted_by` attribute), which even more clearly argues that you can't 
claim a flexible array isn't an array.

https://github.com/llvm/llvm-project/pull/86652
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-27 Thread Kees Cook via cfe-commits

https://github.com/kees closed https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-27 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From 59c81a85cd9652d02b15a79553259351a59e8534 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH] [Clang][Sema] Allow flexible arrays in unions and alone in
 structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};
But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Additionally fixes a CodeGen bug with flexible array members in unions
in C++, which was found when adding a testcase for:

union { char x[]; } z = {0};

which only had Sema tests originally.

Fixes GH#84565.
---
 clang/docs/ReleaseNotes.rst   |   3 +
 .../clang/Basic/DiagnosticSemaKinds.td|   5 -
 clang/lib/Sema/SemaDecl.cpp   |   8 +-
 clang/lib/Sema/SemaInit.cpp   |  11 +-
 clang/test/C/drs/dr5xx.c  |   2 +-
 clang/test/CodeGen/flexible-array-init.c  |  89 -
 clang/test/CodeGen/flexible-array-init.cpp|  24 +++
 clang/test/Sema/flexible-array-in-union.c | 187 +-
 clang/test/Sema/transparent-union.c   |   4 +-
 9 files changed, 303 insertions(+), 30 deletions(-)
 create mode 100644 clang/test/CodeGen/flexible-array-init.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0fdd9e3fb3eee2..ccc399d36dbb54 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -304,6 +304,9 @@ Improvements to Clang's diagnostics
   annotated with the ``clang::always_destroy`` attribute.
   Fixes #GH68686, #GH86486
 
+- ``-Wmicrosoft``, ``-Wgnu``, or ``-pedantic`` is now required to diagnose C99
+  flexible array members in a union or alone in a struct. Fixes GH#84565.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index fc727cef9cd835..51af81bf1f6fc5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6464,9 +6464,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6481,8 +6478,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8b44d24f5273aa..0bd88ece2aa544 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19429,15 +19429,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = 

[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-26 Thread Kees Cook via cfe-commits


@@ -271,6 +271,9 @@ Improvements to Clang's diagnostics
 - Clang now correctly diagnoses no arguments to a variadic macro parameter as 
a C23/C++20 extension.
   Fixes #GH84495.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct. Fixes #GB84565.

kees wrote:

Oh thank you! Nice catch.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-25 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/8] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits


@@ -21,10 +27,76 @@ struct __attribute((packed, aligned(4))) { char a; int x; 
char z[]; } e = { 1, 2
 struct { int x; char y[]; } f = { 1, { 13, 15 } };
 // CHECK: @f ={{.*}} global <{ i32, [2 x i8] }> <{ i32 1, [2 x i8] c"\0D\0F" }>
 
-union {
-  struct {
-int a;
-char b[];
-  } x;
-} in_union = {};
-// CHECK: @in_union ={{.*}} global %union.anon zeroinitializer
+struct __attribute((packed)) { short a; char z[]; } g = { 2, { 11, 13, 15 } };
+// CHECK: @g ={{.*}} <{ i16, [3 x i8] }> <{ i16 2, [3 x i8] c"\0B\0D\0F" }>,
+
+// Last member is the potential flexible array, unnamed initializer skips it.
+struct { int a; union { int b; short x; }; int c; int d; } h = {1, 2, {}, 3};
+// CHECK: @h = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, 
i32 0, i32 3 }
+struct { int a; union { int b; short x[0]; }; int c; int d; } h0 = {1, 2, {}, 
3};
+// CHECK: @h0 = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 
}, i32 0, i32 3 }
+struct { int a; union { int b; short x[1]; }; int c; int d; } h1 = {1, 2, {}, 
3};
+// CHECK: @h1 = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 
}, i32 0, i32 3 }
+struct {
+  int a;
+  union {
+int b;
+struct {
+  struct { } __ununsed;
+  short x[];
+};
+  };
+  int c;
+  int d;
+} hiding = {1, 2, {}, 3};
+// CHECK: @hiding = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 
2 }, i32 0, i32 3 }
+struct { int a; union { int b; short x[]; }; int c; int d; } hf = {1, 2, {}, 
3};
+// CHECK: @hf = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 
}, i32 0, i32 3 }
+
+// First member is the potential flexible array, initialization requires 
braces.
+struct { int a; union { short x; int b; }; int c; int d; } i = {1, 2, {}, 3};
+// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 
x i8] } { i16 2, [2 x i8] undef }, i32 0, i32 3 }
+struct { int a; union { short x[0]; int b; }; int c; int d; } i0 = {1, {}, 2, 
3};
+// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { 
[0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 2, i32 
3 }
+struct { int a; union { short x[1]; int b; }; int c; int d; } i1 = {1, {2}, 
{}, 3};
+// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { 
[1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 0, i32 3 }
+struct { int a; union { short x[]; int b; }; int c; int d; } i_f = {4, {}, {}, 
6};
+// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { 
[0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 0, i32 
6 }
+
+// Named initializers; order doesn't matter.
+struct { int a; union { int b; short x; }; int c; int d; } hn = {.a = 1, .x = 
2, .c = 3};
+// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 
x i8] } { i16 2, [2 x i8] undef }, i32 3, i32 0 }
+struct { int a; union { int b; short x[0]; }; int c; int d; } hn0 = {.a = 1, 
.x = {2}, .c = 3};
+// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { 
[0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 3, i32 
0 }
+struct { int a; union { int b; short x[1]; }; int c; int d; } hn1 = {.a = 1, 
.x = {2}, .c = 3};
+// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { 
[1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 3, i32 0 }
+
+struct { char a[]; } empty_struct = {};
+// CHECK: @empty_struct ={{.*}} global %struct.anon{{.*}} zeroinitializer, 
align 1
+
+struct { char a[]; } empty_struct0 = {0};
+// CHECK: @empty_struct0 = global { [1 x i8] } zeroinitializer, align 1
+
+union { struct { int a; char b[]; }; } struct_in_union = {};
+// CHECK: @struct_in_union = global %union.anon{{.*}} zeroinitializer, align 4
+
+union { struct { int a; char b[]; }; } struct_in_union0 = {0};
+// CHECK: @struct_in_union0 = global %union.anon{{.*}} zeroinitializer, align 4
+
+union { int a; char b[]; } trailing_in_union = {};
+// CHECK: @trailing_in_union = global %union.anon{{.*}} zeroinitializer, align 
4
+
+union { int a; char b[]; } trailing_in_union0 = {0};
+// CHECK: @trailing_in_union0 = global %union.anon{{.*}} zeroinitializer, 
align 4
+
+union { char a[]; } only_in_union = {};
+// CHECK: @only_in_union = global %union.anon{{.*}} zeroinitializer, align 1
+
+union { char a[]; } only_in_union0 = {0};
+// CHECK: @only_in_union0 = global { [1 x i8] } zeroinitializer, align 1
+
+union { char a[]; int b; } first_in_union = {};
+// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } { [0 x i8] 
zeroinitializer, [4 x i8] undef }, align 4
+
+union { char a[]; int b; } first_in_union0 = {};
+// CHECK: @first_in_union0 = global { [0 x i8], [4 x i8] } { [0 x i8] 
zeroinitializer, [4 x i8] undef }, align 4

kees wrote:

Whoops! Yes, thank you. Fixing.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits 

[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

https://github.com/kees edited https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/7] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema] Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

https://github.com/kees edited https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/7] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

kees wrote:

Ah, well, regardless, I think I found where the 
`StructuredList->setInitializedFieldInUnion` was actually missing, and then I 
could undo my zero-init-only and everything still appears fixed. Doing a full 
debug build test run now...

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

kees wrote:

> > because we don't yet support non-zero initialization (as described in 
> > commit 
> > [5955a0f](https://github.com/llvm/llvm-project/commit/5955a0f9375a8c0b134eeb4a8de5155dcce7c94f))
> 
> I'm confused. We support non-zero init, and there are tests for non-zero init 
> in that commit. The commit message mentions dynamic initialization, but 
> that's not non-zero; that's "requires code to run at program startup".

I guess I'm confused about this test:

```
  // Flexible array initialization is currently not supported by constant
  // evaluation. Make sure we emit an error message, for now.
  constexpr A c = {1, 2, 3}; // expected-error {{constexpr variable 'c' must be 
initialized by a constant expression}}
```

Why is 1, 2, 3 not considered a constant expression here? (Am I confusing 
"integer constant expression" for something else?)


https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/6] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-21 Thread Kees Cook via cfe-commits

kees wrote:

> `InitListChecker::CheckStructUnionTypes` never calls 
> `StructuredList->setInitializedFieldInUnion`

Ah-ha, thank you for the pointer. I think I've figured this out: initialization 
was avoiding flexible arrays because we don't yet support non-zero 
initialization (as described in commit 
5955a0f9375a8c0b134eeb4a8de5155dcce7c94f). However, we _do_ want to allow zero 
initialization. This fixes the Assert and the sketchy `undef`s in the codegen 
output. Now we get zero inits:

```diff
-// If we've hit the flexible array member at the end, we're done.
-if (Field->getType()->isIncompleteArrayType())
+// If we've hit a flexible array member, only allow zero initialization.
+// Other values are not yet supported. See commit 5955a0f9375a.
+if (Field->getType()->isIncompleteArrayType() && !IsZeroInitializer(Init))
```

> I think things can be simplified a bit further

Ah yes! That's much nicer. Since we're processing the Union, we can just return 
completely instead of continuing the loop. Thanks!


https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-20 Thread Kees Cook via cfe-commits

kees wrote:

> Is this an existing bug? i.e. it's the CodeGen test for `union { char x[]; } 
> x = {0};` ... :P

Confirmed. Adding a CodeGen test for `union { char x[]; } x = {0};` without any 
of the changes from this PR still hits the assert. I assume this was from 
making flex array initialization work in C++ in commit 5955a0f9375a, which only 
added tests for structs. @efriedma-quic do you know what the "expected" 
situation should be here? This is the assert in 
`CodeGenModule::EmitGlobalVarDefinition`:

```
  CharUnits VarSize = getContext().getTypeSizeInChars(ASTTy) +
  InitDecl->getFlexibleArrayInitChars(getContext());
  CharUnits CstSize = CharUnits::fromQuantity(
  getDataLayout().getTypeAllocSize(Init->getType()));
  assert(VarSize == CstSize && "Emitted constant has unexpected size");
```

It feels like there's logic missing for calculating union size with an 
initialized flexible array? Taking just my last commit is sufficient to trip 
this assert 
(https://github.com/llvm/llvm-project/pull/84428/commits/06bc935771399672d0140340b226b9c2b04e13fd).


https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-20 Thread Kees Cook via cfe-commits

kees wrote:

Hmpf. Build failure encountered under an Assert:

```
# | Assertion failed: VarSize == CstSize && "Emitted constant has unexpected 
size", file C:\ws\src\clang\lib\CodeGen\CodeGenModule.cpp, line 5294
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ 
and include the crash backtrace, preprocessed source, and associated run script.
# | Stack dump:
# | 0.  Program arguments: 
c:\\ws\\src\\build\\build-clang-windows\\bin\\clang.exe -cc1 -internal-isystem 
C:\\ws\\src\\build\\build-clang-windows\\lib\\clang\\19\\include 
-nostdsysteminc -triple i386-unknown-unknown -x c++ -emit-llvm -o - 
C:\\ws\\src\\clang\\test\\CodeGen\\flexible-array-init.cpp
# | 1.  C:\ws\src\clang\test\CodeGen\flexible-array-init.cpp:12:1: current 
parser token 'union'
# | 2.  C:\ws\src\clang\test\CodeGen\flexible-array-init.cpp:4:7: LLVM IR 
generation of declaration '_u0'
```

Is this an existing bug? i.e. it's the CodeGen test for `union { char x[]; } x 
= {0};` ... :P

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-19 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/5] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-19 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */
+struct _name1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1 = {
+  10,
+  42,/* initializes "b" */
+};
 
-// expected-no-diagnostics
+struct _name1i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1i = {
+  .a = 10,
+  .b = 42,
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2 = {
+  12,
+  13,
+  { 'c' },   /* stock-error {{initialization of flexible array member is not 
allowed}} */

kees wrote:

This seems to work:

```diff
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 011deed7a9a9..79cf2eed46fe 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -2437,6 +2437,7 @@ void InitListChecker::CheckStructUnionTypes(
   }
 
   if (Field == FieldEnd || !Field->getType()->isIncompleteArrayType() ||
+  (RD->isUnion() && Field != RD->field_begin()) ||
   Index >= IList->getNumInits())
 return;
 
```

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-19 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */
+struct _name1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1 = {
+  10,
+  42,/* initializes "b" */
+};
 
-// expected-no-diagnostics
+struct _name1i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1i = {
+  .a = 10,
+  .b = 42,
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2 = {
+  12,
+  13,
+  { 'c' },   /* stock-error {{initialization of flexible array member is not 
allowed}} */

kees wrote:

Dang, this doesn't work: it can't initialize a flex array if it's the first (or 
only) thing in the union. Hmm.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-19 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}

kees wrote:

I added the `empty_union` codegen test for this. Do you mean I'm not checking 
for `{0}` explicitly? I will add that.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-19 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */
+struct _name1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1 = {
+  10,
+  42,/* initializes "b" */
+};
 
-// expected-no-diagnostics
+struct _name1i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1i = {
+  .a = 10,
+  .b = 42,
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2 = {
+  12,
+  13,
+  { 'c' },   /* stock-error {{initialization of flexible array member is not 
allowed}} */

kees wrote:

Prior to this PR, it wasn't possible to use a flex array in a union, so I'll 
include it here and recheck the entire test suite. Thanks for finding this! I 
had found the function, but hadn't yet figured out how the position was being 
walked. :)

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}

kees wrote:

Ah, no, checking initializer positions is needed too. I've added tests to 
`clang/test/CodeGen/flexible-array-init.c`.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/3] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits

https://github.com/kees edited https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */
+struct _name1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1 = {
+  10,
+  42,/* initializes "b" */
+};
 
-// expected-no-diagnostics
+struct _name1i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1i = {
+  .a = 10,
+  .b = 42,
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2 = {
+  12,
+  13,
+  { 'c' },   /* stock-error {{initialization of flexible array member is not 
allowed}} */

kees wrote:

Yeah, I see what you mean. It looks like (when not using named initializers) a 
union initializer is applied to the first member. (i.e. when the type is 
anything other than a flexible array.) When it's a flex array, it processes it 
but only allows it to be zero initialized. Regardless, here's the results for 
unnamed initializers:

```
struct { int a; union { int b; short x; }; int c; int d; } h = {1, 2, {}, 3};
// @h = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, 
i32 3 }
struct { int a; union { int b; short x[0]; }; int c; int d; } h0 = {1, 2, {}, 
3};
// @h0 = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, 
i32 3 }
struct { int a; union { int b; short x[1]; }; int c; int d; } h1 = {1, 2, {}, 
3};
// @h1 = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, 
i32 3 }
struct { int a; union { int b; short x[]; }; int c; int d; } hf = {1, 2, {}, 3};
// @hf = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 3, 
i32 0 }
```

`hf` has clearly gone weird.

And here's what happens when using named initializers:

```
struct { int a; union { int b; short x; }; int c; int d; } hn = {.a = 1, .x = 
2, .c = 3};
// @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } 
{ i16 2, [2 x i8] undef }, i32 3, i32 0 }
struct { int a; union { int b; short x[0]; }; int c; int d; } hn0 = {.a = 1, .x 
= {2}, .c = 3};
// @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x 
i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 3, i32 0 }
struct { int a; union { int b; short x[1]; }; int c; int d; } hn1 = {.a = 1, .x 
= {2}, .c = 3};
// @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x 
i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 3, i32 0 }
```

The initialization of `x` in `hn0` just disappears, but it does generate the 
warning `excess elements in array initializer`, which I guess makes sense if 
it's not treating it as a fake flexible array?

Notably tttempting to initialize the flexible array by name will fail to build:
```
/srv/code/llvm-project/clang/test/CodeGen/flexible-array-init.c:48:82: error: 
initialization of flexible array member is not allowed  
  
   48 | struct { int a; union { int b; short x[]; }; int c; int d; } hnf = {.a 
= 1, .x = {2}, .c = 3 };
  
  |  

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}

kees wrote:

It looks like clang/test/CodeGen/object-size.c (via commit 
804af933f7310f78d91e13ad8a13b64b00249614) already covers a lot of this? I'll 
explicitly add unions to that test too.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */
+struct _name1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1 = {
+  10,
+  42,/* initializes "b" */
+};
 
-// expected-no-diagnostics
+struct _name1i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1i = {
+  .a = 10,
+  .b = 42,
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2 = {
+  12,
+  13,
+  { 'c' },   /* stock-error {{initialization of flexible array member is not 
allowed}} */

kees wrote:

I was surprised by this too, especially since using named initializers does 
produce a useful warning. Improving this message seems out of scope for this 
PR, though, yes?

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits


@@ -1,13 +1,158 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
-// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++
+// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only 
-fms-compatibility -Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */
+struct _name1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1 = {
+  10,
+  42,/* initializes "b" */
+};
 
-// expected-no-diagnostics
+struct _name1i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+} name1i = {
+  .a = 10,
+  .b = 42,
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2 = {
+  12,
+  13,
+  { 'c' },   /* stock-error {{initialization of flexible array member is not 
allowed}} */
+};
+
+/* Initialization of flexible array in a union is never allowed. */
+struct _name2i {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+ stock-note {{initialized flexible array member 'x' is here}}
+   */
+  };
+} name2i = {
+  .a = 12,
+  .b = 13,  /* stock-note {{previous initialization is here}} */
+  .x = { 'c' }, /* stock-error {{initialization of flexible array member is 
not allowed}}
+   c-warning {{initializer overrides prior initialization of 
this subobject}}
+   cpp-error {{initializer partially overrides prior 
initialization of this subobject}}
+ */
+};
+
+/* Flexible array initialization always allowed when not in a union,
+   and when struct has another member.
+ */
+struct _okay {
+  int a;
+  char x[];
+} okay = {
+  22,
+  { 'x', 'y', 'z' },
+};
+
+struct _okayi {
+  int a;
+  char x[];
+} okayi = {
+  .a = 22,
+  .x = { 'x', 'y', 'z' },
+};
+
+struct _okay0 {
+  int a;
+  char x[];
+} okay0 = { };
+
+struct _flex_extension {
+  char x[]; /* gnu-warning {{flexible array member 'x' in otherwise empty 
struct is a GNU extension}}
+   microsoft-warning {{flexible array member 'x' in otherwise 
empty struct is a Microsoft extension}}
+ */
+} flex_extension = {
+  { 'x', 'y', 'z' },
+};
+
+struct _flex_extensioni {
+  char x[]; /* gnu-warning {{flexible array member 'x' in otherwise empty 
struct is a GNU extension}}
+   microsoft-warning {{flexible array member 'x' in otherwise 
empty struct is a Microsoft extension}}
+ */
+} flex_extensioni = {
+  .x = { 'x', 'y', 'z' },
+};
 
+struct already_hidden {
+  int a;
+  union {
+int b;
+struct {
+  struct { } __empty;  // gnu-warning {{empty struct is a GNU extension}}
+  char x[];
+};
+  };
+};
+
+struct still_zero_sized {
+  struct { } __unused;  // gnu-warning {{empty struct is a GNU extension}}
+  int x[];
+};
+
+struct warn1 {
+  int a;
+  union {
+int b;
+char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU 
extension}}
+ microsoft-warning {{flexible array member 'x' in a union is a 
Microsoft extension}}
+   */
+  };
+};
+
+struct warn2 {
+  int x[];  /* gnu-warning {{flexible array member 'x' in otherwise empty 
struct is a GNU extension}}
+   microsoft-warning {{flexible array member 'x' in otherwise 
empty 

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits


@@ -1,13 +1,58 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
+// RUN: %clang_cc1 %s -verify -fsyntax-only
 // RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
 // RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=microsoft -fsyntax-only -fms-compatibility 
-Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */

kees wrote:

I've added a bunch more tests now.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-18 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH 1/2] [Clang][Sema]: Allow flexible arrays in unions and alone
 in structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-13 Thread Kees Cook via cfe-commits


@@ -1,13 +1,58 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
+// RUN: %clang_cc1 %s -verify -fsyntax-only
 // RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
 // RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=microsoft -fsyntax-only -fms-compatibility 
-Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */

kees wrote:

Ah-ha! I think I see what you mean now. I will work on constructing some 
negative tests cases.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-13 Thread Kees Cook via cfe-commits


@@ -1,13 +1,58 @@
-// RUN: %clang_cc1 %s -verify=c -fsyntax-only
+// RUN: %clang_cc1 %s -verify -fsyntax-only
 // RUN: %clang_cc1 %s -verify -fsyntax-only -x c++
-// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility
 // RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++
+// RUN: %clang_cc1 %s -verify=gnu -fsyntax-only 
-Wgnu-flexible-array-union-member -Wgnu-empty-struct
+// RUN: %clang_cc1 %s -verify=microsoft -fsyntax-only -fms-compatibility 
-Wmicrosoft
 
 // The test checks that an attempt to initialize union with flexible array
 // member with an initializer list doesn't crash clang.
 
 
-union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union 
is not allowed}}
+union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a 
union is a GNU extension}}
+microsoft-warning {{flexible array member 'x' 
in a union is a Microsoft extension}}
+  */

kees wrote:

Clang's initialization for flexible arrays works for all the cases I've 
checked. Part of Linux's migration to C99 flex arrays uncovered a 
`__builtin_object_size()` bug that has was fixed in Clang 17 via commit 
804af933f7310f78d91e13ad8a13b64b00249614 which includes several initialization 
tests beyond the ones that are touched in this PR. As a result I have 
confidence in this having appropriate test coverage, though if more is needed, 
it should be trivial to add.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-12 Thread Kees Cook via cfe-commits

kees wrote:

> That one ends up not being a problem, but presumably you are wanting to 
> change that top-level 'struct' to be a 'union'?

No, I want to collapse the entire macro into just `TYPE NAME[]`. Right now the 
Linux kernel uses the `DECLARE_FLEX_ARRAY` macro _in_ over 200 unions and 
structs. For example:
```
struct iwl_tx_cmd {
...
union {
DECLARE_FLEX_ARRAY(u8, payload);
DECLARE_FLEX_ARRAY(struct ieee80211_hdr, hdr);
};
};
```
or
```
struct pda_antenna_gain {
DECLARE_FLEX_ARRAY(struct {
u8 gain_5GHz;   /* 0.25 dBi units */
u8 gain_2GHz;   /* 0.25 dBi units */
} __packed, antenna);
} __packed;
```
or
```
struct coreboot_device {
struct device dev;
union {
struct coreboot_table_entry entry;
struct lb_cbmem_ref cbmem_ref;
struct lb_cbmem_entry cbmem_entry;
struct lb_framebuffer framebuffer;
DECLARE_FLEX_ARRAY(u8, raw);
};
};
```
> What is the problem with making it `TYPE NAME[0];` ?

Because these are not 0-sized arrays. To have unambiguous mitigation of array 
and buffer overflows, the compiler must have explicitly defined array sizes. 
The Linux kernel uses both `-fstrict-flex-array=3` to get rid of all the 
horrible "fake" flexible arrays (since it was ambiguous and forced GCC and 
Clang to leave trailing arrays uncovered by the array-bounds sanitizer, 
`__builtin_object_size()`, and `__builtin_dynamic_object_size()`), and the 
`counted_by` attribute to gain deterministic size information. Note that with 
`counted_by`, a 0-sized array is a perfectly valid size, so there is no such 
thing has "just ignore 0-sized arrays". The point is to gain determinism. We 
spent 6 years cleaning this up already. Lots of details here:
https://people.kernel.org/kees/bounded-flexible-arrays-in-c

Basically, Linux needed to kill the "if it's a trailing array, it's a flexible 
array" extension, without removing the "flex arrays can be in unions" 
extension. The former was solved with GCC and Clang's addition of the 
`-fstrict-flex-arrays=3` option). The latter was accomplished by bypassing GCC 
and Clang's syntax checking, but I'd prefer that the extension to support flex 
arrays in unions just be properly extended to C99 flex arrays to avoid the 
syntactic hack. (i.e. it's already possible with the hack; why make the hack 
necessary?)


https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-11 Thread Kees Cook via cfe-commits

kees wrote:

> There are currently over 200 separate unions using the work-around.

Specifically, this is what Linux uses for getting C99 flexible arrays in unions 
and alone in structs:

```
#define DECLARE_FLEX_ARRAY(TYPE, NAME)\
struct { \
struct { } __empty_ ## NAME; \
TYPE NAME[]; \
}
```

The conversion from the "struct hack" to C99 flexible arrays is complete, 
except for this wart.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-11 Thread Kees Cook via cfe-commits

kees wrote:

> C99 added flexible array members, and the C99 rationale says the feature was 
> added specifically as a replacement for the common idiom known as the "struct 
> hack" for creating a structure containing a variable-size array.

This is my reasoning as well -- we (Linux dev hat on) have been converting a 
giant codebase from the "struct hack" to C99 flexible arrays, but the Standard 
(IMO) made a mistake in not recognizing the need for flexible arrays to be in 
unions. This was (and is) a well-used code pattern for the "struct hack" that 
has persisted for decades. Now, I'm not here to argue for a change to the 
Standard (that's a much different effort), but rather to argue that the 
existing "struck hack" _extensions_ be made to recognize the C99 flexible array 
as well, since otherwise it is not possible to cleanly update existing code.

The fact that Clang _already_ supports C99 flex arrays in unions (through other 
GNU extension) should serve as evidence of its sensible direct extension to C99 
flexible arrays. The "not in unions" check can already be evaded by using the 
"empty struct" GNU extension. So, to be clear: I'm talking about making the 
_existing_ extensions be uniformly applied. I don't want to be distracted by 
whether or not this should be part of the C Standard -- I would like Clang to 
do what it already does but without requiring the pointless syntactic 
obfuscation currently in use by Linux.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-09 Thread Kees Cook via cfe-commits

kees wrote:

> Left my comment on the main list, but I don't see this as a well motivated 
> change, and even if GCC supported it, it would still be a very difficult to 
> motivate extension without massive historical workloads already using it.

This is needed by the Linux kernel, and is in active use. Directly converting 
from [0] to [] isn't possible due to all the places flex arrays are used in 
unions. Linux is working around this by tricking the syntax checker currently, 
which needlessly complicates things. There are currently over 200 separate 
unions using the work-around.

So, this fixes the accidental lack of the existing [0] extensions not being 
directly applied to [], and doesn't cause problems for any other users. What 
solution should Linux use if not fixing this directly nor providing something 
like -fflex-array-extensions?



https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits

kees wrote:

For historical reference, the first version of this PR is visible here now: 
https://github.com/kees/llvm-project/commit/ce31f1d75f060b32e5dbc5756fe41cc8eaac83a6

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits

https://github.com/kees edited https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits

https://github.com/kees updated https://github.com/llvm/llvm-project/pull/84428

>From eb5138b45fa450737600050ad8dabdcb27513d42 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH] [Clang][Sema]: Allow flexible arrays in unions and alone in
 structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Clang needs to explicitly allow these extensions directly for C99
flexible arrays, since they are common code patterns in active use by the
Linux kernel (and other projects). Such projects have been using either
0-sized arrays (which is considered deprecated in favor of C99 flexible
array members) or via obfuscated syntax, both of which complicate their
code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Remove the default error diagnostics for this but continue to provide
warnings under Microsoft or GNU extensions checks. This will allow for
a seamless transition for code bases away from 0-sized arrays without
losing existing code patterns. Add explicit checking for the warnings
under various constructions.

Fixes #84565
---
 clang/docs/ReleaseNotes.rst   |  3 ++
 .../clang/Basic/DiagnosticSemaKinds.td|  5 --
 clang/lib/Sema/SemaDecl.cpp   |  8 +--
 clang/test/C/drs/dr5xx.c  |  2 +-
 clang/test/Sema/flexible-array-in-union.c | 53 +--
 clang/test/Sema/transparent-union.c   |  4 +-
 6 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..960ab7e021cf2f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -214,6 +214,9 @@ Improvements to Clang's diagnostics
 - Clang now diagnoses lambda function expressions being implicitly cast to 
boolean values, under ``-Wpointer-bool-conversion``.
   Fixes #GH82512.
 
+- ``-Wmicrosoft`` or ``-Wgnu`` is now required to diagnose C99 flexible
+  array members in a union or alone in a struct.
+
 Improvements to Clang's time-trace
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c8dfdc08f5ea07..f09121b8c7ec8f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6447,9 +6447,6 @@ def ext_c99_flexible_array_member : Extension<
 def err_flexible_array_virtual_base : Error<
   "flexible array member %0 not allowed in "
   "%select{struct|interface|union|class|enum}1 which has a virtual base 
class">;
-def err_flexible_array_empty_aggregate : Error<
-  "flexible array member %0 not allowed in otherwise empty "
-  "%select{struct|interface|union|class|enum}1">;
 def err_flexible_array_has_nontrivial_dtor : Error<
   "flexible array member %0 of type %1 with non-trivial destruction">;
 def ext_flexible_array_in_struct : Extension<
@@ -6464,8 +6461,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension<
   "flexible array member %0 in otherwise empty "
   "%select{struct|interface|union|class|enum}1 is a Microsoft extension">,
   InGroup;
-def err_flexible_array_union : Error<
-  "flexible array member %0 in a union is not allowed">;
 def ext_flexible_array_union_ms : Extension<
   "flexible array member %0 in a union is a Microsoft extension">,
   InGroup;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 67e56a917a51de..053122b588246b 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19357,15 +19357,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation 
RecLoc, Decl *EnclosingDecl,
 } else if (Record->isUnion())
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_union_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_union_gnu
- : diag::err_flexible_array_union;
+   : diag::ext_flexible_array_union_gnu;
 else if (NumNamedMembers < 1)
   DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
-   : getLangOpts().CPlusPlus
- ? diag::ext_flexible_array_empty_aggregate_gnu
- : diag::err_flexible_array_empty_aggregate;
+   : diag::ext_flexible_array_empty_aggregate_gnu;
 
 if (DiagID)
   

[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits

kees wrote:

GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53548
Clang: https://github.com/llvm/llvm-project/issues/84565

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits

kees wrote:

> > I didn't do this because it seemed like this would change a lot of existing 
> > test cases
> 
> Can you give some examples of tests that would fail? If we have tests 
> checking that these fail, then perhaps those tests should add 
> `-Werror=pedantic` so that they can continue to fail.

I think I accidentally exaggerated; there were only a couple I found:

clang/test/Sema/flexible-array-in-union.c
clang/test/Sema/transparent-union.c

But sure, I would be fine moving this to pedantic mode, and I'll see if 
anything else pops up. Should I update this PR or create a new one?

> Weirder, clang and msvc already support this, but only in C++ mode! msvc 
> supports this in C mode! GCC does not support this in either!
> 
> C++ mode: https://godbolt.org/z/esjnE7bnY
> C mode: https://godbolt.org/z/hGE9dhjE1

Right, AIUI, C++ has more strict rules about not allowing 0-sized objects (e.g. 
"struct { } __empty;" in C++ has a sizeof() == 1), which ends up affecting some 
of the syntax sanity checking.


https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits

kees wrote:

> Rather than have a `-f` flag to opt into this extension, I think instead you 
> should just make it always available, then have tests that it can be used, 
> but will trigger diagnostics under `-Wpedantic` since it's technically a 
> language extension (IIUC).

I didn't do this because it seemed like this would change a lot of existing 
test cases, and past attempts at landing extensions to the C Standard (or even 
changes to existing extensions) seemed to get a lot of push-back when they 
weren't opt-in. And specifically for this feature, I, too, would prefer this 
was enabled by default, but given that there are _three_ different ways Clang 
already communicates why it should be considered bad syntax, it didn't seem a 
politically viable solution. An explicit opt-in knob seems the safest way to go.

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-08 Thread Kees Cook via cfe-commits


@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 %s -verify=c -fsyntax-only -fflex-array-extensions
+
+// The test checks that flexible array members do not emit warnings when
+// -fflex-array-extensions when used in a union or alone in a structure.
+
+struct already_hidden {
+   int a;

kees wrote:

I ran the helper script a few times, actually. It never flagged this, just 
syntax in SemaDecl.cpp

https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-03-08 Thread Kees Cook via cfe-commits

kees wrote:

With PR #82432 landed, this PR is redundant. Thanks for changing the option 
name! Closing...

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-03-08 Thread Kees Cook via cfe-commits

https://github.com/kees closed https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-07 Thread Kees Cook via cfe-commits

https://github.com/kees edited https://github.com/llvm/llvm-project/pull/84428
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Sema]: Allow flexible arrays in unions and alone in structs (PR #84428)

2024-03-07 Thread Kees Cook via cfe-commits

https://github.com/kees created https://github.com/llvm/llvm-project/pull/84428

GNU and MSVC have extensions where flexible array members (or their equivalent) 
can be in unions or alone in structs. This is already fully supported in Clang 
through the 0-sized array ("fake flexible array") extension or when C99 
flexible array members have been syntactically obfuscated.

Provide a feature flag to explicitly allow these extensions directly for C99 
flexible arrays, since they are common code patterns in active use by the Linux 
kernel (and other projects). Such projects have been using either 0-sized 
arrays (which is considered deprecated in favor of C99 flexible array members) 
or via obfuscated syntax, both of which complicate their code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Add -fflex-array-extensions so that C99 flexible array members can be fully 
supported in these scenarios without errors, allowing for a seamless transition 
for code bases away from 0-sized arrays without losing existing code patterns.

>From ce31f1d75f060b32e5dbc5756fe41cc8eaac83a6 Mon Sep 17 00:00:00 2001
From: Kees Cook 
Date: Thu, 7 Mar 2024 17:03:09 -0800
Subject: [PATCH] [Clang][Sema]: Allow flexible arrays in unions and alone in
 structs

GNU and MSVC have extensions where flexible array members (or their
equivalent) can be in unions or alone in structs. This is already fully
supported in Clang through the 0-sized array ("fake flexible array")
extension or when C99 flexible array members have been syntactically
obfuscated.

Provide a feature flag to explicitly allow these extensions directly
for C99 flexible arrays, since they are common code patterns in active
use by the Linux kernel (and other projects). Such projects have been
using either 0-sized arrays (which is considered deprecated in favor
of C99 flexible array members) or via obfuscated syntax, both of which
complicate their code bases.

For example, these do not error by default:

union one {
int a;
int b[0];
};

union two {
int a;
struct {
struct { } __empty;
int b[];
};
};

But this does:

union three {
int a;
int b[];
};

Add -fflex-array-extensions so that C99 flexible array members can
be fully supported in these scenarios without errors, allowing for a
seamless transition for code bases away from 0-sized arrays without
losing existing code patterns.
---
 clang/docs/ReleaseNotes.rst   |  6 
 clang/include/clang/Basic/LangOptions.def |  1 +
 clang/include/clang/Driver/Options.td |  5 +++
 clang/lib/Driver/ToolChains/Clang.cpp |  1 +
 clang/lib/Sema/SemaDecl.cpp   | 19 +-
 clang/test/Sema/flex-array-extensions.c   | 43 +++
 6 files changed, 66 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/Sema/flex-array-extensions.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1b901a27fd19d1..c15ff98c60b8b2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -164,6 +164,12 @@ New Compiler Flags
   This diagnostic can be disabled to make ``-Wmissing-field-initializers`` 
behave
   like it did before Clang 18.x. Fixes (`#56628 
`_)
 
+- ``-fflex-array-extensions``. Allows for the use of extensions to flexible 
array members
+  syntax so they can be directly used in unions and alone in structs. This 
construction is
+  already fully supported by Clang under certain conditions, so this provide 
support for
+  codebases that would otherwise not be able to migrate from 0-sized "fake" 
flexible array
+  members to C99 flexible array members.
+
 Deprecated Compiler Flags
 -
 
diff --git a/clang/include/clang/Basic/LangOptions.def 
b/clang/include/clang/Basic/LangOptions.def
index 2b42b521a30363..c5c810f63d93d1 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -453,6 +453,7 @@ LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin 
matrix type")
 ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
  StrictFlexArraysLevelKind::Default,
  "Rely on strict definition of flexible arrays")
+COMPATIBLE_LANGOPT(FlexArrayExtensions, 1, 0, "Enable flexible array 
extensions")
 
 COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0")
 
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index bef38738fde82e..bde7ba456fd27d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2819,6 +2819,11 @@ defm fine_grained_bitfield_accesses : BoolOption<"f", 

[clang] Sanitizer: Support -fwrapv with -fsanitize=signed-integer-overflow (PR #82432)

2024-02-20 Thread Kees Cook via cfe-commits

https://github.com/kees approved this pull request.

Working as expected for me!

https://github.com/llvm/llvm-project/pull/82432
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Sanitizer: Support -fwrapv with -fsanitize=signed-integer-overflow (PR #82432)

2024-02-20 Thread Kees Cook via cfe-commits

kees wrote:

This doesn't seem to do anything for me with the Linux kernel's -next branch 
(which supports -sio as `CONFIG_UBSAN_SIGNED_WRAP=y`). e.g. I see no behavioral 
difference with test_ubsan.ko nor the expected atomic overflows.

https://github.com/llvm/llvm-project/pull/82432
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-19 Thread Kees Cook via cfe-commits

kees wrote:

GCC folks have not answered. Adding -wrap keeps the behavior for -overflow the 
same between GCC and Clang. Can we please move this forward and land it as is? 
We can trivially change this in the future if we need to.

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-14 Thread Kees Cook via cfe-commits

kees wrote:

> Sure -fwrapv makes wraparound defined, but it doesn't prevent us from making 
> -fsanitize=signed-integer-overflow useful. "-fwrapv => no 
> signed-integer-overflow" is not a solid argument.
> 
> I think we can try making -fsanitize=signed-integer-overflow effective even 
> when -fwrapv if specified. -fsanitize=signed-integer-overflow is rare in the 
> wild, probably rarer when combined with -fwrapv.
.

In earlier GCC discussions, it seemed very much like the 
`-fsanitize=signed-integer-overflow` was meant for UB only, but maybe I 
misunderstood. See replies leading up to this:
https://gcc.gnu.org/pipermail/gcc-patches/2023-September/630578.html

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-14 Thread Kees Cook via cfe-commits

kees wrote:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102317

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-14 Thread Kees Cook via cfe-commits

kees wrote:

> > > > > Why not just enforce -fsanitize=signed-integer-overflow with -fwrapv? 
> > > > > I suspect it's just overlook, and not intentional behavior.
> > > > 
> > > > 
> > > > +1
> > > > We should consider this direction
> > > 
> > > 
> > > The UB-vs-non-UB seemed to be a really specific goal in the existing 
> > > code. i.e. that the sanitizer was disabled didn't look like an accident. 
> > > For people using this to find _only_ UB, this would be a behavioral 
> > > change, so to me it seems like a separate name makes the most sense. 
> > > Anyone wanting wrap-around checking can use -wrap, and anyone wanting UB 
> > > checking can use -overflow.
> > 
> > 
> > Isn't this still UB even with -fwrapv? UB is a language feature, not 
> > compiler.
> 
> `-fwrapv` is essentially a language dialect that defines the behavior of 
> integer wraparound. It is no longer UB in compilations using that mode.

Right. `-fwrapv` defines the signed integer overflow resolution strategy. 
Without `-fwrapv` it is undefined (default language feature). With `-fwrapv` it 
is defined as 2s-complement wrap-around (and is well defined, like unsigned 
integer overflow).

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-14 Thread Kees Cook via cfe-commits

kees wrote:

> > Why not just enforce -fsanitize=signed-integer-overflow with -fwrapv? I 
> > suspect it's just overlook, and not intentional behavior.
> 
> +1
> 
> We should consider this direction

The UB-vs-non-UB seemed to be a really specific goal in the existing code. i.e. 
that the sanitizer was disabled didn't look like an accident. For people using 
this to find _only_ UB, this would be a behavioral change, so to me it seems 
like a separate name makes the most sense. Anyone wanting wrap-around checking 
can use -wrap, and anyone wanting UB checking can use -overflow.

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-13 Thread Kees Cook via cfe-commits

kees wrote:

> > and likely in production kernels. :)
> 
> I doubt you meant running in production. 

I very much do -- I expect to use this like we use the array-bounds sanitizer, 
which is on in production in at least Ubuntu and Android. It may be a long road 
to getting sane coverage without endless false positives, of course. But the 
intent is to use it in production.


https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-12 Thread Kees Cook via cfe-commits

kees wrote:

I've tested this with the Linux kernel, and it is working as expected. It is 
ready and waiting for this option to gain back a bunch of sanitizer coverage 
for CIs and likely in production kernels. :)

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [Sanitizer] add signed-integer-wrap sanitizer (PR #80089)

2024-02-01 Thread Kees Cook via cfe-commits

kees wrote:

> Hey, does anyone know why the CI bot (buildkite) fails my builds for a 
> trailing whitespace in a file I did not touch?
> 
> It says there's some trailing whitespace in some documentation files that my 
> PR doesn't touch (unless I'm misinterpreting its output): 
> https://buildkite.com/llvm-project/clang-ci/builds/11179#018d6167-a058-43d4-9d2e-6dbd9410a41a

That's mostly just bad luck. It's been fixed now, though, in commit 
0217d2e089afba8ca0713787521ba52a1056 so a it should pass correctly now.

https://github.com/llvm/llvm-project/pull/80089
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-10 Thread Kees Cook via cfe-commits

https://github.com/kees approved this pull request.

I can't speak to the LLVM code changes, but behaviorally, this passes all the 
torture tests I'd expect it to (kernel-tools/fortify/array-bounds.c and full 
kernel builds with its hundreds of `counted_by` annotations).

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-10 Thread Kees Cook via cfe-commits

kees wrote:

That's it! Everything works. :) Ship it!

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-10 Thread Kees Cook via cfe-commits

kees wrote:

Thanks! It's fixed for me now. I continue to be highly unlucky, though, and 
keep finding weird stuff. It seems to segfault just parsing libc headers under 
`-D_FORTIFY_SOURCE=3` (but not `=2` or lower):

```
0.  Program arguments: 
/srv/built-compilers/llvm/counted_by/install/bin/clang -Wall -O2 
-fstrict-flex-arrays=3 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -fsanitize=bounds 
-fsanitize=object-size -fsanitize-undefined-trap-on-error -c -o array-bounds.o 
array-bounds.c  
  
1.   parser at end of file 

2.  Per-file LLVM IR generation 

3.  /usr/include/x86_64-linux-gnu/bits/string_fortified.h:57:1 
: 
Generating code for declaration 'memset'
  
... 
 #4 0x55ef8b3ac144 clang::DeclContext::decls_begin() const  
   
 #5 0x55ef892afcad CountCountedByAttrs(clang::RecordDecl const*) 
CGBuiltin.cpp:0:0  
```

In my header at line 57, I see:

```
__fortify_function void *
__NTH (memset (void *__dest, int __ch, size_t __len))
{
  return __builtin___memset_chk (__dest, __ch, __len,
 __glibc_objsize0 (__dest));
}
```

where `__glibc_objsize0` is:

```
sys/cdefs.h:# define __glibc_objsize0(__o) __builtin_dynamic_object_size (__o, 
0)
```

I assume something weird is happening with `bdos` here.

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-09 Thread Kees Cook via cfe-commits

kees wrote:

Accessing `mi->ints` is unambiguous (it would use the declared `count_ints`) 
but I'm fine to wait and fix that later. The flex array union is gloriously 
rare in the kernel. As for whole object, I say pick smallest from all available 
under bdos(x, 1) and largest for bdos(x, 0), or just say 0. Again, corner case 
of a corner case.

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-09 Thread Kees Cook via cfe-commits

kees wrote:

> @kees, the issue should be fixed with the newest push.

Nice! This is so extremely close. It fixed all but 1 instance in the torture 
testing. The remaining seems to be union order sensitive. O_o This arrangement 
still reports non-zero for the `int count_bytes` one, unless I move it below 
the `signed char count_ints` counter:

```
#include 
#include 
#include 
#include 

#define noinline__attribute__((__noinline__))
#define __counted_by(member)__attribute__((__counted_by__(member)))

#define DECLARE_BOUNDED_FLEX_ARRAY(COUNT_TYPE, COUNT, TYPE, NAME)   \
struct {\
COUNT_TYPE COUNT;   \
TYPE NAME[] __counted_by(COUNT);\
}

#define DECLARE_FLEX_ARRAY(TYPE, NAME)  \
struct {\
struct { } __empty_ ## NAME;\
TYPE NAME[];\
}

struct multi {
unsigned long flags;
union {
/* count member type intentionally mismatched to induce padding 
*/
DECLARE_BOUNDED_FLEX_ARRAY(int, count_bytes, unsigned char, 
bytes);
DECLARE_BOUNDED_FLEX_ARRAY(signed char,  count_ints,  unsigned 
char, ints);
DECLARE_FLEX_ARRAY(unsigned char, unsafe);
};
};

/* Helper to hide the allocation size by using a leaf function. */
static struct multi * noinline alloc_multi_ints(int index)
{
struct multi *p;

p = malloc(sizeof(*p) + index * sizeof(*p->ints));
p->count_ints = index;

return p;
}

/* Helper to hide the allocation size by using a leaf function. */
static struct multi * noinline alloc_multi_bytes(int index)
{
struct multi *p;

p = malloc(sizeof(*p) + index * sizeof(*p->bytes));
p->count_bytes = index;

return p;
}

int main(void)
{
struct multi *mi, *mb;
volatile int negative = -3;
volatile int index = 10;

mi = alloc_multi_ints(index);
mi->count_ints = negative;
printf("%zu\n", __builtin_dynamic_object_size(mi->ints, 1));

mb = alloc_multi_bytes(index);
mb->count_bytes = negative;
printf("%zu\n", __builtin_dynamic_object_size(mb->bytes, 1));

return 0;
}
```

Returns:

```
253
0
```

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-09 Thread Kees Cook via cfe-commits

kees wrote:

> > but the value is nonsense, so we must return 0 so that anything checking 
> > lengths will not write anything to the array.
> 
> @kees Oh, I see. I did not know such the convention but it makes sense. Is it 
> documented somewhere?

This is new territory (having a multiplier for finding size that may be 
negative), so there's nothing to document it beyond FORTIFY users needing to 
maintain safe checks. The only safe size to return for "impossible size" is 0 
in this case, otherwise a confused state (negative `count`) can lead to FORTIFY 
bypasses.

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-09 Thread Kees Cook via cfe-commits

kees wrote:

> > This prints a wrapped calculation instead of the expected "0".
> 
> Shouldn't `getDefaultBuiltinObjectSizeResult` return `-1`? Have you tried 
> `-2` to see if it's returning the negative count value or it happens to be 
> equal to the default value?

Well, maybe it's not wrapped, but it should absolutely return 0, not -1. The 
return value of -1 means "I don't know how large this is". In this case we _do_ 
know, but it's nonsense, so we must return 0 so that anything checking lengths 
will not write anything to the array.

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-09 Thread Kees Cook via cfe-commits

kees wrote:

Thanks! The update fixes the anon struct issue I hit. I've found one more 
issue, though this appears to be a miscalculation with a pathological `count` 
value (i.e. `count` is signed type and contains a negative value):

```
struct annotated {
unsigned long flags;
int count;
int array __counted_by(count);
};

static struct annotated * noinline alloc_annotated(int index)
{
struct annotated *p;

p = malloc(sizeof(*p) + index * sizeof(*p->array));
p->count = index;

return p;
}

...
   struct annotated *a;

  c = alloc_annotated(index);
   c->count = -1;
   printf("%zu\n", __builtin_dynamic_object_size(p->array, 1));
```

This prints a wrapped calculation instead of the expected "0".

https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Implement the 'counted_by' attribute (PR #76348)

2024-01-08 Thread Kees Cook via cfe-commits

kees wrote:

Possibly due to bug #72032 , I can get this tree to crash using the latest 
`array-bounds.c` test from
https://github.com/kees/kernel-tools/tree/trunk/fortify

Specifically:

```
struct anon_struct {
unsigned long flags;
long count;
int array[] __counted_by(count);
};

struct composite {
unsigned stuff;
struct annotated inner;
};

static struct composite * noinline alloc_composite(int index)
{
struct composite *p;

p = malloc(sizeof(*p) + index * sizeof(*p->inner.array));
p->inner.count = index;

return p;
}

   struct composite *c;
   c = alloc_composite(index);
   ... actions on c->inner.array ...
```

```
3.  array-bounds.c:363:1 : Generating code 
for declaration 'counted_by_seen_by_bdos'
4.  array-bounds.c:405:2 : LLVM IR 
generation of compound statement ('{}')
...
 #4 0x556574d5b858 
clang::CodeGen::CodeGenTBAA::getAccessInfo(clang::QualType)
 #5 0x55657489e25d 
clang::CodeGen::CodeGenModule::getTBAAAccessInfo(clang::QualType)
 #6 0x5565748a9c20 
clang::CodeGen::CodeGenModule::getNaturalTypeAlignment(clang::QualType, 
clang::CodeGen::LValueBaseInfo*, clang::CodeGen::TBAAAccessInfo*, bool)
 #7 0x5565749fc4a2 EmitPointerWithAlignment(clang::Expr const*, 
clang::CodeGen::LValueBaseInfo*, clang::CodeGen::TBAAAccessInfo*, 
clang::CodeGen::KnownNonNull_t, clang::CodeGen::CodeGenFunction&) CGExpr.cpp:0:0
 #8 0x5565749f94bd 
clang::CodeGen::CodeGenFunction::EmitCountedByFieldExpr(clang::Expr const*, 
clang::FieldDecl const*, clang::FieldDecl const*)
```



https://github.com/llvm/llvm-project/pull/76348
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Generate the GEP instead of adding AST nodes (PR #73730)

2023-12-01 Thread Kees Cook via cfe-commits

kees wrote:

> ```
> int foo(struct s *p, int index) {
>   return __builtin_dynamic_object_size((++p)->array[index], 1);
> }
> ```
> 
> This _shouldn't_ increment `p`, but we need to get the array size of the 
> element _after_ `p`. I suspect that this is probably a horrible security 
> violation in the making, but we at least need to handle such an eventuality 
> gracefully. For a first pass, I think returning `-1` or `0` (depending on the 
> default return value) for _any_ pointer arithmetic is probably okay...maybe 
> even the best option?
> 
> @kees Thoughts?

Owch. That really shouldn't be legal: we can't have arrays of structs ending in 
a FAM, so the `++p` isn't sane. That said, I guess pointer arithmetic must 
follow `sizeof`, so `++p` isn't illegal; it's just awful. FWIW, GCC just 
returns `SIZE_MAX` for this sort of "aaah, where are you going?!" insanity for 
non-FAM structs. But Clang appears to just go along for the ride:
https://godbolt.org/z/Pss1oz7sW

So that needs to be fixed in Clang too.

But for doing a FAM struct like this ... my instinct is to say it should return 
0, much like requesting an out of bounds array index. (Rather than `SIZE_MAX`.)

https://github.com/llvm/llvm-project/pull/73730
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [clang] report inlining decisions with -Wattribute-{warning|error} (PR #73552)

2023-11-27 Thread Kees Cook via cfe-commits


@@ -455,18 +455,11 @@ void DiagnosticInfoDontCall::print(DiagnosticPrinter ) 
const {
 SmallVector DiagnosticInfoDontCall::getInliningDecisions() const {
   SmallVector InliningDecisions;
 
-  if (MDN) {
-const MDOperand  = MDN->getOperand(0);
-if (auto *MDT = dyn_cast(MO)) {
-  for (const MDOperand  : MDT->operands()) {
-if (auto *S = dyn_cast(MO)) {
+  if (MDN)
+if (auto *MDT = dyn_cast(MDN->getOperand(0)))
+  for (const MDOperand  : MDT->operands())
+if (auto *S = dyn_cast(MO))
   InliningDecisions.push_back(S->getString().str());
-}
-  }
-} else if (auto *S = dyn_cast(MO)) {
-  InliningDecisions.push_back(S->getString().str());
-}
-  }

kees wrote:

We want it to read like "before", yes?

https://github.com/llvm/llvm-project/pull/73552
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Correct handling of negative and out-of-bounds indices (PR #71877)

2023-11-15 Thread Kees Cook via cfe-commits

kees wrote:

Yeah, this is the "compiler doesn't know if pointer points into an array of 
structs or not" that has driven me crazy for years. But we can now reliably 
disambiguate this for structs that end with a flexible array member (or future 
pointers marked with `__single`). It's been a long time frustration with 
FORTIFY coverage. :P

https://github.com/llvm/llvm-project/pull/71877
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Correct handling of negative and out-of-bounds indices (PR #71877)

2023-11-15 Thread Kees Cook via cfe-commits

https://github.com/kees commented:

For the test cases, I wonder if it might be good to add __bdos() calls with 
type 0 as well. The results should always be the same, but we do want to check 
for that. i.e.:

`  p->array[index] = __builtin_dynamic_object_size(>count, 0);`


https://github.com/llvm/llvm-project/pull/71877
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Correct handling of negative and out-of-bounds indices (PR #71877)

2023-11-15 Thread Kees Cook via cfe-commits


@@ -827,6 +827,165 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const 
Expr *E, unsigned Type,
   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
 }
 
+llvm::Value *
+CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
+ llvm::IntegerType *ResType) {
+  // The code generated here calculates the size of a struct with a flexible
+  // array member that uses the counted_by attribute. There are two instances
+  // we handle:
+  //
+  //   struct s {
+  // unsigned long flags;
+  // int count;
+  // int array[] __attribute__((counted_by(count)));
+  //   }
+  //
+  //   1) bdos of the flexible array itself:
+  //
+  // __builtin_dynamic_object_size(p->array, 1) ==
+  // p->count * sizeof(*p->array)
+  //
+  //   2) bdos of a pointer into the flexible array:
+  //
+  // __builtin_dynamic_object_size(>array[42], 1) ==
+  // (p->count - 42) * sizeof(*p->array)
+  //
+  //   2) bdos of the whole struct, including the flexible array:
+  //
+  // __builtin_dynamic_object_size(p, 1) ==
+  //max(sizeof(struct s),
+  //offsetof(struct s, array) + p->count * sizeof(*p->array))
+  //
+  ASTContext  = getContext();
+  const Expr *Base = E->IgnoreParenImpCasts();
+  const Expr *Idx = nullptr;
+
+  if (const auto *UO = dyn_cast(Base);
+  UO && UO->getOpcode() == UO_AddrOf) {
+Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
+if (const auto *ASE = dyn_cast(SubExpr)) {
+  Base = ASE->getBase()->IgnoreParenImpCasts();
+  Idx = ASE->getIdx()->IgnoreParenImpCasts();
+
+  if (const auto *IL = dyn_cast(Idx)) {
+int64_t Val = IL->getValue().getSExtValue();
+if (Val < 0)
+  // __bdos returns 0 for negative indexes into an array in a struct.
+  return getDefaultBuiltinObjectSizeResult(Type, ResType);
+
+if (Val == 0)
+  // The index is 0, so we don't need to take it into account.
+  Idx = nullptr;
+  }
+} else {
+  // Potential pointer to another element in the struct.
+  Base = SubExpr;
+}
+  }
+
+  // Get the flexible array member Decl.
+  const ValueDecl *FAMDecl = nullptr;
+  if (const auto *ME = dyn_cast(Base)) {
+// Check if \p Base is referencing the FAM itself.
+if (const ValueDecl *MD = ME->getMemberDecl()) {
+  const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+  getLangOpts().getStrictFlexArraysLevel();
+  if (!Decl::isFlexibleArrayMemberLike(
+  Ctx, MD, MD->getType(), StrictFlexArraysLevel,
+  /*IgnoreTemplateOrMacroSubstitution=*/true))
+return nullptr;
+
+  FAMDecl = MD;
+}
+  } else if (const auto *DRE = dyn_cast(Base)) {
+// Check if we're pointing to the whole struct.
+QualType Ty = DRE->getDecl()->getType();
+if (Ty->isPointerType())
+  Ty = Ty->getPointeeType();
+
+if (const auto *RD = Ty->getAsRecordDecl())
+  // Don't use the outer lexical record because the FAM might be in a
+  // different RecordDecl.
+  FAMDecl = FindFlexibleArrayMemberField(Ctx, RD);
+  }
+
+  if (!FAMDecl || !FAMDecl->hasAttr())
+// No flexible array member found or it doesn't have the "counted_by"

kees wrote:

What about the "alloc_size" attribute?

https://github.com/llvm/llvm-project/pull/71877
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] Correct handling of negative and out-of-bounds indices (PR #71877)

2023-11-15 Thread Kees Cook via cfe-commits

https://github.com/kees edited https://github.com/llvm/llvm-project/pull/71877
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang] counted_by attr can apply only to C99 flexible array members (PR #72347)

2023-11-15 Thread Kees Cook via cfe-commits

https://github.com/kees approved this pull request.

Yes, this looks correct. We don't want to let this feature leak into the "fake" 
flex array logic.

https://github.com/llvm/llvm-project/pull/72347
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [CodeGen] Respect pointer-overflow sanitizer for void pointers (PR #67772)

2023-09-29 Thread Kees Cook via cfe-commits

kees wrote:

This is great! Thanks for fixing this!

https://github.com/llvm/llvm-project/pull/67772
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 00e54d0 - [clang][auto-init] Remove -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang

2023-09-03 Thread Kees Cook via cfe-commits

Author: Kees Cook
Date: 2023-09-03T22:24:37-07:00
New Revision: 00e54d04ae2802d498741097d4b83e898bc99c5b

URL: 
https://github.com/llvm/llvm-project/commit/00e54d04ae2802d498741097d4b83e898bc99c5b
DIFF: 
https://github.com/llvm/llvm-project/commit/00e54d04ae2802d498741097d4b83e898bc99c5b.diff

LOG: [clang][auto-init] Remove 
-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang

This was deprecated in Clang 16 and scheduled for removal in Clang 18.
Time to remove it.

Reviewed By: nickdesaulniers, MaskRay

Differential Revision: https://reviews.llvm.org/D159373

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/include/clang/Driver/Options.td
clang/test/Driver/clang_f_opts.c

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 30fc9c43543d522..c6d2c3466a09622 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -127,6 +127,9 @@ Modified Compiler Flags
 Removed Compiler Flags
 -
 
+* ``-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang`` 
has been removed.
+  It has not been needed to enable ``-ftrivial-auto-var-init=zero`` since 
Clang 16.
+
 Attribute Changes in Clang
 --
 - On X86, a warning is now emitted if a function with 
``__attribute__((no_caller_saved_registers))``

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index fa6b69c1c7236dd..e6d8aed6aefc8d9 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -272,12 +272,6 @@ def : Flag<["-"], "fno-slp-vectorize-aggressive">, 
Group, Group;
 def mno_mpx : Flag<["-"], "mno-mpx">, 
Group;
 
-// Retired with clang-16.0, to provide a deprecation period; it should
-// be removed in Clang 18 or later.
-def enable_trivial_var_init_zero : Flag<["-"], 
"enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">,
-  Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, CLOption]>,
-  Group;
-
 // Group that ignores all gcc optimizations that won't be implemented
 def clang_ignored_gcc_optimization_f_Group : OptionGroup<
   "">, Group, 
Flags<[Ignored]>;

diff  --git a/clang/test/Driver/clang_f_opts.c 
b/clang/test/Driver/clang_f_opts.c
index 1704da892687d22..7a3616a2e9f0a48 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -563,12 +563,9 @@
 // RUN: %clang -### -S -ftrivial-auto-var-init=uninitialized %s 2>&1 | 
FileCheck -check-prefix=CHECK-TRIVIAL-UNINIT %s
 // RUN: %clang -### -S -ftrivial-auto-var-init=pattern %s 2>&1 | FileCheck 
-check-prefix=CHECK-TRIVIAL-PATTERN %s
 // RUN: %clang -### -S -ftrivial-auto-var-init=zero %s 2>&1 | FileCheck 
-check-prefix=CHECK-TRIVIAL-ZERO %s
-// RUN: %clang -### -S 
-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang \
-// RUN:   -ftrivial-auto-var-init=zero %s 2>&1 | FileCheck 
-check-prefix=CHECK-TRIVIAL-ZERO-ENABLE-DEPRECATED %s
 // CHECK-TRIVIAL-UNINIT-NOT: hasn't been enabled
 // CHECK-TRIVIAL-PATTERN-NOT: hasn't been enabled
 // CHECK-TRIVIAL-ZERO-NOT: hasn't been enabled
-// CHECK-TRIVIAL-ZERO-ENABLE-DEPRECATED: has been deprecated
 
 // RUN: %clang -### -S -ftrivial-auto-var-init=pattern 
-ftrivial-auto-var-init-stop-after=1 %s 2>&1 | FileCheck 
-check-prefix=CHECK-TRIVIAL-PATTERN-STOP-AFTER %s
 // RUN: %clang -### -S -ftrivial-auto-var-init=zero 
-ftrivial-auto-var-init-stop-after=1 %s 2>&1 | FileCheck 
-check-prefix=CHECK-TRIVIAL-ZERO-STOP-AFTER %s



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] aef03c9 - [clang][auto-init] Deprecate -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang

2022-10-01 Thread Kees Cook via cfe-commits

Author: Kees Cook
Date: 2022-10-01T18:45:45-07:00
New Revision: aef03c9b3bed5cef5a1940774b80128aefcb4095

URL: 
https://github.com/llvm/llvm-project/commit/aef03c9b3bed5cef5a1940774b80128aefcb4095
DIFF: 
https://github.com/llvm/llvm-project/commit/aef03c9b3bed5cef5a1940774b80128aefcb4095.diff

LOG: [clang][auto-init] Deprecate 
-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang

GCC 12 has been released and contains unconditional support for
-ftrivial-auto-var-init=zero:
https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-ftrivial-auto-var-init

Maintain compatibility with GCC, and remove the -enable flag for "zero"
mode. The flag is left to generate an "unused" warning, though, to not
break all the existing users. The flag will be fully removed in Clang 17.

Link: https://github.com/llvm/llvm-project/issues/44842

Reviewed By: nickdesaulniers, MaskRay, srhines, xbolva00

Differential Revision: https://reviews.llvm.org/D125142

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/cl-options.c
clang/test/Driver/clang_f_opts.c

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c2fedad18e0c2..2320f4a7a4fac 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -277,6 +277,10 @@ New Compiler Flags
 
 Deprecated Compiler Flags
 -
+- ``-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang``
+  has been deprecated. The flag will be removed in Clang 18.
+  ``-ftrivial-auto-var-init=zero`` is now available unconditionally, to be
+  compatible with GCC.
 
 Modified Compiler Flags
 ---

diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 56d8d508cdbc8..282ecdbed4cec 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -588,11 +588,6 @@ def warn_drv_darwin_sdk_invalid_settings : Warning<
   "SDK settings were ignored as 'SDKSettings.json' could not be parsed">,
   InGroup>;
 
-def err_drv_trivial_auto_var_init_zero_disabled : Error<
-  "'-ftrivial-auto-var-init=zero' hasn't been enabled; enable it at your own "
-  "peril for benchmarking purpose only with "
-  
"'-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang'">;
-
 def err_drv_trivial_auto_var_init_stop_after_missing_dependency : Error<
   "'-ftrivial-auto-var-init-stop-after=*' is used without "
   "'-ftrivial-auto-var-init=zero' or '-ftrivial-auto-var-init=pattern'">;

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 4e85242f136b1..f7744a109f9a0 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -247,6 +247,12 @@ def : Flag<["-"], "fno-slp-vectorize-aggressive">, 
Group, Group;
 def mno_mpx : Flag<["-"], "mno-mpx">, 
Group;
 
+// Retired with clang-16.0, to provide a deprecation period; it should
+// be removed in Clang 18 or later.
+def enable_trivial_var_init_zero : Flag<["-"], 
"enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">,
+  Flags<[CC1Option, CoreOption, NoArgumentUnused]>,
+  Group;
+
 // Group that ignores all gcc optimizations that won't be implemented
 def clang_ignored_gcc_optimization_f_Group : OptionGroup<
   "">, Group, 
Flags<[Ignored]>;
@@ -2791,9 +2797,6 @@ def ftrivial_auto_var_init : Joined<["-"], 
"ftrivial-auto-var-init=">, Group,
   NormalizedValues<["Uninitialized", "Zero", "Pattern"]>,
   MarshallingInfoEnum, "Uninitialized">;
-def enable_trivial_var_init_zero : Flag<["-"], 
"enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">,
-  Flags<[CC1Option, CoreOption, NoArgumentUnused]>,
-  HelpText<"Trivial automatic variable initialization to zero is only here for 
benchmarks, it'll eventually be removed, and I'm OK with that because I'm only 
using it to benchmark">;
 def ftrivial_auto_var_init_stop_after : Joined<["-"], 
"ftrivial-auto-var-init-stop-after=">, Group,
   Flags<[CC1Option, CoreOption]>, HelpText<"Stop initializing trivial 
automatic stack variables after the specified number of instances">,
   MarshallingInfoInt>;

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index dd5596bc942e8..e5fce35793598 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3458,8 +3458,6 @@ static void RenderTrivialAutoVarInitOptions(const Driver 
,
 }
 
   if (!TrivialAutoVarInit.empty()) {
-if (TrivialAutoVarInit == "zero" && 
!Args.hasArg(options::OPT_enable_trivial_var_init_zero))
-