-
Notifications
You must be signed in to change notification settings - Fork 13.7k
Add an off-by-default warning to complain about MSVC bitfield padding #117428
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add an off-by-default warning to complain about MSVC bitfield padding #117428
Conversation
@llvm/pr-subscribers-clang Author: Oliver Hunt (ojhunt) ChangesThis just adds a warning for bitfields placed next to other bitfields where the underlying type has different storage. Under the MS struct bitfield packing ABI such bitfields are not packed. Full diff: https://github.com/llvm/llvm-project/pull/117428.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index df9bf94b5d0398..57bdda4b8f8b47 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -631,6 +631,7 @@ def Packed : DiagGroup<"packed", [PackedNonPod]>;
def PaddedBitField : DiagGroup<"padded-bitfield">;
def Padded : DiagGroup<"padded", [PaddedBitField]>;
def UnalignedAccess : DiagGroup<"unaligned-access">;
+def MSBitfieldCompatibility : DiagGroup<"ms-bitfield-packing-compatibility">;
def PessimizingMove : DiagGroup<"pessimizing-move">;
def ReturnStdMove : DiagGroup<"return-std-move">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eb05a6a77978af..aa13e3438d3739 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6418,6 +6418,12 @@ def warn_signed_bitfield_enum_conversion : Warning<
InGroup<BitFieldEnumConversion>, DefaultIgnore;
def note_change_bitfield_sign : Note<
"consider making the bitfield type %select{unsigned|signed}0">;
+def warn_ms_bitfield_mismatched_storage_packing : Warning<
+ "bit-field %0 of type %1 has a different storage size (%2 vs %3 bytes) than the "
+ "preceding bit-field and may not be packed under MSVC ABI">,
+ InGroup<MSBitfieldCompatibility>, DefaultIgnore;
+def note_ms_bitfield_mismatched_storage_size_previous : Note<
+ "preceding bit-field %0 declared here with type %1">;
def warn_missing_braces : Warning<
"suggest braces around initialization of subobject">,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 74b0e5ad23bd48..18aeca3b34f659 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -19001,9 +19001,9 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
// Verify that all the fields are okay.
SmallVector<FieldDecl*, 32> RecFields;
-
+ std::optional<const FieldDecl *> PreviousField;
for (ArrayRef<Decl *>::iterator i = Fields.begin(), end = Fields.end();
- i != end; ++i) {
+ i != end; PreviousField = cast<FieldDecl>(*i), ++i) {
FieldDecl *FD = cast<FieldDecl>(*i);
// Get the type for the field.
@@ -19213,6 +19213,29 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
if (Record && FD->getType().isVolatileQualified())
Record->setHasVolatileMember(true);
+ auto IsNonDependentBitField = [](const FieldDecl *FD) {
+ if (!FD->isBitField())
+ return false;
+ if (FD->getType()->isDependentType())
+ return false;
+ return true;
+ };
+
+ if (Record && PreviousField && IsNonDependentBitField(FD) &&
+ IsNonDependentBitField(*PreviousField)) {
+ unsigned FDStorageSize =
+ Context.getTypeSizeInChars(FD->getType()).getQuantity();
+ unsigned PreviousFieldStorageSize =
+ Context.getTypeSizeInChars((*PreviousField)->getType()).getQuantity();
+ if (FDStorageSize != PreviousFieldStorageSize) {
+ Diag(FD->getLocation(),
+ diag::warn_ms_bitfield_mismatched_storage_packing)
+ << FD << FD->getType() << FDStorageSize << PreviousFieldStorageSize;
+ Diag((*PreviousField)->getLocation(),
+ diag::note_ms_bitfield_mismatched_storage_size_previous)
+ << *PreviousField << (*PreviousField)->getType();
+ }
+ }
// Keep track of the number of named members.
if (FD->getIdentifier())
++NumNamedMembers;
diff --git a/clang/test/SemaCXX/ms_struct-bitfield-padding.cpp b/clang/test/SemaCXX/ms_struct-bitfield-padding.cpp
new file mode 100644
index 00000000000000..001086de5bbd10
--- /dev/null
+++ b/clang/test/SemaCXX/ms_struct-bitfield-padding.cpp
@@ -0,0 +1,180 @@
+
+// RUN: %clang_cc1 -fsyntax-only -Wms-bitfield-packing-compatibility -verify -triple armv8 -std=c++23 %s
+// RUN: %clang_cc1 -fsyntax-only -DMS_BITFIELDS -mms-bitfields -verify=msbitfields -triple armv8-apple-macos10.15 -std=c++23 %s
+
+// msbitfields-no-diagnostics
+
+enum Enum1 { Enum1_A, Enum1_B };
+enum Enum2 { Enum2_A, Enum2_B };
+
+enum class EnumU32_1 : unsigned { A, B };
+enum class EnumU32_2 : unsigned { A, B };
+enum class EnumU64 : unsigned long long { A, B };
+enum class EnumI32 : int { A, B };
+enum class EnumU8 : unsigned char { A, B };
+enum class EnumI8 : char { A, B };
+enum class EnumU16 : unsigned short { A, B };
+enum class EnumI16 : short { A, B };
+
+struct A {
+ unsigned int a : 15;
+ unsigned int b : 15;
+};
+static_assert(sizeof(A) == 4);
+
+struct B {
+ unsigned int a : 15;
+ int b : 15;
+};
+static_assert(sizeof(B) == 4);
+
+struct C {
+ unsigned int a : 15;
+ int b : 15;
+};
+static_assert(sizeof(C) == 4);
+
+struct D {
+ Enum1 a : 15;
+ Enum1 b : 15;
+};
+static_assert(sizeof(D) == 4);
+
+struct E {
+ Enum1 a : 15;
+ Enum2 b : 15;
+};
+static_assert(sizeof(E) == 4);
+
+struct F {
+ EnumU32_1 a : 15;
+ EnumU32_2 b : 15;
+};
+static_assert(sizeof(F) == 4);
+
+struct G {
+ EnumU32_1 a : 15;
+ EnumU64 b : 15;
+ // expected-warning@-1 {{bit-field 'b' of type 'EnumU64' has a different storage size (8 vs 4 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'a' declared here with type 'EnumU32_1'}}
+};
+
+#ifdef MS_BITFIELDS
+ static_assert(sizeof(G) == 16);
+#else
+ static_assert(sizeof(G) == 8);
+#endif
+
+struct H {
+ EnumU32_1 a : 10;
+ EnumI32 b : 10;
+ EnumU32_1 c : 10;
+};
+static_assert(sizeof(H) == 4);
+
+struct I {
+ EnumU8 a : 3;
+ EnumI8 b : 5;
+ EnumU32_1 c : 10;
+ // expected-warning@-1 {{bit-field 'c' of type 'EnumU32_1' has a different storage size (4 vs 1 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'b' declared here with type 'EnumI8'}}
+};
+#ifdef MS_BITFIELDS
+static_assert(sizeof(I) == 8);
+#else
+static_assert(sizeof(I) == 4);
+#endif
+
+struct J {
+ EnumU8 : 0;
+ EnumU8 b : 4;
+};
+static_assert(sizeof(J) == 1);
+
+struct K {
+ EnumU8 a : 4;
+ EnumU8 : 0;
+};
+static_assert(sizeof(K) == 1);
+
+struct L {
+ EnumU32_1 a : 10;
+ EnumU32_2 b : 10;
+ EnumU32_1 c : 10;
+};
+
+static_assert(sizeof(L) == 4);
+
+struct M {
+ EnumU32_1 a : 10;
+ EnumI32 b : 10;
+ EnumU32_1 c : 10;
+};
+
+static_assert(sizeof(M) == 4);
+
+struct N {
+ EnumU32_1 a : 10;
+ EnumU64 b : 10;
+ // expected-warning@-1 {{bit-field 'b' of type 'EnumU64' has a different storage size (8 vs 4 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'a' declared here with type 'EnumU32_1'}}
+ EnumU32_1 c : 10;
+ // expected-warning@-1 {{bit-field 'c' of type 'EnumU32_1' has a different storage size (4 vs 8 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-5 {{preceding bit-field 'b' declared here with type 'EnumU64'}}
+};
+
+#ifdef MS_BITFIELDS
+static_assert(sizeof(N) == 24);
+#else
+static_assert(sizeof(N) == 8);
+#endif
+
+struct O {
+ EnumU16 a : 10;
+ EnumU32_1 b : 10;
+ // expected-warning@-1 {{bit-field 'b' of type 'EnumU32_1' has a different storage size (4 vs 2 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'a' declared here with type 'EnumU16'}}
+};
+#ifdef MS_BITFIELDS
+static_assert(sizeof(O) == 8);
+#else
+static_assert(sizeof(O) == 4);
+#endif
+
+struct P {
+ EnumU32_1 a : 10;
+ EnumU16 b : 10;
+ // expected-warning@-1 {{bit-field 'b' of type 'EnumU16' has a different storage size (2 vs 4 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'a' declared here with type 'EnumU32_1'}}
+};
+#ifdef MS_BITFIELDS
+static_assert(sizeof(P) == 8);
+#else
+static_assert(sizeof(P) == 4);
+#endif
+
+struct Q {
+ EnumU8 a : 6;
+ EnumU16 b : 6;
+ // expected-warning@-1 {{bit-field 'b' of type 'EnumU16' has a different storage size (2 vs 1 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'a' declared here with type 'EnumU8'}}
+};
+#ifdef MS_BITFIELDS
+static_assert(sizeof(Q) == 4);
+#else
+static_assert(sizeof(Q) == 2);
+#endif
+
+struct R {
+ EnumU16 a : 9;
+ EnumU16 b : 9;
+ EnumU8 c : 6;
+ // expected-warning@-1 {{bit-field 'c' of type 'EnumU8' has a different storage size (1 vs 2 bytes) than the preceding bit-field and may not be packed under MSVC ABI}}
+ // expected-note@-3 {{preceding bit-field 'b' declared here with type 'EnumU16'}}
+};
+
+#ifdef MS_BITFIELDS
+static_assert(sizeof(R) == 6);
+#else
+static_assert(sizeof(R) == 4);
+#endif
|
Oh, my goal for this warning is to have us turn it on in the llvm+clang builds, so we can then move away from endless use of unsigned bitfields and just use enums as nature intended :D (cc @AaronBallman ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for working on this! Please be sure to also add a release note to clang/docs/ReleaseNotes.rst
so users know about the new diagnostic.
#70978, which split -Wpadded into -Wpadded and -Wpadded-bitfield, may be relevant here |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So I take it we decided not to enable it by default in -fms-compatibility
mode then?
I don't believe it is appropriate to do so. The intent of this warning is to indicate MSVC compatibility issues when building in non-ms-compatibility modes. The existing padding warnings already trigger for these layouts in the relevant ms compatibility modes |
}; | ||
|
||
static_assert(sizeof(M) == 4); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add a test for a case for what happens when there is no layout difference, so something like this:
struct Foo {
char a : 4;
char b : 4;
char c : 4;
char d : 4;
short x : 7;
short y : 9;
};
As written, this code will warn, but both compilers use the same / similar struct layouts (link). Does this represent a false positive, or should we encourage users to use a consistent bitfield width?
llvm::Value
doesn't have this problem because it happens to alternate between small bitfields and small integers, but this seems like a category of false positive that is hard to identify.
The only reasonable way to overcome this limitation would be to move this warning into record layout, in which case we'd have to implement it twice, once in the MS and non-MS record layout code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rnk fun story: my first pass at this was in record layout and it involved layout out the record twice and was awful :D - it also reminded me that there are a bunch of warnings that we don't issue until the point of use rather than declaration which is irksome.
That said, while my original implementation was in record layout and was intended to be 100% accurate, I realized that the actual issue is different type storage sizes resulting in radically different behavior with seemingly unrelated changes later on. So in cases where it matters we just want all bit-fields to be the same storage size, and the warning currently just drives "all bit-fields shall be the same storage size" which is the safe option. Pushing it to actual accurate warnings seems like it might be less than ideal, as you get cases like this trivial change to the above false positive
struct Foo {
char a : 4;
char b : 4;
char c : 4;
char d : 4;
char e : 1; // +1 bit field
short x : 6; // -1 bit width
short y : 9;
};
which would now issue a warning saying that it is no longer packed on msvc, so the fix will be to go through and restructure/re-type (type system type, not keyboard) the entire structure to ensure everything has the same storage type, when it may have been easier to have it "correct" (from msvc point of view) from the start.
e.g the early false positive forces the code to be written in a way that avoids a real warning down the line that requires more changes. That's also why the warning text originally said "may" :D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rnk thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't this negatively affect cases like Value, where we have alternating small integeters and bitfields? The current layout won't be affected, but it seems like you'll get warnings on cases like this that you can't fix without adding padding:
struct Value {
char kind;
unsigned char b1 : 1;
unsigned char b7 : 7; // 8
unsigned short b2 : 2;
unsigned short b14 : 14; // 16
unsigned int b12 : 12;
unsigned int b10a : 10;
unsigned int b10b : 10; // 32
};
Does your warning fire on this case, and what types would one use to silence the warning and get compact layouts on all platforms?
We don't typically add new off-by-default warnings though; users don't enable them often enough to be worth the costs. My thought process is: if we enable this diagnostic by default in If we want to leave it off by default, then I wonder if we want to roll the functionality into |
My concerns with attaching it to The intention would be to then enable the warning (maybe with a
The problem with this is that that people who do not care about windows/ms abi will then be getting what to them are spurious warnings. |
My opinion on this is evolving -- this morally is similar to all the padding warnings we have in that it's telling you "hey, this packing is suboptimal". Those warnings are all off-by-default because they're not identifying a logical mistake in your code; your code still works even though it may be possible to improve it. So I think leaving this as an off-by-default warning is reasonable. And it is a distinct diagnostic from |
No longer perform the field analysis unless the warning is enabled. For safety i've updated a few of the existing bitfield tests to also run with this diagnostic enabled just to get a bit more coverage given we're no longer running this diagnostic code for every test.
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with some minor nits about use of MS vs Microsoft.
def warn_ms_bitfield_mismatched_storage_packing : Warning< | ||
"bit-field %0 of type %1 has a different storage size than the " | ||
"preceding bit-field (%2 vs %3 bytes) and will not be packed under " | ||
"the MS Windows platform ABI">, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"the MS Windows platform ABI">, | |
"the Microsoft ABI">, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the codebase, we tend to use "Windows" to cover behaviors that need to be shared with GCC and MinGW, and "Microsoft" to cover behaviors that are unique to MSVC. This tends to boil down to C vs. C++. If it's part of the C ABI, it's a Windows behavior, under the rationale that any C compiler for the platform would need to match this behavior, like enums always using a fixed type of int
rather than expanding to long long
if the members require it. C++ behaviors are high cost and "optional".
This mostly reflects the actual coding logic required, which is to check whether the OS is Windows, or whether the "environment" is Microsoft, so perhaps this is not the best language to communicate with users
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation!
Would you recommend we go with Windows ABI
and leave the Microsoft
off? Or should we use Microsoft Windows ABI
? I mostly want to avoid MS
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this context I would just say "Windows ABI".
I do recall preferring the use of the company name ("Microsoft") over the use of the product name ("MSVC" / "Visual C++") in the past for obscure legal reasons, but now that Clang ships as part of Visual Studio, clarity is more important than marginal increases in legal risk. This is probably why the code talks about the "Microsoft C++ ABI" not the "MSVC(++) ABI".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SGTM!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've switched to Microsoft ABI, and the diagnostic is now ms-bitfield-padding
}; | ||
|
||
static_assert(sizeof(M) == 4); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rnk fun story: my first pass at this was in record layout and it involved layout out the record twice and was awful :D - it also reminded me that there are a bunch of warnings that we don't issue until the point of use rather than declaration which is irksome.
That said, while my original implementation was in record layout and was intended to be 100% accurate, I realized that the actual issue is different type storage sizes resulting in radically different behavior with seemingly unrelated changes later on. So in cases where it matters we just want all bit-fields to be the same storage size, and the warning currently just drives "all bit-fields shall be the same storage size" which is the safe option. Pushing it to actual accurate warnings seems like it might be less than ideal, as you get cases like this trivial change to the above false positive
struct Foo {
char a : 4;
char b : 4;
char c : 4;
char d : 4;
char e : 1; // +1 bit field
short x : 6; // -1 bit width
short y : 9;
};
which would now issue a warning saying that it is no longer packed on msvc, so the fix will be to go through and restructure/re-type (type system type, not keyboard) the entire structure to ensure everything has the same storage type, when it may have been easier to have it "correct" (from msvc point of view) from the start.
e.g the early false positive forces the code to be written in a way that avoids a real warning down the line that requires more changes. That's also why the warning text originally said "may" :D
…/warn-msvc-bitfield-packing t push
(Finally getting back to this one) |
@AaronBallman are you ok with this now? I'm re-requesting the review as it's been a while, and I want to confirm I addressed things as you wanted |
…/warn-msvc-bitfield-packing
This just adds a warning for bitfields placed next to other bitfields where the underlying type has different storage. Under the MS struct bitfield packing ABI such bitfields are not packed.