diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f40ebe9764892..49e830ef7f99e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -382,6 +382,40 @@ related warnings within the method body. ``format_matches`` accepts an example valid format string as its third argument. For more information, see the Clang attributes documentation. +- Clang can now verify format strings that can be constant-folded even if they + do not resolve to a string literal. For instance, all of these can now be + verified: + + .. code-block:: c++ + + const char format[] = {'h', 'e', 'l', 'l', 'o', ' ', '%', 's', 0}; + printf(format, "world"); + // no warning + + printf(format, 123); + // warning: format specifies type 'char *' but the argument has type 'int' + + printf(("%"s + "i"s).c_str(), "world"); + // warning: format specifies type 'int' but the argument has type 'char *' + + When the format expression does not evaluate to a string literal, Clang + points diagnostics into a pseudo-file called ```` that contains + the format string literal as it evaluated, like so: + + .. code-block:: text + + example.c:6:17: warning: format specifies type 'char *' but the argument has type 'int' [-Wformat] + 6 | printf(format, 123); + | ~~~~~~ ^~~ + :1:4: note: format string computed from non-literal expression + 1 | "hello %s" + | ^~ + | %d + + This may mean that format strings which were previously unverified (or which + triggered ``-Wformat-nonliteral``) are now verified by ``-Wformat`` and its + allies. + - Introduced a new statement attribute ``[[clang::atomic]]`` that enables fine-grained control over atomic code generation on a per-statement basis. Supported options include ``[no_]remote_memory``, diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index a83320a7ddec2..791a387fe5a45 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -791,10 +791,30 @@ class Expr : public ValueStmt { const Expr *PtrExpression, ASTContext &Ctx, EvalResult &Status) const; - /// If the current Expr can be evaluated to a pointer to a null-terminated - /// constant string, return the constant string (without the terminating - /// null). - std::optional tryEvaluateString(ASTContext &Ctx) const; + /// Represents the result of a string compile-time evaluation query. Can tell + /// whether the expression evaluated to a string literal, which is often + /// preferable if diagnostics are involved (since a string literal has + /// source location info), and whether the string was null-terminated. + class StringEvalResult { + std::string Storage; + const StringLiteral *SL; + uint64_t Offset; + bool HasNullTerminator; + + public: + StringEvalResult(std::string Contents, bool HasNullTerminator); + StringEvalResult(const StringLiteral *SL, uint64_t Offset, + bool HasNullTerminator); + + llvm::StringRef getString() const; + bool hasNullTerminator() const { return HasNullTerminator; } + bool getStringLiteral(const StringLiteral *&SL, uint64_t &Offset) const; + }; + + /// If the current \c Expr can be evaluated to a pointer to a constant string, + /// return the constant string. The string may not be NUL-terminated. + std::optional + tryEvaluateString(ASTContext &Ctx, bool InConstantContext = false) const; /// Enumeration used to describe the kind of Null pointer constant /// returned from \c isNullPointerConstant(). diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index be768636d0b49..3656b7a114eff 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9268,7 +9268,7 @@ def err_expected_callable_argument : Error< def note_building_builtin_dump_struct_call : Note< "in call to printing function with arguments '(%0)' while dumping struct">; def err_builtin_verbose_trap_arg : Error< - "argument to __builtin_verbose_trap must %select{be a pointer to a constant string|not contain $}0">; + "argument to '__builtin_verbose_trap' must %select{be a pointer to a constant null-terminated string|not contain $}0">; def err_atomic_load_store_uses_lib : Error< "atomic %select{load|store}0 requires runtime support that is not " @@ -10354,6 +10354,8 @@ def warn_format_bool_as_character : Warning< "using '%0' format specifier, but argument has boolean value">, InGroup; def note_format_string_defined : Note<"format string is defined here">; +def note_format_string_evaluated_to : Note< + "format string computed from non-literal expression">; def note_format_fix_specifier : Note<"did you mean to use '%0'?">; def note_printf_c_str: Note<"did you mean to call the %0 method?">; def note_format_security_fixit: Note< diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e5950f461e4b2..55c6a55eaa981 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1920,9 +1920,23 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info); static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, EvalInfo &Info); static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result); +static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E, + QualType &CharTy, LValue &String); +static const StringLiteral *GetLValueAsStringLiteralAndOffset(EvalInfo &Info, + LValue &String, + QualType CharTy, + uint64_t &Offset); + +/// Call \c Action (which must be like \c bool(int) ) on each character in a +/// string \c LValue . Iteration stops "normally" when \c Action returns +/// \c false . This function returns \c true if iteration stopped normally; if +/// it runs out of characters before \c Action breaks, it returns \c false. +template +static bool ForEachCharacterInStringLValue(EvalInfo &Info, const Expr *E, + QualType CharTy, LValue &String, + CharAction &&Action); static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, - EvalInfo &Info, - std::string *StringResult = nullptr); + EvalInfo &Info); /// Evaluate an integer or fixed point expression into an APResult. static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, @@ -17950,19 +17964,45 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, return tryEvaluateBuiltinObjectSize(this, Type, Info, Result); } -static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, - EvalInfo &Info, std::string *StringResult) { - if (!E->getType()->hasPointerRepresentation() || !E->isPRValue()) +static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E, + QualType &CharTy, LValue &String) { + QualType Ty = E->getType(); + if (!E->isPRValue()) return false; - LValue String; + if (Ty->canDecayToPointerType()) { + if (E->isGLValue()) { + if (!EvaluateLValue(E, String, Info)) + return false; + } else { + APValue &Value = Info.CurrentCall->createTemporary( + E, Ty, ScopeKind::FullExpression, String); + if (!EvaluateInPlace(Value, Info, String, E)) + return false; + } + // The result is a pointer to the first element of the array. + auto *AT = Info.Ctx.getAsArrayType(Ty); + CharTy = AT->getElementType(); + if (auto *CAT = dyn_cast(AT)) + String.addArray(Info, E, CAT); + else + String.addUnsizedArray(Info, E, CharTy); + return true; + } - if (!EvaluatePointer(E, String, Info)) - return false; + if (Ty->hasPointerRepresentation()) { + if (!EvaluatePointer(E, String, Info)) + return false; + CharTy = Ty->getPointeeType(); + return true; + } - QualType CharTy = E->getType()->getPointeeType(); + return false; +} - // Fast path: if it's a string literal, search the string value. +static const StringLiteral * +GetLValueAsStringLiteralAndOffset(EvalInfo &Info, LValue &String, + QualType CharTy, uint64_t &Offset) { if (const StringLiteral *S = dyn_cast_or_null( String.getLValueBase().dyn_cast())) { StringRef Str = S->getBytes(); @@ -17971,46 +18011,114 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, S->getCharByteWidth() == 1 && // FIXME: Add fast-path for wchar_t too. Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { - Str = Str.substr(Off); - - StringRef::size_type Pos = Str.find(0); - if (Pos != StringRef::npos) - Str = Str.substr(0, Pos); - - Result = Str.size(); - if (StringResult) - *StringResult = Str; - return true; + Offset = static_cast(Off); + return S; } - - // Fall through to slow path. } + return nullptr; +} - // Slow path: scan the bytes of the string looking for the terminating 0. - for (uint64_t Strlen = 0; /**/; ++Strlen) { +template +static bool ForEachCharacterInStringLValue(EvalInfo &Info, const Expr *E, + QualType CharTy, LValue &String, + CharAction &&Action) { + while (true) { APValue Char; if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || !Char.isInt()) return false; - if (!Char.getInt()) { - Result = Strlen; + if (!Action(Char.getInt().getExtValue())) return true; - } else if (StringResult) - StringResult->push_back(Char.getInt().getExtValue()); if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1)) return false; } } -std::optional Expr::tryEvaluateString(ASTContext &Ctx) const { +static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, + EvalInfo &Info) { + LValue String; + QualType CharTy; + if (!EvaluateStringAsLValue(Info, E, CharTy, String)) + return false; + + // Fast path: if it's a string literal, search the string value. + uint64_t Off; + if (const auto *S = + GetLValueAsStringLiteralAndOffset(Info, String, CharTy, Off)) { + StringRef Str = S->getBytes().substr(Off); + + StringRef::size_type Pos = Str.find(0); + if (Pos != StringRef::npos) + Str = Str.substr(0, Pos); + + Result = Str.size(); + return true; + } + + // Slow path: scan the bytes of the string looking for the terminating 0. + Result = 0; + return ForEachCharacterInStringLValue(Info, E, CharTy, String, [&](int Char) { + if (Char) { + Result++; + return true; + } else + return false; + }); +} + +Expr::StringEvalResult::StringEvalResult(const StringLiteral *SL, + uint64_t Offset, bool NullTerm) + : SL(SL), Offset(Offset), HasNullTerminator(NullTerm) {} + +Expr::StringEvalResult::StringEvalResult(std::string Contents, bool NullTerm) + : Storage(std::move(Contents)), SL(nullptr), Offset(0), + HasNullTerminator(NullTerm) {} + +llvm::StringRef Expr::StringEvalResult::getString() const { + return SL ? SL->getBytes().substr(Offset) : Storage; +} + +bool Expr::StringEvalResult::getStringLiteral(const StringLiteral *&SL, + uint64_t &Offset) const { + if (this->SL) { + SL = this->SL; + Offset = this->Offset; + return true; + } + return false; +} + +std::optional +Expr::tryEvaluateString(ASTContext &Ctx, bool InConstantContext) const { Expr::EvalStatus Status; - EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); - uint64_t Result; - std::string StringResult; + EvalInfo Info(Ctx, Status, + (InConstantContext && + (Ctx.getLangOpts().CPlusPlus || Ctx.getLangOpts().C23)) + ? EvalInfo::EM_ConstantExpression + : EvalInfo::EM_ConstantFold); + Info.InConstantContext = InConstantContext; + LValue String; + QualType CharTy; + if (!EvaluateStringAsLValue(Info, this, CharTy, String)) + return {}; + + uint64_t Off; + if (const auto *S = + GetLValueAsStringLiteralAndOffset(Info, String, CharTy, Off)) { + return StringEvalResult(S, Off, true); + } + + std::string Result; + bool NTFound = + ForEachCharacterInStringLValue(Info, this, CharTy, String, [&](int Char) { + if (Char) { + Result.push_back(Char); + return true; + } else + return false; + }); - if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult)) - return StringResult; - return {}; + return StringEvalResult(Result, NTFound); } template diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 5b72382ca9772..9c04422ed44be 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -726,12 +726,13 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, const Expr *Fmt = Call->getArg(FmtArgIdx); if (auto *SL = dyn_cast(Fmt->IgnoreParenImpCasts())) { + std::optional SER; StringRef FmtStr; if (SL->getCharByteWidth() == 1) FmtStr = SL->getString(); - else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx)) - FmtStr = *EvaledFmtStr; + else if ((SER = SL->tryEvaluateString(Ctx))) + FmtStr = SER->getString(); else goto CHECK_UNSAFE_PTR; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e6816736412b8..e1a516b87ded3 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3477,8 +3477,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation(); if (getDebugInfo()) { TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor( - TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()), - *E->getArg(1)->tryEvaluateString(getContext())); + TrapLocation, + E->getArg(0)->tryEvaluateString(getContext())->getString(), + E->getArg(1)->tryEvaluateString(getContext())->getString()); } ApplyDebugLocation ApplyTrapDI(*this, TrapLocation); // Currently no attempt is made to prevent traps from being merged. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7f45533713bae..f004dd8886c75 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -99,6 +99,7 @@ #include "llvm/Support/Locale.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/RISCVTargetParser.h" #include "llvm/TargetParser/Triple.h" @@ -180,12 +181,13 @@ static bool checkBuiltinVerboseTrap(CallExpr *Call, Sema &S) { if (Arg->isValueDependent()) continue; - std::optional ArgString = Arg->tryEvaluateString(S.Context); + // Arguments must be pointers to constant strings, must be NUL-terminated, + // and cannot contain '$'. + auto ArgString = Arg->tryEvaluateString(S.Context); int DiagMsgKind = -1; - // Arguments must be pointers to constant strings and cannot use '$'. - if (!ArgString.has_value()) + if (!(ArgString && ArgString->hasNullTerminator())) DiagMsgKind = 0; - else if (ArgString->find('$') != std::string::npos) + else if (ArgString->getString().find('$') != llvm::StringRef::npos) DiagMsgKind = 1; if (DiagMsgKind >= 0) { @@ -6047,23 +6049,79 @@ static void CheckFormatString( llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg, bool IgnoreStringsWithoutSpecifiers); -static const Expr *maybeConstEvalStringLiteral(ASTContext &Context, - const Expr *E); +class FormatStringFinder { + enum StringLiteralConstantEvaluationResult { + SLCER_NotEvaluated, + SLCER_NotNullTerminated, + SLCER_Evaluated, + }; -// Determine if an expression is a string literal or constant string. -// If this function returns false on the arguments to a function expecting a -// format string, we will usually need to emit a warning. -// True string literals are then checked by CheckFormatString. -static StringLiteralCheckType checkFormatStringExpr( - Sema &S, const StringLiteral *ReferenceFormatString, const Expr *E, - ArrayRef Args, Sema::FormatArgumentPassingKind APK, - unsigned format_idx, unsigned firstDataArg, FormatStringType Type, - VariadicCallType CallType, bool InFunctionCall, - llvm::SmallBitVector &CheckedVarArgs, UncoveredArgHandler &UncoveredArg, - llvm::APSInt Offset, bool IgnoreStringsWithoutSpecifiers = false) { - if (S.isConstantEvaluatedContext()) - return SLCT_NotALiteral; -tryAgain: + Sema &S; + const StringLiteral *ReferenceFormatString; + ArrayRef Args; + llvm::SmallBitVector &CheckedVarArgs; + UncoveredArgHandler &UncoveredArg; + Sema::FormatArgumentPassingKind APK; + FormatStringType Type; + VariadicCallType CallType; + unsigned format_idx; + unsigned firstDataArg; + bool InFunctionCall; + bool IgnoreStringsWithoutSpecifiers; + + FormatStringFinder(Sema &S, const StringLiteral *ReferenceFormatString, + ArrayRef Args, + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg, + Sema::FormatArgumentPassingKind APK, FormatStringType Type, + VariadicCallType CallType, unsigned format_idx, + unsigned firstDataArg, bool InFunctionCall) + : S(S), ReferenceFormatString(ReferenceFormatString), Args(Args), + CheckedVarArgs(CheckedVarArgs), UncoveredArg(UncoveredArg), APK(APK), + Type(Type), CallType(CallType), format_idx(format_idx), + firstDataArg(firstDataArg), InFunctionCall(InFunctionCall), + IgnoreStringsWithoutSpecifiers(false) {} + + /// Attempt to fold \c E into a constant string that \c checkFormatStringExpr + /// can use. If \c E folds to a string literal, that string literal will be + /// used for diagnostics. If \c E has a constant string value but it does not + /// fold to a literal (for instance, ("%"s + "i"s).c_str() constant-folds to + /// "%i"), a pseudo-source file will be allocated, containing + /// a string literal representation of the constant string, and format + /// diagnostics will point to it. + static StringLiteralConstantEvaluationResult + EvaluateStringAndCreateLiteral(Sema &S, const Expr *E, + bool IsConstantEvaluation, + const StringLiteral *&SL, uint64_t &Offset); + + StringLiteralCheckType + checkEvaluateLiteral(const Expr *E, llvm::APSInt Offset, + bool IsConstantInitialization = false); + StringLiteralCheckType check(const Expr *E, llvm::APSInt Offset); + +public: + // Determine if an expression is a string literal or constant string. + // If this function returns false on the arguments to a function expecting a + // format string, we will usually need to emit a warning. + // True string literals are then checked by CheckFormatString. + static StringLiteralCheckType + check(Sema &S, const StringLiteral *ReferenceFormatString, + ArrayRef Args, unsigned FormatIdx, unsigned FirstDataArg, + Sema::FormatArgumentPassingKind APK, FormatStringType FST, + VariadicCallType CallType, llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg) { + FormatStringFinder Finder(S, ReferenceFormatString, Args, CheckedVarArgs, + UncoveredArg, APK, FST, CallType, FormatIdx, + FirstDataArg, true); + const Expr *E = Args[FormatIdx]; + return S.isConstantEvaluatedContext() + ? Finder.checkEvaluateLiteral(E, llvm::APSInt(64, false) = 0) + : Finder.check(E, llvm::APSInt(64, false) = 0); + } +}; + +StringLiteralCheckType FormatStringFinder::check(const Expr *E, + llvm::APSInt Offset) { assert(Offset.isSigned() && "invalid offset"); if (E->isTypeDependent() || E->isValueDependent()) @@ -6079,15 +6137,15 @@ static StringLiteralCheckType checkFormatStringExpr( return SLCT_UncheckedLiteral; switch (E->getStmtClass()) { - case Stmt::InitListExprClass: - // Handle expressions like {"foobar"}. - if (const clang::Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) { - return checkFormatStringExpr( - S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg, - Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); - } - return SLCT_NotALiteral; + case Stmt::InitListExprClass: { + const InitListExpr *InitList = cast(E); + // Look through initializers like const char c[] = { "foo" } + if (InitList->isStringLiteralInit()) + return check(InitList->getInit(0), Offset); + + return checkEvaluateLiteral(E, Offset); + } + case Stmt::BinaryConditionalOperatorClass: case Stmt::ConditionalOperatorClass: { // The expression is a literal if both sub-expressions were, and it was @@ -6101,8 +6159,7 @@ static StringLiteralCheckType checkFormatStringExpr( bool CheckLeft = true, CheckRight = true; bool Cond; - if (C->getCond()->EvaluateAsBooleanCondition( - Cond, S.getASTContext(), S.isConstantEvaluatedContext())) { + if (C->getCond()->EvaluateAsBooleanCondition(Cond, S.getASTContext())) { if (Cond) CheckRight = false; else @@ -6117,31 +6174,22 @@ static StringLiteralCheckType checkFormatStringExpr( if (!CheckLeft) Left = SLCT_UncheckedLiteral; else { - Left = checkFormatStringExpr( - S, ReferenceFormatString, C->getTrueExpr(), Args, APK, format_idx, - firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); + Left = check(C->getTrueExpr(), Offset); if (Left == SLCT_NotALiteral || !CheckRight) { return Left; } } - StringLiteralCheckType Right = checkFormatStringExpr( - S, ReferenceFormatString, C->getFalseExpr(), Args, APK, format_idx, - firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); - + StringLiteralCheckType Right = check(C->getFalseExpr(), Offset); return (CheckLeft && Left < Right) ? Left : Right; } case Stmt::ImplicitCastExprClass: - E = cast(E)->getSubExpr(); - goto tryAgain; + return check(cast(E)->getSubExpr(), Offset); case Stmt::OpaqueValueExprClass: if (const Expr *src = cast(E)->getSourceExpr()) { - E = src; - goto tryAgain; + return check(src, Offset); } return SLCT_NotALiteral; @@ -6172,16 +6220,17 @@ static StringLiteralCheckType checkFormatStringExpr( } if (isConstant) { - if (const Expr *Init = VD->getAnyInitializer()) { - // Look through initializers like const char c[] = { "foo" } - if (const InitListExpr *InitList = dyn_cast(Init)) { - if (InitList->isStringLiteralInit()) - Init = InitList->getInit(0)->IgnoreParenImpCasts(); - } - return checkFormatStringExpr( - S, ReferenceFormatString, Init, Args, APK, format_idx, - firstDataArg, Type, CallType, - /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, Offset); + // If the variable has C++ constant initialization, we need to jump out + // of format string handling ad-hoc constant evaluation and follow the + // standard rules. + const Expr *Init = VD->getInit(); + if (Init && VD->hasConstantInitialization()) { + return checkEvaluateLiteral(Init, Offset, true); + } + Init = VD->getAnyInitializer(); + if (Init) { + InFunctionCall = false; + return check(Init, Offset); } } @@ -6254,11 +6303,8 @@ static StringLiteralCheckType checkFormatStringExpr( } return SLCT_UncheckedLiteral; } - return checkFormatStringExpr( - S, ReferenceFormatString, PVFormatMatches->getFormatString(), - Args, APK, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/ false, CheckedVarArgs, UncoveredArg, - Offset, IgnoreStringsWithoutSpecifiers); + InFunctionCall = false; + return check(PVFormatMatches->getFormatString(), Offset); } } @@ -6310,10 +6356,7 @@ static StringLiteralCheckType checkFormatStringExpr( StringLiteralCheckType CommonResult; for (const auto *FA : ND->specific_attrs()) { const Expr *Arg = CE->getArg(FA->getFormatIdx().getASTIndex()); - StringLiteralCheckType Result = checkFormatStringExpr( - S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg, - Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, - Offset, IgnoreStringsWithoutSpecifiers); + StringLiteralCheckType Result = check(Arg, Offset); if (IsFirst) { CommonResult = Result; IsFirst = false; @@ -6326,20 +6369,11 @@ static StringLiteralCheckType checkFormatStringExpr( unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) { - const Expr *Arg = CE->getArg(0); - return checkFormatStringExpr( - S, ReferenceFormatString, Arg, Args, APK, format_idx, - firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); + return check(CE->getArg(0), Offset); } } } - if (const Expr *SLE = maybeConstEvalStringLiteral(S.Context, E)) - return checkFormatStringExpr( - S, ReferenceFormatString, SLE, Args, APK, format_idx, firstDataArg, - Type, CallType, /*InFunctionCall*/ false, CheckedVarArgs, - UncoveredArg, Offset, IgnoreStringsWithoutSpecifiers); - return SLCT_NotALiteral; + return checkEvaluateLiteral(E, Offset); } case Stmt::ObjCMessageExprClass: { const auto *ME = cast(E); @@ -6360,11 +6394,7 @@ static StringLiteralCheckType checkFormatStringExpr( IgnoreStringsWithoutSpecifiers = true; } - const Expr *Arg = ME->getArg(FA->getFormatIdx().getASTIndex()); - return checkFormatStringExpr( - S, ReferenceFormatString, Arg, Args, APK, format_idx, firstDataArg, - Type, CallType, InFunctionCall, CheckedVarArgs, UncoveredArg, - Offset, IgnoreStringsWithoutSpecifiers); + return check(ME->getArg(FA->getFormatIdx().getASTIndex()), Offset); } } @@ -6415,18 +6445,16 @@ static StringLiteralCheckType checkFormatStringExpr( if (LIsInt) { if (BinOpKind == BO_Add) { sumOffsets(Offset, LResult.Val.getInt(), BinOpKind, RIsInt); - E = BinOp->getRHS(); - goto tryAgain; + return check(BinOp->getRHS(), Offset); } } else { sumOffsets(Offset, RResult.Val.getInt(), BinOpKind, RIsInt); - E = BinOp->getLHS(); - goto tryAgain; + return check(BinOp->getLHS(), Offset); } } } - return SLCT_NotALiteral; + return checkEvaluateLiteral(E, Offset); } case Stmt::UnaryOperatorClass: { const UnaryOperator *UnaOp = cast(E); @@ -6438,31 +6466,86 @@ static StringLiteralCheckType checkFormatStringExpr( S.isConstantEvaluatedContext())) { sumOffsets(Offset, IndexResult.Val.getInt(), BO_Add, /*RHS is int*/ true); - E = ASE->getBase(); - goto tryAgain; + return check(ASE->getBase(), Offset); } } - return SLCT_NotALiteral; + return checkEvaluateLiteral(E, Offset); } default: - return SLCT_NotALiteral; + return checkEvaluateLiteral(E, Offset); } } -// If this expression can be evaluated at compile-time, -// check if the result is a StringLiteral and return it -// otherwise return nullptr -static const Expr *maybeConstEvalStringLiteral(ASTContext &Context, - const Expr *E) { - Expr::EvalResult Result; - if (E->EvaluateAsRValue(Result, Context) && Result.Val.isLValue()) { - const auto *LVE = Result.Val.getLValueBase().dyn_cast(); - if (isa_and_nonnull(LVE)) - return LVE; +StringLiteralCheckType +FormatStringFinder::checkEvaluateLiteral(const Expr *E, llvm::APSInt Offset, + bool IsConstantEvaluation) { + uint64_t EvalOffset = 0; + const StringLiteral *FakeLiteral = nullptr; + switch (EvaluateStringAndCreateLiteral(S, E, IsConstantEvaluation, + FakeLiteral, EvalOffset)) { + case SLCER_NotEvaluated: + return SLCT_NotALiteral; + + case SLCER_NotNullTerminated: + S.Diag(Args[format_idx]->getBeginLoc(), + diag::warn_printf_format_string_not_null_terminated) + << Args[format_idx]->getSourceRange(); + if (!InFunctionCall) + S.Diag(E->getBeginLoc(), diag::note_format_string_defined); + // Stop checking, as this might just mean we're missing a chunk of the + // format string and there would be other spurious format issues. + return SLCT_UncheckedLiteral; + + case SLCER_Evaluated: + InFunctionCall = false; + if (EvalOffset) + sumOffsets(Offset, llvm::APSInt(64, false) = EvalOffset, BO_Add, true); + return check(FakeLiteral, Offset); + } +} + +FormatStringFinder::StringLiteralConstantEvaluationResult +FormatStringFinder::EvaluateStringAndCreateLiteral(Sema &S, const Expr *E, + bool IsConstantEvaluation, + const StringLiteral *&SL, + uint64_t &Offset) { + // As a last resort, try to constant-evaluate the format string. + auto SER = E->tryEvaluateString(S.Context, IsConstantEvaluation); + if (!SER) + return SLCER_NotEvaluated; + if (!SER || !SER->hasNullTerminator()) + return SLCER_NotNullTerminated; + + // If it evaluates to a string literal in the first place, we can point to + // that string literal in source and use that. + if (SER->getStringLiteral(SL, Offset)) + return SLCER_Evaluated; + + // Otherwise, plop that string into a scratch buffer, create a string literal + // and then go with that. + std::unique_ptr MemBuf; + { + llvm::SmallString<80> EscapedString; + { + llvm::raw_svector_ostream OS(EscapedString); + OS << '"'; + OS.write_escaped(SER->getString()); + OS << '"'; + } + MemBuf.reset(new llvm::SmallVectorMemoryBuffer(std::move(EscapedString), + "", true)); } - return nullptr; + + FileID ScratchFile = S.getSourceManager().createFileID(std::move(MemBuf)); + SourceLocation Begin = S.getSourceManager().getLocForStartOfFile(ScratchFile); + QualType SLType = S.Context.getStringLiteralArrayType( + S.Context.CharTy, SER->getString().size()); + SL = StringLiteral::Create(S.Context, SER->getString(), + StringLiteralKind::Ordinary, false, SLType, Begin); + Offset = 0; + return SLCER_Evaluated; } StringRef Sema::GetFormatStringTypeName(FormatStringType FST) { @@ -6551,6 +6634,11 @@ bool Sema::CheckFormatArguments(ArrayRef Args, VariadicCallType CallType, SourceLocation Loc, SourceRange Range, llvm::SmallBitVector &CheckedVarArgs) { + // As a last resort, Clang attempts to evaluate the format string as a + // constant, which is expensive. Before we go down that route, check that + // the warnings are at least enabled at Loc, which in the common case points + // at the opening parenthesis of the function call. + // CHECK: printf/scanf-like function is called with no format string. if (format_idx >= Args.size()) { Diag(Loc, diag::warn_missing_format_string) << Range; @@ -6563,20 +6651,13 @@ bool Sema::CheckFormatArguments(ArrayRef Args, // // Dynamically generated format strings are difficult to // automatically vet at compile time. Requiring that format strings - // are string literals: (1) permits the checking of format strings by - // the compiler and thereby (2) can practically remove the source of - // many format string exploits. - - // Format string can be either ObjC string (e.g. @"%d") or - // C string (e.g. "%d") - // ObjC string uses the same format specifiers as C string, so we can use - // the same format string checking logic for both ObjC and C strings. + // can evaluate to constant strings: (1) permits the checking of format + // strings by the compiler and thereby (2) can practically remove the source + // of many format string exploits. UncoveredArgHandler UncoveredArg; - StringLiteralCheckType CT = checkFormatStringExpr( - *this, ReferenceFormatString, OrigFormatExpr, Args, APK, format_idx, - firstDataArg, Type, CallType, - /*IsFunctionCall*/ true, CheckedVarArgs, UncoveredArg, - /*no string offset*/ llvm::APSInt(64, false) = 0); + StringLiteralCheckType CT = FormatStringFinder::check( + *this, ReferenceFormatString, Args, format_idx, firstDataArg, APK, Type, + CallType, CheckedVarArgs, UncoveredArg); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { @@ -7086,10 +7167,11 @@ void CheckFormatHandler::EmitFormatDiagnostic( S.Diag(IsStringLocation ? ArgumentExpr->getExprLoc() : Loc, PDiag) << ArgumentExpr->getSourceRange(); - const Sema::SemaDiagnosticBuilder &Note = - S.Diag(IsStringLocation ? Loc : StringRange.getBegin(), - diag::note_format_string_defined); - + SourceLocation DiagLoc = IsStringLocation ? Loc : StringRange.getBegin(); + unsigned DiagID = S.getSourceManager().isWrittenInScratchSpace(DiagLoc) + ? diag::note_format_string_evaluated_to + : diag::note_format_string_defined; + const Sema::SemaDiagnosticBuilder &Note = S.Diag(DiagLoc, DiagID); Note << StringRange; Note << FixIt; } diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index af30ad5d15fe2..cbeb0f93277c1 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -1,3 +1,4 @@ +// #scratch_space // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs %s // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -fno-signed-char %s // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s @@ -900,3 +901,13 @@ void test_promotion(void) { // pointers printf("%s", i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} } + +void test_consteval_init_array(void) { + const char buf_not_terminated[] = {'%', 55 * 2 + 5, '\n'}; // expected-note{{format string is defined here}} + printf(buf_not_terminated, "hello"); // expected-warning{{format string is not null-terminated}} + + const char buf[] = {'%', 55 * 2 + 5, '\n', 0}; + printf(buf, "hello"); // no-warning + printf(buf, 123); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} + // expected-note@#scratch_space {{format string computed from non-literal expression}} +} diff --git a/clang/test/SemaCXX/format-strings.cpp b/clang/test/SemaCXX/format-strings.cpp index 48cf23999a94f..3db56ca7e6174 100644 --- a/clang/test/SemaCXX/format-strings.cpp +++ b/clang/test/SemaCXX/format-strings.cpp @@ -1,6 +1,9 @@ +// #scratch_space // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks %s // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -fblocks -std=c++98 %s // RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++11 %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++20 %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wformat-nonliteral -Wformat-non-iso -Wformat-pedantic -fblocks -std=c++23 %s #include @@ -238,3 +241,98 @@ void f(Scoped1 S1, Scoped2 S2) { } #endif + +#if __cplusplus >= 202000L +class my_string { + char *data; + unsigned size; + +public: + template + constexpr my_string(const char (&literal)[N]) { + data = new char[N+1]; + for (size = 0; size < N; ++size) { + data[size] = literal[size]; + if (data[size] == 0) + break; + } + data[size] = 0; + } + + my_string(const my_string &) = delete; + + constexpr my_string(my_string &&that) { + data = that.data; + size = that.size; + that.data = nullptr; + that.size = 0; + } + + constexpr ~my_string() { + delete[] data; + } + + template + constexpr void append(const char (&literal)[N]) { + char *cat = new char[size + N + 1]; + char *tmp = cat; + for (unsigned i = 0; i < size; ++i) { + *tmp++ = data[i]; + } + for (unsigned i = 0; i < N; ++i) { + *tmp = literal[i]; + if (*tmp == 0) + break; + ++tmp; + } + *tmp = 0; + delete[] data; + size = tmp - cat; + data = cat; + } + + constexpr const char *c_str() const { + return data; + } +}; + +constexpr my_string const_string() { + my_string str("hello %s"); + str.append(", %d"); + return str; +} + +void test_constexpr_string() { + printf(const_string().c_str(), "hello", 123); // no-warning + printf(const_string().c_str(), 123, 456); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}} + // expected-note@#scratch_space {{format string computed from non-literal expression}} +} +#endif + +#if __cplusplus >= 202300L +constexpr const char *consteval_d() { + if consteval { + return "%s"; // expected-note 3{{format string is defined here}} + } else { + return "%d"; // expected-note{{format string is defined here}} + } +} + +constexpr const char *consteval_s = consteval_d(); +constinit const char *const consteval_s2 = consteval_d(); + +void test_consteval_str() { + printf(consteval_d(), 789); // no-warning + printf(consteval_d(), "hello"); // expected-warning {{format specifies type 'int' but the argument has type 'const char *'}} + + printf(consteval_s, 1234); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}} + printf(consteval_s, "hello"); // no-warning + + printf(consteval_s2, 1234); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}} + printf(consteval_s2, "hello"); // no-warning + + constexpr const char *consteval_s3 = consteval_d(); + printf(consteval_s3, 1234); // expected-warning {{format specifies type 'char *' but the argument has type 'int'}} + printf(consteval_s3, "hello"); // no-warning +} +#endif diff --git a/clang/test/SemaCXX/verbose-trap.cpp b/clang/test/SemaCXX/verbose-trap.cpp index 2503f9860d9c3..52fb6d85db4db 100644 --- a/clang/test/SemaCXX/verbose-trap.cpp +++ b/clang/test/SemaCXX/verbose-trap.cpp @@ -15,6 +15,9 @@ char const constMsg3[] = "hello"; template void f(const char * arg) { + const char buf[] = {'a', 'b', 'c'}; + const char buf_nt1[] = {'a', 'b', 'c', 0}; + const char buf_nt2[] = {'a', 'b', 0, 'c'}; __builtin_verbose_trap("cat1", "Arbitrary string literals can be used!"); __builtin_verbose_trap(" cat1 ", "Argument_must_not_be_null"); __builtin_verbose_trap("cat" "egory1", "hello" "world"); @@ -24,10 +27,13 @@ void f(const char * arg) { __builtin_verbose_trap(); // expected-error {{too few arguments}} __builtin_verbose_trap(""); // expected-error {{too few arguments}} __builtin_verbose_trap("", "", ""); // expected-error {{too many arguments}} - __builtin_verbose_trap("", 0); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant string}} + __builtin_verbose_trap("", 0); // expected-error {{argument to '__builtin_verbose_trap' must be a pointer to a constant null-terminated string}} __builtin_verbose_trap(1, ""); // expected-error {{cannot initialize a parameter of type 'const char *' with an rvalue of type 'int'}} - __builtin_verbose_trap(arg, ""); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant string}} - __builtin_verbose_trap("cat$1", "hel$lo"); // expected-error 2 {{argument to __builtin_verbose_trap must not contain $}} + __builtin_verbose_trap(arg, ""); // expected-error {{argument to '__builtin_verbose_trap' must be a pointer to a constant null-terminated string}} + __builtin_verbose_trap(buf, ""); // expected-error {{argument to '__builtin_verbose_trap' must be a pointer to a constant null-terminated string}} + __builtin_verbose_trap(buf_nt1, ""); + __builtin_verbose_trap(buf_nt2, ""); + __builtin_verbose_trap("cat$1", "hel$lo"); // expected-error 2 {{argument to '__builtin_verbose_trap' must not contain $}} __builtin_verbose_trap(category, reason); __builtin_verbose_trap(u8"cat1", u8"hello"); #if __cplusplus >= 202002L