[InlineCost]: Optimize inlining of recursive function. #139982

hassnaaHamdi · May 15, 2025

Consider inlining a recursive function of depth 1 only when
the caller is the function itself, instead of considering it
at every callsite, so that we avoid redundant work.
Use CondContext instead of the DomTree for better compile time.

Depends on #139832.

llvmbot · May 15, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Hassnaa Hamdi (hassnaaHamdi)

Changes

Consider inlining a recursive function of depth 1 only when
the caller is the function itself, instead of considering it
at every callsite, so that we avoid redundant work.
Use CondContext instead of the DomTree for better compile time.

Full diff: https://github.com/llvm/llvm-project/pull/139982.diff

4 Files Affected:

(modified) llvm/include/llvm/Analysis/SimplifyQuery.h (+2)
(modified) llvm/lib/Analysis/InlineCost.cpp (+44-58)
(modified) llvm/lib/Analysis/ValueTracking.cpp (+4)
(added) llvm/test/Transforms/Inline/inline-recursive-fn2.ll (+45)

diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index e8f43c8c2e91f..063ca4eaa9db0 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -62,6 +62,8 @@ struct InstrInfoQuery {
 struct CondContext {
   Value *Cond;
   bool Invert = false;
+  // Condition is true if CxtI is in the true successor of Cond.
+  bool CondIsTrue = false;
   SmallPtrSet<Value *, 4> AffectedValues;
 
   CondContext(Value *Cond) : Cond(Cond) {}
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 8ddfa1e4eb6f7..12f780fea1620 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1688,66 +1688,52 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
   if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
     return false;
   auto *CmpOp = Cmp.getOperand(0);
-  Function *F = Cmp.getFunction();
-  // Iterate over the users of the function to check if it's a recursive
-  // function:
-  for (auto *U : F->users()) {
-    CallInst *Call = dyn_cast<CallInst>(U);
-    if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F)
-      continue;
-    auto *CallBB = Call->getParent();
-    auto *Predecessor = CallBB->getSinglePredecessor();
-    // Only handle the case when the callsite has a single predecessor:
-    if (!Predecessor)
-      continue;
+  // Make sure that the callsite is recursive:
+  if (CandidateCall.getCaller() != &F)
+    return false;
+  CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);
+  // Only handle the case when the callsite has a single predecessor:
+  auto *CallBB = CallInstr->getParent();
+  auto *Predecessor = CallBB->getSinglePredecessor();
+  if (!Predecessor)
+    return false;
+  // Check if the callsite is guarded by the same Cmp instruction:
+  auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
+  if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
+    return false;
 
-    auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
-    if (!Br || Br->isUnconditional())
-      continue;
-    // Check if the Br condition is the same Cmp instr we are investigating:
-    if (Br->getCondition() != &Cmp)
-      continue;
-    // Check if there are any arg of the recursive callsite is affecting the cmp
-    // instr:
-    bool ArgFound = false;
-    Value *FuncArg = nullptr, *CallArg = nullptr;
-    for (unsigned ArgNum = 0;
-         ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) {
-      FuncArg = F->getArg(ArgNum);
-      CallArg = Call->getArgOperand(ArgNum);
-      if (FuncArg == CmpOp && CallArg != CmpOp) {
-        ArgFound = true;
-        break;
-      }
-    }
-    if (!ArgFound)
-      continue;
-    // Now we have a recursive call that is guarded by a cmp instruction.
-    // Check if this cmp can be simplified:
-    SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
-    DomConditionCache DC;
-    DC.registerBranch(Br);
-    SQ.DC = &DC;
-    if (DT.root_size() == 0) {
-      // Dominator tree was never constructed for any function yet.
-      DT.recalculate(*F);
-    } else if (DT.getRoot()->getParent() != F) {
-      // Dominator tree was constructed for a different function, recalculate
-      // it for the current function.
-      DT.recalculate(*F);
+  // Check if there is any arg of the recursive callsite is affecting the cmp
+  // instr:
+  bool ArgFound = false;
+  Value *FuncArg = nullptr, *CallArg = nullptr;
+  for (unsigned ArgNum = 0;
+        ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) {
+    FuncArg = F.getArg(ArgNum);
+    CallArg = CallInstr->getArgOperand(ArgNum);
+    if (FuncArg == CmpOp && CallArg != CmpOp) {
+      ArgFound = true;
+      break;
     }
-    SQ.DT = &DT;
-    Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
-        cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
-    if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
-      bool IsTrueSuccessor = CallBB == Br->getSuccessor(0);
-      // Make sure that the BB of the recursive call is NOT the next successor
-      // of the icmp. In other words, make sure that the recursion depth is 1.
-      if ((ConstVal->isOne() && !IsTrueSuccessor) ||
-          (ConstVal->isZero() && IsTrueSuccessor)) {
-        SimplifiedValues[&Cmp] = ConstVal;
-        return true;
-      }
+  }
+  if (!ArgFound)
+    return false;
+
+  // Now we have a recursive call that is guarded by a cmp instruction.
+  // Check if this cmp can be simplified:
+  SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
+  CondContext CC(cast<Value>(&Cmp));
+  CC.CondIsTrue = CallBB == Br->getSuccessor(0);
+  SQ.CC = &CC;
+  CC.AffectedValues.insert(FuncArg);
+  Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
+      cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
+  if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
+    // Make sure that the BB of the recursive call is NOT the true successor
+    // of the icmp. In other words, make sure that the recursion depth is 1.
+    if ((ConstVal->isOne() && !CC.CondIsTrue) ||
+        (ConstVal->isZero() && CC.CondIsTrue)) {
+      SimplifiedValues[&Cmp] = ConstVal;
+      return true;
     }
   }
   return false;
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3d403531cea2f..e7d937a0893ab 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5014,6 +5014,10 @@ static KnownFPClass computeKnownFPClassFromContext(const Value *V,
                                                    const SimplifyQuery &Q) {
   KnownFPClass KnownFromContext;
 
+  if (Q.CC && Q.CC->AffectedValues.contains(V))
+    computeKnownFPClassFromCond(V, Q.CC->Cond, 0, Q.CC->CondIsTrue, Q.CxtI,
+                                KnownFromContext);
+
   if (!Q.CxtI)
     return KnownFromContext;
 
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
new file mode 100644
index 0000000000000..0323a6ee3a75a
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -0,0 +1,45 @@
+; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK:  Inlining calls in: test
+; CHECK:      Function size: 2
+; CHECK:      NOT Inlining (cost=never): recursive, Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+
+; CHECK:  Inlining calls in: inline_rec_true_successor
+; CHECK:      Function size: 10
+; CHECK:      Inlining (cost=-35, threshold=337), Call:   %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+; CHECK:      Size after inlining: 17
+; CHECK:      NOT Inlining (cost=never): noinline function attribute, Call:   %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
+; CHECK:      NOT Inlining (cost=never): noinline function attribute, Call:   %call_test.i = tail call float @test(float %x, float %call.i)
+; CHECK:  Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor
+; CHECK:  Updated inlining SCC: (test, inline_rec_true_successor)
+
+; CHECK:  Inlining calls in: test
+; CHECK:      Function size: 2
+; CHECK:      Inlining (cost=25, threshold=225), Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+; CHECK:      Size after inlining: 10
+
+define float @test(float %x, float %scale) noinline {
+entry:
+  %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+  ret float %call
+}
+
+define float @inline_rec_true_successor(float %x, float %scale)  {
+entry:
+  %cmp = fcmp olt float %x, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+common.ret18:                                     ; preds = %if.then, %if.end
+  %common.ret18.op = phi float [ %call_test, %if.then ], [ %mul, %if.end ]
+  ret float %common.ret18.op
+
+if.then:                                          ; preds = %entry
+  %fneg = fneg float %x
+  %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+  %call_test = tail call float @test(float %fneg, float %call)
+  br label %common.ret18
+
+if.end:                                           ; preds = %entry
+  %mul = fmul float %x, %scale
+  br label %common.ret18
+}

nikic · May 29, 2025

llvm/lib/Analysis/InlineCost.cpp

+  // Make sure that the callsite is recursive:
+  if (CandidateCall.getCaller() != &F)
+    return false;
+  CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);


The cast to CallInst here is unnecessary, you should be able to work directly on the CandidateCall.

nikic · May 29, 2025

llvm/lib/Analysis/InlineCost.cpp

+  // Now we have a recursive call that is guarded by a cmp instruction.
+  // Check if this cmp can be simplified:
+  SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
+  CondContext CC(cast<Value>(&Cmp));


Shouldn't need this cast<>.

nikic · May 29, 2025

llvm/lib/Analysis/InlineCost.cpp

+  CondContext CC(cast<Value>(&Cmp));
+  CC.CondIsTrue = CallBB == Br->getSuccessor(0);
+  SQ.CC = &CC;
+  CC.AffectedValues.insert(FuncArg);


Should also remove

llvm-project/llvm/lib/Analysis/InlineCost.cpp

Line 266 in 180244d

DominatorTree DT;

now?

This is used in other places in InlineCost that are not related to my case. I don't think I should remove it without first investigating its uses, and that should be done in a separate patch.

Didn't you add this member in #119677?

Ah yes, sorry.

nikic · May 29, 2025

llvm/test/Transforms/Inline/inline-recursive-fn2.ll

@@ -0,0 +1,45 @@
+; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s


Needs REQUIRES: asserts if you're matching debug output. Though I'm not super clear on what exactly this particular test is intended to check.

Here I just want to show the difference between this approach and the old approach in the other patch.
This test shows that the recursive function will not get simplified unless the caller is the function itself, rather than some other caller, as was the case with the old patch.

- Consider inlining a recursive function of depth 1 only when the caller is the function itself, instead of considering it at every callsite, so that we avoid redundant work.
- Use CondContext instead of the DomTree for better compile time.

nikic

LGTM

hassnaaHamdi · Jun 2, 2025

Thanks.

- Consider inlining a recursive function of depth 1 only when the caller is the function itself, instead of considering it at every callsite, so that we avoid redundant work.
- Use CondContext instead of the DomTree for better compile time.

hassnaaHamdi requested a review from nikic as a code owner May 15, 2025 00:14

llvmbot added llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels May 15, 2025

hassnaaHamdi force-pushed the improve_inlining_decision branch from 66378f5 to 180244d Compare May 15, 2025 00:15

nikic reviewed May 29, 2025

View reviewed changes

hassnaaHamdi force-pushed the improve_inlining_decision branch 2 times, most recently from 66dc85a to fe616fb Compare June 1, 2025 18:25

hassnaaHamdi added 6 commits June 2, 2025 11:36

[ValueTracking][NFC]: Use injected condition to compute known FPClass

c219bb0

Use !CondContext.Invert instead of CondIsTrue

d4d36df

[InlineCost][precommit]: Add test file

c4004e3

resolve comments

0718158

Rebase. Add explanation comment to the test file

6f7b1a4

hassnaaHamdi force-pushed the improve_inlining_decision branch from fe616fb to 6f7b1a4 Compare June 2, 2025 13:44

nikic approved these changes Jun 2, 2025

View reviewed changes

hassnaaHamdi merged commit c81d84c into llvm:main Jun 4, 2025
9 of 11 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[InlineCost]: Optimize inlining of recursive function. #139982

[InlineCost]: Optimize inlining of recursive function. #139982

hassnaaHamdi commented May 15, 2025 •

edited by nikic

Loading

Uh oh!

llvmbot commented May 15, 2025 •

edited

Loading

Uh oh!

nikic May 29, 2025

Uh oh!

nikic May 29, 2025

Uh oh!

nikic May 29, 2025

Uh oh!

hassnaaHamdi May 30, 2025

Uh oh!

nikic May 30, 2025

Uh oh!

hassnaaHamdi Jun 1, 2025

Uh oh!

nikic May 29, 2025

Uh oh!

hassnaaHamdi May 30, 2025

Uh oh!

nikic left a comment

Uh oh!

hassnaaHamdi commented Jun 2, 2025

Uh oh!

Uh oh!

Uh oh!

		@@ -0,0 +1,45 @@
		; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s

Search code, repositories, users, issues, pull requests...

[InlineCost]: Optimize inlining of recursive function. #139982

[InlineCost]: Optimize inlining of recursive function. #139982

Conversation

hassnaaHamdi commented May 15, 2025 • edited by nikic Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 15, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

hassnaaHamdi commented Jun 2, 2025

Uh oh!

Uh oh!

Uh oh!

hassnaaHamdi commented May 15, 2025 •

edited by nikic

Loading

llvmbot commented May 15, 2025 •

edited

Loading