From 7fadab699144313c1cb39662734c1f16c309437a Mon Sep 17 00:00:00 2001 From: William Huang Date: Fri, 24 May 2024 05:42:27 -0400 Subject: [PATCH 1/2] [Sample Profile] Check hot callsite threshold when inlining a function with a sample profile Currently if a callsite is hot as determined by the sample profile, it is unconditionally inlined barring invalid cases (such as recursion). Inline cost check should still apply because a function's hotness and its inline cost are two different things. For example if a function is calling another very large function multiple times (at different code paths), the large function should not be inlined even if its hot. --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 8 ++- .../Inputs/inline-hot-callsite-threshold.prof | 3 + .../inline-hot-callsite-threshold.ll | 61 +++++++++++++++++++ .../SampleProfile/pseudo-probe-inline.ll | 2 +- llvm/test/Transforms/SampleProfile/remarks.ll | 4 +- 5 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof create mode 100644 llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 0920179fb76b7..10d6e2282ac2a 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1391,10 +1391,12 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { return InlineCost::getAlways("preinliner"); } - // For old FDO inliner, we inline the call site as long as cost is not - // "Never". The cost-benefit check is done earlier. + // For old FDO inliner, we inline the call site if it is below hot threshold, + // even if the function is hot based on sample profile data. This is to + // prevent huge functions from being inlined. if (!CallsitePrioritizedInline) { - return InlineCost::get(Cost.getCost(), INT_MAX); + return InlineCost::get(Cost.getCost(), + Params.HotCallSiteThreshold.value_or(INT_MAX)); } // Otherwise only use the cost from call analyzer, but overwite threshold with diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof new file mode 100644 index 0000000000000..d1c0408210f49 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof @@ -0,0 +1,3 @@ +foo:100:100 + 1: bar:100 + 1:100 diff --git a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll new file mode 100644 index 0000000000000..0584e1b5ee679 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=100 2>&1 | FileCheck %s + +; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100) +; CHECK: define dso_local noundef i32 @foo(i32 noundef %0) +; CHECK-NOT: %2 = tail call noundef i32 @bar(i32 noundef %0) +; CHECK-NEXT: %2 = icmp sgt i32 %0, 1 +; CHECK-NEXT: br i1 %2, label %3, label %bar.exit + +; Manually lower cost threshold for hot function inlining, so that the function +; is not inlined even profile indicates it as hot. +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=1 2>&1 | FileCheck %s --check-prefix=COST + +; COST-NOT: remark +; COST: define dso_local noundef i32 @foo(i32 noundef %0) +; COST-NEXT: %2 = tail call noundef i32 @bar(i32 noundef %0) + +define dso_local noundef i32 @bar(i32 noundef %0) #0 !dbg !10 { + %2 = icmp sgt i32 %0, 1 + br i1 %2, label %3, label %15 +3: ; preds = %1 + %4 = add nsw i32 %0, -2 + %5 = mul i32 %4, %4 + %6 = add i32 %5, %0 + %7 = zext nneg i32 %4 to i33 + %8 = add nsw i32 %0, -3 + %9 = zext i32 %8 to i33 + %10 = mul i33 %7, %9 + %11 = lshr i33 %10, 1 + %12 = trunc nuw i33 %11 to i32 + %13 = xor i32 %12, -1 + %14 = add i32 %6, %13 + br label %15 +15: ; preds = %3, %1 + %16 = phi i32 [ 0, %1 ], [ %14, %3 ] + ret i32 %16 +} + +define dso_local noundef i32 @foo(i32 noundef %0) #1 !dbg !20 { + %2 = tail call noundef i32 @bar(i32 noundef %0), !dbg !24 + ret i32 %2 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" } +attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug) +!1 = !DIFile(filename: "a.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 1, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!11 = !DIFile(filename: "a.cc", directory: ".") +!12 = !DISubroutineType(types: !13) +!13 = !{!14, !14} +!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !11, file: !11, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!23 = !DILocation(line: 0, scope: !20) +!24 = !DILocation(line: 6, column: 12, scope: !20) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index 18cbd857d97bb..2cd9abf0e11e9 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -98,7 +98,7 @@ if.end: ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '15' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '2147483647' +;YAML-NEXT: - Threshold: '3000' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: foo diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll index 997e02bb5b544..9c0143ae65ca7 100644 --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -22,7 +22,7 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). -; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21; +; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=3000) at callsite main:0:21; ; CHECK: remark: remarks.cc:9:19: 'rand' inlined into 'main' to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. @@ -51,7 +51,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '130' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '2147483647' +;YAML-NEXT: - Threshold: '3000' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: main From c5849d6dcd80d9ae24e36c5bb2e22ad6951a698b Mon Sep 17 00:00:00 2001 From: William Huang Date: Tue, 28 May 2024 16:40:41 -0400 Subject: [PATCH 2/2] Changed to use SampleHotCallSiteThreshold --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 3 +-- .../Transforms/SampleProfile/inline-hot-callsite-threshold.ll | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 10d6e2282ac2a..92ad4c34da6e7 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1395,8 +1395,7 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { // even if the function is hot based on sample profile data. This is to // prevent huge functions from being inlined. if (!CallsitePrioritizedInline) { - return InlineCost::get(Cost.getCost(), - Params.HotCallSiteThreshold.value_or(INT_MAX)); + return InlineCost::get(Cost.getCost(), SampleHotCallSiteThreshold); } // Otherwise only use the cost from call analyzer, but overwite threshold with diff --git a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll index 0584e1b5ee679..914ab4f1e3da5 100644 --- a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll +++ b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=100 2>&1 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=100 2>&1 | FileCheck %s ; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100) ; CHECK: define dso_local noundef i32 @foo(i32 noundef %0) @@ -8,7 +8,7 @@ ; Manually lower cost threshold for hot function inlining, so that the function ; is not inlined even profile indicates it as hot. -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=1 2>&1 | FileCheck %s --check-prefix=COST +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=1 2>&1 | FileCheck %s --check-prefix=COST ; COST-NOT: remark ; COST: define dso_local noundef i32 @foo(i32 noundef %0)