diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index f5ae204426170..faa0c1e470fd9 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -90,6 +90,10 @@ STATISTIC(FoundProfiledCalleeMaxDepth, STATISTIC(FoundProfiledCalleeNonUniquelyCount, "Number of profiled callees found via multiple tail call chains"); STATISTIC(DeferredBackedges, "Number of backedges with deferred cloning"); +STATISTIC(NewMergedNodes, "Number of new nodes created during merging"); +STATISTIC(NonNewMergedNodes, "Number of non new nodes used during merging"); +STATISTIC(MissingAllocForContextId, + "Number of missing alloc nodes for context ids"); static cl::opt DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -160,6 +164,13 @@ static cl::opt CloneRecursiveContexts( "memprof-clone-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles")); +// Generally this is needed for correct assignment of allocation clones to +// function clones, however, allow it to be disabled for debugging while the +// functionality is new and being tested more widely. +static cl::opt + MergeClones("memprof-merge-clones", cl::init(true), cl::Hidden, + cl::desc("Merge clones before assigning functions")); + // When disabled, try to detect and prevent cloning of recursive contexts. // This is only necessary until we support cloning through recursive cycles. // Leave on by default for now, as disabling requires a little bit of compile @@ -560,6 +571,10 @@ class CallsiteContextGraph { /// Mark backedges via the standard DFS based backedge algorithm. void markBackedges(); + /// Merge clones generated during cloning for different allocations but that + /// are called by the same caller node, to ensure proper function assignment. 
+ void mergeClones(); + // Try to partition calls on the given node (already placed into the AllCalls // array) by callee function, creating new copies of Node as needed to hold // calls with different callees, and moving the callee edges appropriately. @@ -778,6 +793,21 @@ class CallsiteContextGraph { void markBackedges(ContextNode *Node, DenseSet &Visited, DenseSet &CurrentStack); + /// Recursive helper for merging clones. + void + mergeClones(ContextNode *Node, DenseSet &Visited, + DenseMap &ContextIdToAllocationNode); + /// Main worker for merging callee clones for a given node. + void mergeNodeCalleeClones( + ContextNode *Node, DenseSet &Visited, + DenseMap &ContextIdToAllocationNode); + /// Helper to find other callers of the given set of callee edges that can + /// share the same callee merge node. + void findOtherCallersToShareMerge( + ContextNode *Node, std::vector> &CalleeEdges, + DenseMap &ContextIdToAllocationNode, + DenseSet &OtherCallersToShareMerge); + /// Recursively perform cloning on the graph for the given Node and its /// callers, in order to uniquely identify the allocation behavior of an /// allocation given its context. The context ids of the allocation being @@ -4016,6 +4046,338 @@ IndexCallsiteContextGraph::cloneFunctionForCallsite( return {Func.func(), CloneNo}; } +// We perform cloning for each allocation node separately. However, this +// sometimes results in a situation where the same node calls multiple +// clones of the same callee, created for different allocations. This +// causes issues when assigning functions to these clones, as each node can +// in reality only call a single callee clone. +// +// To address this, before assigning functions, merge callee clone nodes as +// needed using a post order traversal from the allocations. 
We attempt to +// use existing clones as the merge node when legal, and to share them +// among callers with the same properties (callers calling the same set of +// callee clone nodes for the same allocations). +// +// Without this fix, in some cases incorrect function assignment will lead +// to calling the wrong allocation clone. +template +void CallsiteContextGraph::mergeClones() { + if (!MergeClones) + return; + + // Generate a map from context id to the associated allocation node for use + // when merging clones. + DenseMap ContextIdToAllocationNode; + for (auto &Entry : AllocationCallToContextNodeMap) { + auto *Node = Entry.second; + for (auto Id : Node->getContextIds()) + ContextIdToAllocationNode[Id] = Node->getOrigNode(); + for (auto *Clone : Node->Clones) { + for (auto Id : Clone->getContextIds()) + ContextIdToAllocationNode[Id] = Clone->getOrigNode(); + } + } + + // Post order traversal starting from allocations to ensure each callsite + // calls a single clone of its callee. Callee nodes that are clones of each + // other are merged (via new merge nodes if needed) to achieve this. + DenseSet Visited; + for (auto &Entry : AllocationCallToContextNodeMap) { + auto *Node = Entry.second; + + mergeClones(Node, Visited, ContextIdToAllocationNode); + + // Make a copy so the recursive post order traversal that may create new + // clones doesn't mess up iteration. Note that the recursive traversal + // itself does not call mergeClones on any of these nodes, which are all + // (clones of) allocations. + auto Clones = Node->Clones; + for (auto *Clone : Clones) + mergeClones(Clone, Visited, ContextIdToAllocationNode); + } + + if (DumpCCG) { + dbgs() << "CCG after merging:\n"; + dbgs() << *this; + } + if (ExportToDot) + exportToDot("aftermerge"); + + if (VerifyCCG) { + check(); + } +} + +// Recursive helper for above mergeClones method. 
+template +void CallsiteContextGraph::mergeClones( + ContextNode *Node, DenseSet &Visited, + DenseMap &ContextIdToAllocationNode) { + auto Inserted = Visited.insert(Node); + if (!Inserted.second) + return; + + // Make a copy since the recursive call may move a caller edge to a new + // callee, messing up the iterator. + auto CallerEdges = Node->CallerEdges; + for (auto CallerEdge : CallerEdges) { + // Skip any caller edge moved onto a different callee during recursion. + if (CallerEdge->Callee != Node) + continue; + mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode); + } + + // Merge for this node after we handle its callers. + mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode); +} + +template +void CallsiteContextGraph::mergeNodeCalleeClones( + ContextNode *Node, DenseSet &Visited, + DenseMap &ContextIdToAllocationNode) { + // Ignore Node if we moved all of its contexts to clones. + if (Node->emptyContextIds()) + return; + + // First identify groups of clones among Node's callee edges, by building + // a map from each callee base node to the associated callee edges from Node. + MapVector>> + OrigNodeToCloneEdges; + for (const auto &E : Node->CalleeEdges) { + auto *Callee = E->Callee; + if (!Callee->CloneOf && Callee->Clones.empty()) + continue; + ContextNode *Base = Callee->getOrigNode(); + OrigNodeToCloneEdges[Base].push_back(E); + } + + // Helper for callee edge sorting below. Return true if A's callee has fewer + // caller edges than B, or if A is a clone and B is not, or if A's first + // context id is smaller than B's. 
+ auto CalleeCallerEdgeLessThan = [](const std::shared_ptr &A, + const std::shared_ptr &B) { + if (A->Callee->CallerEdges.size() != B->Callee->CallerEdges.size()) + return A->Callee->CallerEdges.size() < B->Callee->CallerEdges.size(); + if (A->Callee->CloneOf && !B->Callee->CloneOf) + return true; + else if (!A->Callee->CloneOf && B->Callee->CloneOf) + return false; + // Use the first context id for each edge as a + // tie-breaker. + return *A->ContextIds.begin() < *B->ContextIds.begin(); + }; + + // Process each set of callee clones called by Node, performing the needed + // merging. Entries are bound by reference to sort in place without copying. + for (auto &Entry : OrigNodeToCloneEdges) { + // CalleeEdges is the set of edges from Node reaching callees that are + // mutual clones of each other. + auto &CalleeEdges = Entry.second; + auto NumCalleeClones = CalleeEdges.size(); + // A single edge means there is no merging needed. + if (NumCalleeClones == 1) + continue; + // Sort the CalleeEdges calling this group of clones in ascending order of + // their caller edge counts, putting clones ahead of the original node in + // cases of a tie. This simplifies finding an existing node to use as the + // merge node. + std::stable_sort(CalleeEdges.begin(), CalleeEdges.end(), + CalleeCallerEdgeLessThan); + + // Find other callers of the given set of callee edges that can + // share the same callee merge node. See the comments at this method + // definition for details. + DenseSet OtherCallersToShareMerge; + findOtherCallersToShareMerge(Node, CalleeEdges, ContextIdToAllocationNode, + OtherCallersToShareMerge); + + // Now do the actual merging. Identify existing or create a new MergeNode + // during the first iteration. Move each callee over, along with edges from + // other callers we've determined above can share the same merge node.
+ ContextNode *MergeNode = nullptr; + DenseMap CallerToMoveCount; + for (auto CalleeEdge : CalleeEdges) { + auto *OrigCallee = CalleeEdge->Callee; + // If we don't have a MergeNode yet (only happens on the first iteration, + // as a new one will be created when we go to move the first callee edge + // over as needed), see if we can use this callee. + if (!MergeNode) { + // If there are no other callers, simply use this callee. + if (CalleeEdge->Callee->CallerEdges.size() == 1) { + MergeNode = OrigCallee; + NonNewMergedNodes++; + continue; + } + // Otherwise, if we have identified other caller nodes that can share + // the merge node with Node, see if all of OrigCallee's callers are + // going to share the same merge node. In that case we can use callee + // (since all of its callers would move to the new merge node). + if (!OtherCallersToShareMerge.empty()) { + bool MoveAllCallerEdges = true; + for (auto CalleeCallerE : OrigCallee->CallerEdges) { + if (CalleeCallerE == CalleeEdge) + continue; + if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller)) { + MoveAllCallerEdges = false; + break; + } + } + // If we are going to move all callers over, we can use this callee as + // the MergeNode. + if (MoveAllCallerEdges) { + MergeNode = OrigCallee; + NonNewMergedNodes++; + continue; + } + } + } + // Move this callee edge, creating a new merge node if necessary. + if (MergeNode) { + assert(MergeNode != OrigCallee); + moveEdgeToExistingCalleeClone(CalleeEdge, MergeNode, + /*NewClone*/ false); + } else { + MergeNode = moveEdgeToNewCalleeClone(CalleeEdge); + NewMergedNodes++; + } + // Now move all identified edges from other callers over to the merge node + // as well. + if (!OtherCallersToShareMerge.empty()) { + // Make and iterate over a copy of OrigCallee's caller edges because + // some of these will be moved off of the OrigCallee and that would mess + // up the iteration from OrigCallee. 
+ auto OrigCalleeCallerEdges = OrigCallee->CallerEdges; + for (auto &CalleeCallerE : OrigCalleeCallerEdges) { + if (CalleeCallerE == CalleeEdge) + continue; + if (!OtherCallersToShareMerge.contains(CalleeCallerE->Caller)) + continue; + CallerToMoveCount[CalleeCallerE->Caller]++; + moveEdgeToExistingCalleeClone(CalleeCallerE, MergeNode, + /*NewClone*/ false); + } + } + removeNoneTypeCalleeEdges(OrigCallee); + removeNoneTypeCalleeEdges(MergeNode); + } + } +} + +// Look for other nodes that have edges to the same set of callee +// clones as the current Node. Those can share the eventual merge node +// (reducing cloning and binary size overhead) iff: +// - they have edges to the same set of callee clones +// - each callee edge reaches a subset of the same allocations as Node's +// corresponding edge to the same callee clone. +// The second requirement is to ensure that we don't undo any of the +// necessary cloning to distinguish contexts with different allocation +// behavior. +// FIXME: This is somewhat conservative, as we really just need to ensure +// that they don't reach the same allocations as contexts on edges from Node +// going to any of the *other* callee clones being merged. However, that +// requires more tracking and checking to get right. +template +void CallsiteContextGraph:: + findOtherCallersToShareMerge( + ContextNode *Node, + std::vector> &CalleeEdges, + DenseMap &ContextIdToAllocationNode, + DenseSet &OtherCallersToShareMerge) { + auto NumCalleeClones = CalleeEdges.size(); + // This map counts how many edges to the same callee clone exist for other + // caller nodes of each callee clone. + DenseMap OtherCallersToSharedCalleeEdgeCount; + // Counts the number of other caller nodes that have edges to all callee + // clones that don't violate the allocation context checking. 
+ unsigned PossibleOtherCallerNodes = 0; + + // We only need to look at other Caller nodes if the first callee edge has + // multiple callers (recall they are sorted in ascending order above). + if (CalleeEdges[0]->Callee->CallerEdges.size() < 2) + return; + + // For each callee edge: + // - Collect the count of other caller nodes calling the same callees. + // - Collect the alloc nodes reached by contexts on each callee edge. + DenseMap> CalleeEdgeToAllocNodes; + for (auto CalleeEdge : CalleeEdges) { + assert(CalleeEdge->Callee->CallerEdges.size() > 1); + // For each other caller of the same callee, increment the count of + // edges reaching the same callee clone. + for (auto CalleeCallerEdges : CalleeEdge->Callee->CallerEdges) { + if (CalleeCallerEdges->Caller == Node) { + assert(CalleeCallerEdges == CalleeEdge); + continue; + } + OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller]++; + // If this caller edge now reaches all of the same callee clones, + // increment the count of candidate other caller nodes. + if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerEdges->Caller] == + NumCalleeClones) + PossibleOtherCallerNodes++; + } + // Collect the alloc nodes reached by contexts on each callee edge, for + // later analysis. + for (auto Id : CalleeEdge->getContextIds()) { + auto *Alloc = ContextIdToAllocationNode.lookup(Id); + if (!Alloc) { + // FIXME: unclear why this happens occasionally, presumably + // imperfect graph updates possibly with recursion. + MissingAllocForContextId++; + continue; + } + CalleeEdgeToAllocNodes[CalleeEdge.get()].insert(Alloc); + } + } + + // Now walk the callee edges again, and make sure that for each candidate + // caller node all of its edges to the callees reach the same allocs (or + // a subset) as those along the corresponding callee edge from Node. + for (auto CalleeEdge : CalleeEdges) { + assert(CalleeEdge->Callee->CallerEdges.size() > 1); + // Stop if we do not have any (more) candidate other caller nodes. 
+ if (!PossibleOtherCallerNodes) + break; + auto &CurCalleeAllocNodes = CalleeEdgeToAllocNodes[CalleeEdge.get()]; + // Check each other caller of this callee clone. + for (auto &CalleeCallerE : CalleeEdge->Callee->CallerEdges) { + // Not interested in the callee edge from Node itself. + if (CalleeCallerE == CalleeEdge) + continue; + // Skip any callers that didn't have callee edges to all the same + // callee clones. + if (OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] != + NumCalleeClones) + continue; + // Make sure that each context along edge from candidate caller node + // reaches an allocation also reached by this callee edge from Node. + for (auto Id : CalleeCallerE->getContextIds()) { + auto *Alloc = ContextIdToAllocationNode.lookup(Id); + if (!Alloc) + continue; + // If not, simply reset the map entry to 0 so caller is ignored, and + // reduce the count of candidate other caller nodes. + if (!CurCalleeAllocNodes.contains(Alloc)) { + OtherCallersToSharedCalleeEdgeCount[CalleeCallerE->Caller] = 0; + PossibleOtherCallerNodes--; + break; + } + } + } + } + + if (!PossibleOtherCallerNodes) + return; + + // Build the set of other caller nodes that can use the same callee merge + // node. + for (auto &[OtherCaller, Count] : OtherCallersToSharedCalleeEdgeCount) { + if (Count != NumCalleeClones) + continue; + OtherCallersToShareMerge.insert(OtherCaller); + } +} + // This method assigns cloned callsites to functions, cloning the functions as // needed. The assignment is greedy and proceeds roughly as follows: // @@ -4051,6 +4413,8 @@ template bool CallsiteContextGraph::assignFunctions() { bool Changed = false; + mergeClones(); + // Keep track of the assignment of nodes (callsites) to function clones they // call. 
DenseMap CallsiteToCalleeFuncCloneMap; diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll new file mode 100644 index 0000000000000..990a4a4e4d064 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll @@ -0,0 +1,404 @@ +;; Test that correct clones are generated and reached when we need to +;; re-merge clone nodes before function assignment. +;; +;; The code is similar to that of basic.ll, but with a second allocation. + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-dump-ccg %s -S 2>&1 | FileCheck %s \ +; RUN: --check-prefix=IR --check-prefix=DUMP + +;; Make sure the option to disable merging causes the expected regression. +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-merge-clones=false %s -S 2>&1 | FileCheck %s --check-prefix=NOMERGE +;; main should incorrectly call the same clone of foo. +; NOMERGE: define {{.*}} @main +; NOMERGE-NEXT: entry: +; NOMERGE-NEXT: call {{.*}} @_Z3foov.memprof.1() +; NOMERGE-NEXT: call {{.*}} @_Z3foov.memprof.1() + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() { +entry: + ;; Ultimately calls bar and allocates notcold memory from first call to new + ;; and cold memory from second call to new. + %call = call noundef ptr @_Z3foov(), !callsite !0 + ;; Ultimately calls bar and allocates cold memory from first call to new + ;; and notcold memory from second call to new. + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +define internal ptr @_Z3barv() { +entry: + ;; notcold when called from first call to foo from main, cold when called from second. 
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0, !memprof !2, !callsite !7 + ;; cold when called from first call to foo from main, notcold when called from second. + %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0, !memprof !13, !callsite !18 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() { +entry: + %call = call noundef ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() { +entry: + %call = call noundef ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +attributes #0 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} +!13 = !{!14, !16} +!14 = !{!15, !"cold"} +!15 = !{i64 123, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!16 = !{!17, !"notcold"} +!17 = !{i64 123, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!18 = !{i64 123} + +;; After cloning, each callsite in main calls different clones of foo with +;; different allocation types, and ditto all the way through the leaf +;; allocation callsites. The single allocation-type clones are shared between +;; the two callsites in main. This would lead to incorrect assignment of +;; the leaf allocations to function clones as is, since we have lost the +;; information that each callsite in main ultimately reaches two allocation +;; callsites with *different* allocation types.
+; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR1ALLOC1:0x[a-f0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ1:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2ALLOC1:0x[a-f0-9]+]] + +; DUMP: Node [[BAZ1]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR1ALLOC2:0x[a-f0-9]+]] to Caller: [[BAZ1]] AllocTypes: NotCold ContextIds: 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ1]] to Caller: [[FOO1:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 1 4 +; DUMP: Clones: [[BAZ2:0x[a-f0-9]+]] + +; DUMP: Node [[FOO1]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ1]] to Caller: [[FOO1]] AllocTypes: NotCold ContextIds: 1 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN1:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN2:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 4 +; DUMP: Clones: [[FOO2:0x[a-f0-9]+]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO2:0x[a-f0-9]+]] to Caller: [[MAIN1]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 +; DUMP: 
CalleeEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN2]] AllocTypes: NotCold ContextIds: 4 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR1ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ1]] AllocTypes: NotCold ContextIds: 4 +; DUMP: Clones: [[BAR2ALLOC2:0x[a-f0-9]+]] + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: Cold ContextIds: 3 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 3 +; DUMP: Clone of [[BAZ1]] + +; DUMP: Node [[BAR2ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR1ALLOC1]] + +; DUMP: Node [[BAR2ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: 
ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 3 +; DUMP: Clone of [[BAR1ALLOC2]] + +;; After merging, each callsite in main calls a different single clone of foo +;; with both cold and not cold allocation types, but ultimately reaches two +;; single allocation type allocation callsite clones of the correct +;; combination. The graph after assigning function clones is the same, but +;; with function calls updated to the new function clones. +; DUMP: CCG after merging: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR1ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2ALLOC1]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO3:0x[a-f0-9]+]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR1ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 4 +; DUMP: Clones: [[BAR2ALLOC2]] + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: 
ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: Clone of [[BAZ1]] + +; DUMP: Node [[BAR2ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR1ALLOC1]] + +; DUMP: Node [[BAR2ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 3 +; DUMP: Clone of [[BAR1ALLOC2]] + +; DUMP: Node [[FOO3]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ3]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: 
+; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3]] AllocTypes: NotCold ContextIds: 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: Clone of [[BAZ1]] + +; DUMP: CCG after assigning function clones: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR1ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2ALLOC1]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov.memprof.1() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR1ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1 (clone 1) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3]] AllocTypes: NotCold ContextIds: 4 +; DUMP: Clones: [[BAR2ALLOC2]] + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv.memprof.1() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: 
[[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: Clone of [[BAZ1]] + +; DUMP: Node [[BAR2ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 1) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR1ALLOC1]] + +; DUMP: Node [[BAR2ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 3 +; DUMP: Clone of [[BAR1ALLOC2]] + +; DUMP: Node [[FOO3]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 1) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 3 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ3]] +; DUMP: %call = call noundef ptr @_Z3barv.memprof.1() (clone 1) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3]] AllocTypes: NotCold ContextIds: 4 +; 
DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: Clone of [[BAZ1]] + +; IR: define {{.*}} @main +;; The first call to foo should now call foo.memprof.1 that ultimately +;; calls bar with the first allocation hinted notcold and the second cold +;; (via call chain foo.memprof.1 -> baz -> bar). +; IR: call {{.*}} @_Z3foov.memprof.1() +;; The second call to foo still calls the original foo, but ultimately +;; reaches a clone of bar with the first allocation hinted cold and the +;; second notcold. +; IR: call {{.*}} @_Z3foov() +; IR: define internal {{.*}} @_Z3barv() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv() +; IR: call {{.*}} @_Z3barv() +; IR: define internal {{.*}} @_Z3foov() +; IR: call {{.*}} @_Z3bazv.memprof.1() +; IR: define internal {{.*}} @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]] +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] +; IR: define internal {{.*}} @_Z3bazv.memprof.1() +; IR: call {{.*}} @_Z3barv.memprof.1() +; IR: define internal {{.*}} @_Z3foov.memprof.1() +; IR: call {{.*}} @_Z3bazv() +; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" } +; IR: attributes #[[COLD]] = { builtin "memprof"="cold" } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll new file mode 100644 index 0000000000000..80f6bc7d8ddaf --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll @@ -0,0 +1,474 @@ +;; Test that correct clones are generated and reached when we need to +;; re-merge clone nodes before function assignment. +;; +;; The code is similar to that of mergenodes.ll, but with two additional +;; contexts sharing the same two allocations. This tests that we correctly +;; share the merged nodes when possible. 
+ +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-dump-ccg %s -S 2>&1 | FileCheck %s \ +; RUN: --check-prefix=IR --check-prefix=DUMP + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() { +entry: + ;; Ultimately calls bar and allocates notcold memory from first call to new + ;; and cold memory from second call to new. + %call = call noundef ptr @_Z3foov(), !callsite !0 + ;; Ultimately calls bar and allocates cold memory from first call to new + ;; and notcold memory from second call to new. + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + ;; Same allocation pattern as the first call to foo above, should end up + ;; sharing the same merge nodes. + %call2 = call noundef ptr @_Z3foov(), !callsite !19 + ;; Same allocation pattern as the second call to foo above, should end up + ;; sharing the same merge nodes. + %call3 = call noundef ptr @_Z3foov(), !callsite !20 + ret i32 0 +} + +define internal ptr @_Z3barv() { +entry: + ;; notcold when called from first and third calls to foo from main, cold when + ;; called from second and fourth. + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0, !memprof !2, !callsite !7 + ;; cold when called from first and third calls to foo from main, notcold when + ;; called from second and fourth. 
+ %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0, !memprof !13, !callsite !18 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() { +entry: + %call = call noundef ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() { +entry: + %call = call noundef ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +attributes #0 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!19 = !{i64 8910} +!20 = !{i64 91011} +!2 = !{!3, !5, !21, !23} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!21 = !{!22, !"notcold"} +!22 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8910} +!23 = !{!24, !"cold"} +!24 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 91011} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} +!13 = !{!14, !16, !25, !27} +!14 = !{!15, !"cold"} +!15 = !{i64 123, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!16 = !{!17, !"notcold"} +!17 = !{i64 123, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!25 = !{!26, !"cold"} +!26 = !{i64 123, i64 -5964873800580613432, i64 2732490490862098848, i64 8910} +!27 = !{!28, !"notcold"} +!28 = !{i64 123, i64 -5964873800580613432, i64 2732490490862098848, i64 91011} +!18 = !{i64 123} + +;; After cloning, each callsite in main calls different clones of foo with +;; different allocation types, and ditto all the way through the leaf +;; allocation callsites. The single allocation-type clones are shared between +;; the four callsites in main. 
This would lead to incorrect assignment of +;; the leaf allocations to function clones as is, since we have lost the +;; information that each callsite in main ultimately reaches two allocation +;; callsites with *different* allocation types. +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR1ALLOC1:0x[a-f0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ1:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 1 3 +; DUMP: Clones: [[BAR2ALLOC1:0x[a-f0-9]+]] + +; DUMP: Node [[BAZ1]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 6 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ1]] AllocTypes: NotCold ContextIds: 1 3 +; DUMP: Edge from Callee [[BAR1ALLOC2:0x[a-f0-9]+]] to Caller: [[BAZ1]] AllocTypes: NotCold ContextIds: 6 8 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ1]] to Caller: [[FOO1:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 1 3 6 8 +; DUMP: Clones: [[BAZ2:0x[a-f0-9]+]] + +; DUMP: Node [[FOO1]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 6 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ1]] to Caller: [[FOO1]] AllocTypes: NotCold ContextIds: 1 3 6 8 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN1:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN3:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN2:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN4:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 8 +; DUMP: Clones: [[FOO2:0x[a-f0-9]+]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr 
@_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO2:0x[a-f0-9]+]] to Caller: [[MAIN1]] AllocTypes: Cold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN3]] +; DUMP: %call2 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN3]] AllocTypes: Cold ContextIds: 7 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: %call3 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO1]] to Caller: [[MAIN4]] AllocTypes: NotCold ContextIds: 8 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR1ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 6 8 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ1]] AllocTypes: NotCold ContextIds: 6 8 +; DUMP: Clones: [[BAR2ALLOC2:0x[a-f0-9]+]] + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 5 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 4 5 
7 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN3]] AllocTypes: Cold ContextIds: 7 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 5 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 4 +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 5 7 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 4 5 7 +; DUMP: Clone of [[BAZ1]] + +; DUMP: Node [[BAR2ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 4 +; DUMP: Clone of [[BAR1ALLOC1]] + +; DUMP: Node [[BAR2ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 5 7 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 5 7 +; DUMP: Clone of [[BAR1ALLOC2]] + +;; After merging, each pair of callsites in main with the same allocation +;; pattern call a different single clone of foo with both cold and not cold +;; allocation types, but ultimately reach two single allocation type +;; allocation callsite clones of the correct combination. The graph after +;; assigning function clones is the same, but with function calls updated to +;; the new function clones. 
+; DUMP: CCG after merging: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR1ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 3 +; DUMP: Clones: [[BAR2ALLOC1]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO3:0x[a-f0-9]+]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 6 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN3]] +; DUMP: %call2 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN3]] AllocTypes: NotColdCold ContextIds: 3 7 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: %call3 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN4]] AllocTypes: NotColdCold ContextIds: 4 8 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR1ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 6 8 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3:0x[a-f0-9]+]] AllocTypes: NotCold ContextIds: 6 8 +; DUMP: Clones: [[BAR2ALLOC2]] + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: 
NotColdCold +; DUMP: ContextIds: 2 4 6 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 6 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN4]] AllocTypes: NotColdCold ContextIds: 4 8 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 5 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 5 7 +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 5 7 +; DUMP: Clone of [[BAZ1]] + +; DUMP: Node [[BAR2ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 4 +; DUMP: Clone of [[BAR1ALLOC1]] + +; DUMP: Node [[BAR2ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 5 7 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 5 7 +; DUMP: Clone of [[BAR1ALLOC2]] + +; DUMP: Node [[FOO3]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 5 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 5 7 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 5 +; DUMP: Edge from Callee [[FOO3]] to 
Caller: [[MAIN3]] AllocTypes: NotColdCold ContextIds: 3 7 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ3]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 6 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 4 +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3]] AllocTypes: NotCold ContextIds: 6 8 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 6 8 +; DUMP: Clone of [[BAZ1]] + +; DUMP: CCG after assigning function clones: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR1ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 3 +; DUMP: Clones: [[BAR2ALLOC1]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov.memprof.1() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 6 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN3]] +; DUMP: %call2 = call noundef ptr @_Z3foov.memprof.1() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN3]] AllocTypes: NotColdCold ContextIds: 3 7 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: %call3 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: 
ContextIds: 4 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN4]] AllocTypes: NotColdCold ContextIds: 4 8 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR1ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1 (clone 1) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 6 8 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3]] AllocTypes: NotCold ContextIds: 6 8 +; DUMP: Clones: [[BAR2ALLOC2]] + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv.memprof.1() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 6 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: NotColdCold ContextIds: 2 6 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN4]] AllocTypes: NotColdCold ContextIds: 4 8 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 5 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 5 7 +; DUMP: Edge from Callee [[BAR1ALLOC1]] to Caller: [[BAZ2]] AllocTypes: NotCold ContextIds: 1 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 5 7 +; DUMP: Clone of [[BAZ1]] + +; DUMP: Node [[BAR2ALLOC1]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #0 (clone 1) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 4 +; DUMP: Clone of [[BAR1ALLOC1]] + +; DUMP: Node [[BAR2ALLOC2]] +; DUMP: %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #1 (clone 0) +; DUMP: AllocTypes: Cold +; 
DUMP: ContextIds: 5 7 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2ALLOC2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 5 7 +; DUMP: Clone of [[BAR1ALLOC2]] + +; DUMP: Node [[FOO3]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 1) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 3 5 7 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO3]] AllocTypes: NotColdCold ContextIds: 1 3 5 7 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN1]] AllocTypes: NotColdCold ContextIds: 1 5 +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN3]] AllocTypes: NotColdCold ContextIds: 3 7 +; DUMP: Clone of [[FOO1]] + +; DUMP: Node [[BAZ3]] +; DUMP: %call = call noundef ptr @_Z3barv.memprof.1() (clone 1) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 4 6 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2ALLOC1]] to Caller: [[BAZ3]] AllocTypes: Cold ContextIds: 2 4 +; DUMP: Edge from Callee [[BAR1ALLOC2]] to Caller: [[BAZ3]] AllocTypes: NotCold ContextIds: 6 8 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ3]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 2 4 6 8 +; DUMP: Clone of [[BAZ1]] + +; IR: define {{.*}} @main +;; The first call to foo should now call foo.memprof.1 that ultimately +;; calls bar with the first allocation hinted notcold and the second cold +;; (via call chain foo.memprof.1 -> baz -> bar). +; IR: call {{.*}} @_Z3foov.memprof.1() +;; The second call to foo still calls the original foo, but ultimately +;; reaches a clone of bar with the first allocation hinted cold and the +;; second notcold. 
+; IR: call {{.*}} @_Z3foov() +; IR: define internal {{.*}} @_Z3barv() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] +; IR: define internal {{.*}} @_Z3bazv() +; IR: call {{.*}} @_Z3barv() +; IR: define internal {{.*}} @_Z3foov() +; IR: call {{.*}} @_Z3bazv.memprof.1() +; IR: define internal {{.*}} @_Z3barv.memprof.1() +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]] +; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] +; IR: define internal {{.*}} @_Z3bazv.memprof.1() +; IR: call {{.*}} @_Z3barv.memprof.1() +; IR: define internal {{.*}} @_Z3foov.memprof.1() +; IR: call {{.*}} @_Z3bazv() +; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" } +; IR: attributes #[[COLD]] = { builtin "memprof"="cold" } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll b/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll index 7fe9dc96921c6..af93d56c5f33a 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll @@ -67,7 +67,9 @@ ; REMARKS: created clone _Z1Ab.memprof.1 ; REMARKS: created clone _Z1Xb.memprof.1 +; REMARKS: created clone _Z1Xb.memprof.2 ; REMARKS: created clone _Z1Bb.memprof.1 +; REMARKS: created clone _Z1Bb.memprof.2 ; REMARKS: created clone _Z1Cb.memprof.1 ; REMARKS: created clone _Z1Eb.memprof.1 ; REMARKS: call in clone _Z1Gv assigned to call function clone _Z1Eb.memprof.1 @@ -78,17 +80,18 @@ ; REMARKS: call in clone _Z1Cb.memprof.1 assigned to call function clone _Z1Bb.memprof.1 ; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Eb ; REMARKS: call in clone _Z1Eb assigned to call function clone _Z1Cb -; REMARKS: call in clone _Z1Cb assigned to call function clone _Z1Bb.memprof.1 -; REMARKS: call in clone _Z1Bb.memprof.1 assigned to call function clone _Z1Xb.memprof.1 -; REMARKS: call in clone 
_Z1Xb.memprof.1 assigned to call function clone _Z1Ab.memprof.1 +; REMARKS: call in clone _Z1Cb assigned to call function clone _Z1Bb.memprof.2 +; REMARKS: call in clone _Z1Bb.memprof.2 assigned to call function clone _Z1Xb.memprof.2 +; REMARKS: call in clone _Z1Xb.memprof.2 assigned to call function clone _Z1Ab.memprof.1 ; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold -; REMARKS: call in clone _Z1Bb.memprof.1 assigned to call function clone _Z1Xb +; REMARKS: call in clone _Z1Bb.memprof.1 assigned to call function clone _Z1Xb.memprof.1 +; REMARKS: call in clone _Z1Xb.memprof.1 assigned to call function clone _Z1Ab ; REMARKS: call in clone _Z1Dv assigned to call function clone _Z1Bb ; REMARKS: call in clone _Z1Bb assigned to call function clone _Z1Xb ; REMARKS: call in clone _Z1Xb assigned to call function clone _Z1Ab ; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold -; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold -; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold +; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute cold +; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute notcold target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -204,7 +207,7 @@ attributes #7 = { builtin } ; IR: define {{.*}} @_Z1Cb(i1 noundef zeroext %b) ; IR-NEXT: entry: -; IR-NEXT: call {{.*}} @_Z1Bb.memprof.1(i1 noundef zeroext %b) +; IR-NEXT: call {{.*}} @_Z1Bb.memprof.2(i1 noundef zeroext %b) ; IR: define {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b) ; IR-NEXT: entry: @@ -215,18 +218,20 @@ attributes #7 = { builtin } ; IR-NEXT: br label %if.end ; IR-EMPTY: ; IR-NEXT: if.else: -; IR-NEXT: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]] +; IR-NEXT: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] -; IR: define {{.*}} @_Z1Xb.memprof.1(i1 noundef zeroext %b) +; 
IR: define {{.*}} @_Z1Xb.memprof.2(i1 noundef zeroext %b) ; IR-NEXT: entry: ; IR-NEXT: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b) -; IR: define {{.*}} @_Z1Bb.memprof.1(i1 noundef zeroext %b) +; IR: define {{.*}} @_Z1Bb.memprof.2(i1 noundef zeroext %b) ; IR-NEXT: entry: -; IR-NEXT: call {{.*}} @_Z1Xb.memprof.1(i1 noundef zeroext %b) +; IR-NEXT: call {{.*}} @_Z1Xb.memprof.2(i1 noundef zeroext %b) +; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" } ; IR: attributes #[[COLD]] = { builtin "memprof"="cold" } ; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) ; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) -; STATS: 5 memprof-context-disambiguation - Number of function clones created during whole program analysis +; STATS: 7 memprof-context-disambiguation - Number of function clones created during whole program analysis +; STATS: 2 memprof-context-disambiguation - Number of new nodes created during merging