Skip to content

Commit 295a01f

Browse files
[MemProf] Fix reporting with -memprof-matching-cold-threshold (llvm#173327)
With the -memprof-matching-cold-threshold option, we hint as cold allocations where the fraction of cold bytes is at least the given threshold. However, we were incorrectly reporting all of the allocation's contexts and bytes as hinted cold. Fix this to report the non-cold contexts as ignored. To do this, refactor out some existing reporting, and also keep track of the original allocation type for each context in the Trie along with its ContextTotalSize information. Most of the changes are the change to this array's type and name.
1 parent 42ea774 commit 295a01f

File tree

3 files changed

+62
-28
lines changed

3 files changed

+62
-28
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB);
6161
/// profile but that we haven't yet been able to disambiguate.
6262
LLVM_ABI void addAmbiguousAttribute(CallBase *CB);
6363

64+
// During matching we also keep the AllocationType along with the
65+
// ContextTotalSize in the Trie for the most accurate reporting when we decide
66+
// to hint unambiguously where there is a dominant type. We don't put the
67+
// AllocationType in the ContextTotalSize struct as it isn't needed there
68+
// during the LTO step, because due to context trimming a summarized
69+
// context with its allocation type can correspond to multiple context/size
70+
// pairs. Here the redundancy is a short-lived convenience.
71+
using ContextSizeTypePair = std::pair<ContextTotalSize, AllocationType>;
72+
6473
/// Class to build a trie of call stack contexts for a particular profiled
6574
/// allocation call, along with their associated allocation types.
6675
/// The allocation will be at the root of the trie, which is then used to
@@ -75,8 +84,9 @@ class CallStackTrie {
7584
// If the user has requested reporting of hinted sizes, keep track of the
7685
// associated full stack id and profiled sizes. Can have more than one
7786
// after trimming (e.g. when building from metadata). This is only placed on
78-
// the last (root-most) trie node for each allocation context.
79-
std::vector<ContextTotalSize> ContextSizeInfo;
87+
// the last (root-most) trie node for each allocation context. Also
88+
// track the original allocation type of the context.
89+
std::vector<ContextSizeTypePair> ContextInfo;
8090
// Map of caller stack id to the corresponding child Trie node.
8191
std::map<uint64_t, CallStackTrieNode *> Callers;
8292
CallStackTrieNode(AllocationType Type)
@@ -118,10 +128,11 @@ class CallStackTrie {
118128
delete Node;
119129
}
120130

121-
// Recursively build up a complete list of context size information from the
122-
// trie nodes reached form the given Node, for hint size reporting.
123-
void collectContextSizeInfo(CallStackTrieNode *Node,
124-
std::vector<ContextTotalSize> &ContextSizeInfo);
131+
// Recursively build up a complete list of context information from the
132+
// trie nodes reached form the given Node, including each context's
133+
// ContextTotalSize and AllocationType, for hint size reporting.
134+
void collectContextInfo(CallStackTrieNode *Node,
135+
std::vector<ContextSizeTypePair> &ContextInfo);
125136

126137
// Recursively convert hot allocation types to notcold, since we don't
127138
// actually do any cloning for hot contexts, to facilitate more aggressive

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,13 @@ void CallStackTrie::addCallStack(
181181
Curr = New;
182182
}
183183
assert(Curr);
184-
llvm::append_range(Curr->ContextSizeInfo, ContextSizeInfo);
184+
// Append all of the ContextSizeInfo, along with their original AllocType.
185+
llvm::append_range(Curr->ContextInfo,
186+
llvm::map_range(ContextSizeInfo,
187+
[AllocType](const ContextTotalSize &CTS) {
188+
return ContextSizeTypePair(CTS,
189+
AllocType);
190+
}));
185191
}
186192

187193
void CallStackTrie::addCallStack(MDNode *MIB) {
@@ -216,7 +222,7 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
216222

217223
static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
218224
AllocationType AllocType,
219-
ArrayRef<ContextTotalSize> ContextSizeInfo,
225+
ArrayRef<ContextSizeTypePair> ContextInfo,
220226
const uint64_t MaxColdSize,
221227
bool BuiltFromExistingMetadata,
222228
uint64_t &TotalBytes, uint64_t &ColdBytes) {
@@ -225,7 +231,7 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
225231
MIBPayload.push_back(
226232
MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));
227233

228-
if (ContextSizeInfo.empty()) {
234+
if (ContextInfo.empty()) {
229235
// The profile matcher should have provided context size info if there was a
230236
// MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
231237
// handle a user-provided percent larger than 100. However, we may not have
@@ -234,7 +240,8 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
234240
return MDNode::get(Ctx, MIBPayload);
235241
}
236242

237-
for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
243+
for (const auto &[CSI, AT] : ContextInfo) {
244+
const auto &[FullStackId, TotalSize] = CSI;
238245
TotalBytes += TotalSize;
239246
bool LargeColdContext = false;
240247
if (AllocType == AllocationType::Cold) {
@@ -267,11 +274,11 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
267274
return MDNode::get(Ctx, MIBPayload);
268275
}
269276

270-
void CallStackTrie::collectContextSizeInfo(
271-
CallStackTrieNode *Node, std::vector<ContextTotalSize> &ContextSizeInfo) {
272-
llvm::append_range(ContextSizeInfo, Node->ContextSizeInfo);
277+
void CallStackTrie::collectContextInfo(
278+
CallStackTrieNode *Node, std::vector<ContextSizeTypePair> &ContextInfo) {
279+
llvm::append_range(ContextInfo, Node->ContextInfo);
273280
for (auto &Caller : Node->Callers)
274-
collectContextSizeInfo(Caller.second, ContextSizeInfo);
281+
collectContextInfo(Caller.second, ContextInfo);
275282
}
276283

277284
void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
@@ -283,6 +290,17 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
283290
convertHotToNotCold(Caller.second);
284291
}
285292

293+
// Helper to emit messages for non-cold contexts that are ignored for various
294+
// reasons when reporting of hinted bytes is enabled.
295+
static void emitIgnoredNonColdContextMessage(StringRef Tag,
296+
uint64_t FullStackId,
297+
StringRef Extra,
298+
uint64_t TotalSize) {
299+
errs() << "MemProf hinting: Total size for " << Tag
300+
<< " non-cold full allocation context hash " << FullStackId << Extra
301+
<< ": " << TotalSize << "\n";
302+
}
303+
286304
// Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
287305
// on options that enable filtering out some NotCold contexts.
288306
static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
@@ -321,9 +339,7 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
321339
uint64_t TS =
322340
mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
323341
->getZExtValue();
324-
errs() << "MemProf hinting: Total size for " << Tag
325-
<< " non-cold full allocation context hash " << FullStackId
326-
<< Extra << ": " << TS << "\n";
342+
emitIgnoredNonColdContextMessage(Tag, FullStackId, Extra, TS);
327343
}
328344
};
329345

@@ -430,10 +446,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
430446
// Trim context below the first node in a prefix with a single alloc type.
431447
// Add an MIB record for the current call stack prefix.
432448
if (hasSingleAllocType(Node->AllocTypes)) {
433-
std::vector<ContextTotalSize> ContextSizeInfo;
434-
collectContextSizeInfo(Node, ContextSizeInfo);
449+
std::vector<ContextSizeTypePair> ContextInfo;
450+
collectContextInfo(Node, ContextInfo);
435451
MIBNodes.push_back(createMIBNode(
436-
Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo,
452+
Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextInfo,
437453
MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
438454
return true;
439455
}
@@ -486,10 +502,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
486502
// non-cold allocation type.
487503
if (!CalleeHasAmbiguousCallerContext)
488504
return false;
489-
std::vector<ContextTotalSize> ContextSizeInfo;
490-
collectContextSizeInfo(Node, ContextSizeInfo);
505+
std::vector<ContextSizeTypePair> ContextInfo;
506+
collectContextInfo(Node, ContextInfo);
491507
MIBNodes.push_back(createMIBNode(
492-
Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize,
508+
Ctx, MIBCallStack, AllocationType::NotCold, ContextInfo, MaxColdSize,
493509
BuiltFromExistingMetadata, TotalBytes, ColdBytes));
494510
return true;
495511
}
@@ -503,9 +519,16 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
503519
removeAnyExistingAmbiguousAttribute(CI);
504520
CI->addFnAttr(A);
505521
if (MemProfReportHintedSizes) {
506-
std::vector<ContextTotalSize> ContextSizeInfo;
507-
collectContextSizeInfo(Alloc, ContextSizeInfo);
508-
for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
522+
std::vector<ContextSizeTypePair> ContextInfo;
523+
collectContextInfo(Alloc, ContextInfo);
524+
for (const auto &[CSI, OrigAT] : ContextInfo) {
525+
const auto &[FullStackId, TotalSize] = CSI;
526+
// If the original alloc type is not the one being applied as the hint,
527+
// report that we ignored this context.
528+
if (AT != OrigAT) {
529+
emitIgnoredNonColdContextMessage("ignored", FullStackId, "", TotalSize);
530+
continue;
531+
}
509532
errs() << "MemProf hinting: Total size for full allocation context hash "
510533
<< FullStackId << " and " << Descriptor << " alloc type "
511534
<< getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n";

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,10 +400,10 @@ for.end: ; preds = %for.cond
400400
;; with the full allocation context hash, type, and size in bytes.
401401
; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10
402402
; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10
403-
; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10
403+
; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 5725971306423925017: 10
404404
; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10
405405
; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10
406-
; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10
406+
; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 1093248920606587996: 10
407407
; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
408408
; REMARKSINGLE: remark: memprof.cc:25:13: call in function main marked with memprof allocation attribute notcold
409409
; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10

0 commit comments

Comments
 (0)