Skip to content

Commit 46c1770

Browse files
committed
[DLCov] Origin-Tracking: SymbolizeAddresses
1 parent 5a9cc93 commit 46c1770

File tree

4 files changed

+195
-63
lines changed

4 files changed

+195
-63
lines changed

llvm/include/llvm/Support/Signals.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,25 @@
1414
#ifndef LLVM_SUPPORT_SIGNALS_H
1515
#define LLVM_SUPPORT_SIGNALS_H
1616

17+
#include "llvm/Config/llvm-config.h"
1718
#include "llvm/Support/Compiler.h"
1819
#include <cstdint>
1920
#include <string>
2021

22+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
// std::array appears in the getStackTrace declaration further down in this
// header, so include it directly instead of relying on a transitive include.
#include <array>
namespace llvm {
// Type aliases that are convenient but only used by the stack-trace-collection
// code added if DebugLoc origin-tracking is enabled.

/// A set of raw code addresses that are waiting to be symbolized.
using AddressSet = DenseSet<void *>;

/// Maps a raw code address to the symbolized frame strings produced for it.
/// One address may own several strings (e.g. when it covers inlined frames).
using SymbolizedAddressMap = DenseMap<void *, SmallVector<std::string, 0>>;
} // namespace llvm
#endif
35+
2136
namespace llvm {
2237
class StringRef;
2338
class raw_ostream;
@@ -57,6 +72,28 @@ LLVM_ABI void DisableSystemDialogsOnCrash();
5772
/// specified, the entire frame is printed.
5873
LLVM_ABI void PrintStackTrace(raw_ostream &OS, int Depth = 0);
5974

75+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
76+
#ifdef NDEBUG
77+
#error DebugLoc origin-tracking should not be enabled in Release builds.
78+
#endif
79+
/// Populates the given array with a stack trace of the current program, up to
80+
/// MaxDepth frames. Returns the number of frames captured, which will be
81+
/// written into \p StackTrace starting at index 0. All entries after the
82+
/// returned depth will be unmodified. NB: This is only intended to be used for
83+
/// introspection of LLVM by Debugify, will not be enabled in release builds,
84+
/// and should not be relied on for other purposes.
/// NOTE(review): std::array's extent parameter is std::size_t; on targets
/// where that is not `unsigned long`, deducing MaxDepth from an argument may
/// fail -- confirm before using this on a non-LP64 platform.
85+
template <unsigned long MaxDepth>
86+
int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
87+
88+
/// Takes a set of \p Addresses, symbolizes them and stores the result in the
89+
/// provided \p SymbolizedAddresses map.
/// Each address is used as a key in the map, and every symbolized frame found
/// for that address is appended to the key's vector of strings.
90+
/// NB: This is only intended to be used for introspection of LLVM by
91+
/// Debugify, will not be enabled in release builds, and should not be relied
92+
/// on for other purposes.
93+
void symbolizeAddresses(AddressSet &Addresses,
94+
SymbolizedAddressMap &SymbolizedAddresses);
95+
#endif
96+
6097
// Run all registered signal handlers.
6198
LLVM_ABI void RunSignalHandlers();
6299

llvm/lib/Support/Signals.cpp

Lines changed: 138 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
#include "llvm/Support/raw_ostream.h"
3232
#include <array>
3333
#include <cmath>
34-
#include <vector>
3534

3635
//===----------------------------------------------------------------------===//
3736
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -137,47 +136,28 @@ static FormattedNumber format_ptr(void *PC) {
137136
return format_hex((uint64_t)PC, PtrWidth);
138137
}
139138

140-
/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
141-
LLVM_ATTRIBUTE_USED
142-
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
143-
int Depth, llvm::raw_ostream &OS) {
144-
if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
145-
return false;
146-
147-
// Don't recursively invoke the llvm-symbolizer binary.
148-
if (Argv0.contains("llvm-symbolizer"))
149-
return false;
150-
151-
// FIXME: Subtract necessary number from StackTrace entries to turn return addresses
152-
// into actual instruction addresses.
153-
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
154-
// alongside our binary, then in $PATH.
155-
ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
156-
if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
157-
LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
158-
} else if (!Argv0.empty()) {
159-
StringRef Parent = llvm::sys::path::parent_path(Argv0);
160-
if (!Parent.empty())
161-
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
162-
}
163-
if (!LLVMSymbolizerPathOrErr)
164-
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
165-
if (!LLVMSymbolizerPathOrErr)
166-
return false;
167-
const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
168-
169-
// If we don't know argv0 or the address of main() at this point, try
170-
// to guess it anyway (it's possible on some platforms).
171-
std::string MainExecutableName =
172-
sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
173-
: sys::fs::getMainExecutable(nullptr, nullptr);
139+
/// Reads a file \p Filename written by llvm-symbolizer containing function
140+
/// names and source locations for the addresses in \p AddressList and returns
141+
/// the strings in a vector of pairs, where the first pair element is the index
142+
/// of the corresponding entry in AddressList and the second is the symbolized
143+
/// frame, in a format based on the sanitizer stack trace printer, with the
144+
/// exception that it does not write out frame numbers (i.e. "#2 " for the
145+
/// third address), as it is not assumed that \p AddressList corresponds to a
146+
/// single stack trace.
147+
/// There may be multiple returned entries for a single \p AddressList entry if
148+
/// that frame address corresponds to one or more inlined frames; in this case,
149+
/// all frames for an address will appear contiguously and in-order.
150+
std::optional<SmallVector<std::pair<unsigned, std::string>, 0>>
151+
collectAddressSymbols(void **AddressList, unsigned AddressCount,
152+
const char *MainExecutableName,
153+
const std::string &LLVMSymbolizerPath) {
174154
BumpPtrAllocator Allocator;
175155
StringSaver StrPool(Allocator);
176-
std::vector<const char *> Modules(Depth, nullptr);
177-
std::vector<intptr_t> Offsets(Depth, 0);
178-
if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
179-
MainExecutableName.c_str(), StrPool))
180-
return false;
156+
SmallVector<const char *, 0> Modules(AddressCount, nullptr);
157+
SmallVector<intptr_t, 0> Offsets(AddressCount, 0);
158+
if (!findModulesAndOffsets(AddressList, AddressCount, Modules.data(), Offsets.data(),
159+
MainExecutableName, StrPool))
160+
return {};
181161
int InputFD;
182162
SmallString<32> InputFile, OutputFile;
183163
sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
@@ -187,9 +167,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
187167

188168
{
189169
raw_fd_ostream Input(InputFD, true);
190-
for (int i = 0; i < Depth; i++) {
191-
if (Modules[i])
192-
Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
170+
for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
171+
if (Modules[AddrIdx])
172+
Input << Modules[AddrIdx] << " " << (void*)Offsets[AddrIdx] << "\n";
193173
}
194174
}
195175

@@ -206,53 +186,148 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
206186
int RunResult =
207187
sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
208188
if (RunResult != 0)
209-
return false;
189+
return {};
210190

211-
// This report format is based on the sanitizer stack trace printer. See
212-
// sanitizer_stacktrace_printer.cc in compiler-rt.
191+
192+
SmallVector<std::pair<unsigned, std::string>, 0> Result;
213193
auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
214194
if (!OutputBuf)
215-
return false;
195+
return {};
216196
StringRef Output = OutputBuf.get()->getBuffer();
217197
SmallVector<StringRef, 32> Lines;
218198
Output.split(Lines, "\n");
219-
auto CurLine = Lines.begin();
220-
int frame_no = 0;
221-
for (int i = 0; i < Depth; i++) {
222-
auto PrintLineHeader = [&]() {
223-
OS << right_justify(formatv("#{0}", frame_no++).str(),
224-
std::log10(Depth) + 2)
225-
<< ' ' << format_ptr(StackTrace[i]) << ' ';
226-
};
227-
if (!Modules[i]) {
228-
PrintLineHeader();
229-
OS << '\n';
199+
auto *CurLine = Lines.begin();
200+
// Lines contains the output from llvm-symbolizer, which should contain for
201+
// each address with a module in order of appearance, one or more lines
202+
// containing the function name and line associated with that address,
203+
// followed by an empty line.
204+
// For each address, adds an output entry for every real or inlined frame at
205+
// that address. For addresses without known modules, we have a single entry
206+
// containing just the formatted address; for all other output entries, we
207+
// output the function entry if it is known, and either the line number if it
208+
// is known or the module+address offset otherwise.
209+
for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
210+
if (!Modules[AddrIdx]) {
211+
auto &SymbolizedFrame =
212+
Result.emplace_back(std::make_pair(AddrIdx, ""));
213+
raw_string_ostream OS(SymbolizedFrame.second);
214+
OS << format_ptr(AddressList[AddrIdx]);
230215
continue;
231216
}
232217
// Read pairs of lines (function name and file/line info) until we
233218
// encounter empty line.
234219
for (;;) {
235220
if (CurLine == Lines.end())
236-
return false;
221+
return {};
237222
StringRef FunctionName = *CurLine++;
238223
if (FunctionName.empty())
239224
break;
240-
PrintLineHeader();
225+
auto &SymbolizedFrame =
226+
Result.emplace_back(std::make_pair(AddrIdx, ""));
227+
raw_string_ostream OS(SymbolizedFrame.second);
228+
OS << format_ptr(AddressList[AddrIdx]) << ' ';
241229
if (!FunctionName.starts_with("??"))
242230
OS << FunctionName << ' ';
243231
if (CurLine == Lines.end())
244-
return false;
232+
return {};
245233
StringRef FileLineInfo = *CurLine++;
246234
if (!FileLineInfo.starts_with("??"))
247235
OS << FileLineInfo;
248236
else
249-
OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
250-
OS << "\n";
237+
OS << "(" << Modules[AddrIdx] << '+' << format_hex(Offsets[AddrIdx], 0) << ")";
251238
}
252239
}
240+
return Result;
241+
}
242+
243+
/// Locates the llvm-symbolizer executable.
///
/// Lookup order:
///  1. If the environment variable named by LLVMSymbolizerPathEnv is set, its
///     value is resolved with sys::findProgramByName.
///  2. Otherwise, if \p Argv0 is non-empty and has a parent directory,
///     "llvm-symbolizer" is searched for in that directory.
///  3. If neither of the above produced a path, "llvm-symbolizer" is searched
///     for using the default lookup of sys::findProgramByName.
///
/// \param Argv0 path of the running executable; may be empty when unknown.
/// \returns the resolved path, or the error from the last lookup attempted.
///
/// This is a file-local helper, so it is given internal linkage to avoid
/// exporting an extra symbol from the Support library.
static ErrorOr<std::string> getLLVMSymbolizerPath(StringRef Argv0 = {}) {
  ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
  if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
    LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
  } else if (!Argv0.empty()) {
    StringRef Parent = llvm::sys::path::parent_path(Argv0);
    if (!Parent.empty())
      LLVMSymbolizerPathOrErr =
          sys::findProgramByName("llvm-symbolizer", Parent);
  }
  // Nothing found yet (or the earlier lookup failed): try the default search.
  if (!LLVMSymbolizerPathOrErr)
    LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
  return LLVMSymbolizerPathOrErr;
}
256+
257+
/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
258+
LLVM_ATTRIBUTE_USED
259+
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
260+
int Depth, llvm::raw_ostream &OS) {
261+
if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
262+
return false;
263+
264+
// Don't recursively invoke the llvm-symbolizer binary.
265+
if (Argv0.contains("llvm-symbolizer"))
266+
return false;
267+
268+
// FIXME: Subtract necessary number from StackTrace entries to turn return addresses
269+
// into actual instruction addresses.
270+
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
271+
// alongside our binary, then in $PATH.
272+
ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath(Argv0);
273+
if (!LLVMSymbolizerPathOrErr)
274+
return false;
275+
const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
276+
277+
// If we don't know argv0 or the address of main() at this point, try
278+
// to guess it anyway (it's possible on some platforms).
279+
std::string MainExecutableName =
280+
sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
281+
: sys::fs::getMainExecutable(nullptr, nullptr);
282+
283+
auto SymbolizedAddressesOpt = collectAddressSymbols(
284+
StackTrace, Depth, MainExecutableName.c_str(), LLVMSymbolizerPath);
285+
if (!SymbolizedAddressesOpt)
286+
return false;
287+
for (unsigned FrameNo = 0; FrameNo < SymbolizedAddressesOpt->size();
288+
++FrameNo) {
289+
OS << right_justify(formatv("#{0}", FrameNo).str(), std::log10(Depth) + 2)
290+
<< ' ' << (*SymbolizedAddressesOpt)[FrameNo].second << '\n';
291+
}
253292
return true;
254293
}
255294

295+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
/// Symbolizes every address in \p Addresses with llvm-symbolizer and appends
/// the resulting frame strings to \p SymbolizedAddresses, keyed by address.
/// An address may receive more than one string.
///
/// Symbolization must be enabled (asserted). If llvm-symbolizer cannot be
/// located this reports a fatal error; if the symbolizer run itself fails,
/// the function returns without touching \p SymbolizedAddresses.
void sys::symbolizeAddresses(AddressSet &Addresses,
                             SymbolizedAddressMap &SymbolizedAddresses) {
  assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
         "Debugify origin stacktraces require symbolization to be enabled.");

  // Convert Set of Addresses to ordered list.
  SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
  if (AddressList.empty())
    return;
  llvm::sort(AddressList);

  // Use llvm-symbolizer tool to symbolize the stack traces. First look for it
  // alongside our binary, then in $PATH.
  ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath();
  if (!LLVMSymbolizerPathOrErr)
    report_fatal_error("Debugify origin stacktraces require llvm-symbolizer");
  const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;

  // Try to guess the main executable name, since we don't have argv0 available
  // here.
  std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);

  auto SymbolizedAddressesOpt = collectAddressSymbols(
      AddressList.data(), AddressList.size(), MainExecutableName.c_str(),
      LLVMSymbolizerPath);
  if (!SymbolizedAddressesOpt)
    return;

  // The first pair element indexes into AddressList. The frame strings are
  // owned by the temporary result, so move them into the map instead of
  // copying each one.
  for (auto &[AddrIdx, Frame] : *SymbolizedAddressesOpt)
    SymbolizedAddresses[AddressList[AddrIdx]].push_back(std::move(Frame));
}
#endif
330+
256331
static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
257332

258333
LLVM_ATTRIBUTE_USED

llvm/lib/Support/Unix/Signals.inc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
507507
return 0;
508508
}
509509

510+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
#if !defined(HAVE_BACKTRACE)
#error DebugLoc origin-tracking currently requires `backtrace()`.
#endif
namespace llvm::sys {
/// Fills \p StackTrace with up to MaxDepth frame addresses obtained from
/// backtrace() and returns the value backtrace() reports.
template <unsigned long MaxDepth>
int getStackTrace(std::array<void *, MaxDepth> &StackTrace) {
  // backtrace() takes its buffer size as an int.
  void **const Frames = StackTrace.data();
  return backtrace(Frames, static_cast<int>(MaxDepth));
}
// The template is defined in this implementation file only, so the one depth
// instantiated here (16) is emitted explicitly.
template int getStackTrace<16ul>(std::array<void *, 16ul> &);
} // namespace llvm::sys
#endif
524+
510525
/// If this is an ELF platform, we can find all loaded modules and their virtual
511526
/// addresses with dl_iterate_phdr.
512527
static bool findModulesAndOffsets(void **StackTrace, int Depth,

llvm/lib/Support/Windows/Signals.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// This file provides the Win32 specific implementation of the Signals class.
1010
//
1111
//===----------------------------------------------------------------------===//
12+
#include "llvm/Config/llvm-config.h"
1213
#include "llvm/Support/ConvertUTF.h"
1314
#include "llvm/Support/ExitCodes.h"
1415
#include "llvm/Support/FileSystem.h"
@@ -542,6 +543,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
542543
extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
543544
#endif
544545

546+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
547+
#error DebugLoc origin-tracking currently unimplemented for Windows.
548+
#endif
549+
545550
static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
546551
STACKFRAME64 StackFrame{};
547552
CONTEXT Context{};

0 commit comments

Comments
 (0)