//===-- TraceDumper.h -------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "lldb/Symbol/SymbolContext.h" #include "lldb/Target/TraceCursor.h" #include #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H namespace lldb_private { /// Class that holds the configuration used by \a TraceDumper for /// traversing and dumping instructions. struct TraceDumperOptions { /// If \b true, the cursor will be iterated forwards starting from the /// oldest instruction. Otherwise, the iteration starts from the most /// recent instruction. bool forwards = false; /// Dump only instruction addresses without disassembly nor symbol /// information. bool raw = false; /// Dump in json format. bool json = false; /// When dumping in JSON format, pretty print the output. bool pretty_print_json = false; /// For each trace item, print the corresponding timestamp in nanoseconds if /// available. bool show_timestamps = false; /// Dump the events that happened between instructions. bool show_events = false; /// Dump events and none of the instructions. bool only_events = false; /// For each instruction, print the instruction kind. bool show_control_flow_kind = false; /// Optional custom id to start traversing from. std::optional id; /// Optional number of instructions to skip from the starting position /// of the cursor. std::optional skip; }; /// Class used to dump the instructions of a \a TraceCursor using its current /// state and granularity. class TraceDumper { public: /// Helper struct that holds symbol, disassembly and address information of an /// instruction. struct SymbolInfo { SymbolContext sc; Address address; lldb::DisassemblerSP disassembler; lldb::InstructionSP instruction; lldb_private::ExecutionContext exe_ctx; }; /// Helper struct that holds all the information we know about a trace item struct TraceItem { lldb::user_id_t id; lldb::addr_t load_address; std::optional timestamp; std::optional hw_clock; std::optional sync_point_metadata; std::optional error; std::optional event; std::optional symbol_info; std::optional prev_symbol_info; std::optional cpu_id; }; /// An object representing a traced function call. /// /// A function call is represented using segments and subcalls. /// /// TracedSegment: /// A traced segment is a maximal list of consecutive traced instructions /// that belong to the same function call. A traced segment will end in /// three possible ways: /// - With a call to a function deeper in the callstack. In this case, /// most of the times this nested call will return /// and resume with the next segment of this segment's owning function /// call. More on this later. /// - Abruptly due to end of trace. In this case, we weren't able to trace /// the end of this function call. /// - Simply a return higher in the callstack. /// /// In terms of implementation details, as segment can be represented with /// the beginning and ending instruction IDs from the instruction trace. /// /// UntracedPrefixSegment: /// It might happen that we didn't trace the beginning of a function and we /// saw it for the first time as part of a return. As a way to signal these /// cases, we have a placeholder UntracedPrefixSegment class that completes the /// callgraph. /// /// Example: /// We might have this piece of execution: /// /// main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4] /// foo() [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main /// invoked foo /// main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30] /// /// In this case, our function main invokes foo. We have 3 segments: main /// [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset /// 0x24 to 0x40]. We also have the instruction ids from the corresponding /// linear instruction trace for each segment. /// /// But what if we started tracing since the middle of foo? Then we'd have /// an incomplete trace /// /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] /// /// Notice that we changed the instruction ids because this is a new trace. /// Here, in order to have a somewhat complete tree with good traversal /// capabilities, we can create an UntracedPrefixSegment to signal the portion of /// main() that we didn't trace. We don't know if this segment was in fact /// multiple segments with many function calls. We'll never know. The /// resulting tree looks like the following: /// /// main() [untraced] /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] /// /// And in pseudo-code: /// /// FunctionCall [ /// UntracedPrefixSegment { /// symbol: main() /// nestedCall: FunctionCall [ # this untraced segment has a nested /// call /// TracedSegment { /// symbol: foo() /// fromInstructionId: 1 /// toInstructionId: 10 /// nestedCall: none # this doesn't have a nested call /// } /// } /// ], /// TracedSegment { /// symbol: main() /// fromInstructionId: 11 /// toInstructionId: 20 /// nestedCall: none # this also doesn't have a nested call /// } /// ] /// /// We can see the nested structure and how instructions are represented as /// segments. /// /// /// Returns: /// Code doesn't always behave intuitively. Some interesting functions /// might modify the stack and thus change the behavior of common /// instructions like CALL and RET. We try to identify these cases, and /// the result is that the return edge from a segment might connect with a /// function call very high the stack. For example, you might have /// /// main() /// foo() /// bar() /// # here bar modifies the stack and pops foo() from it. Then it /// finished the a RET (return) /// main() # we came back directly to main() /// /// I have observed some trampolines doing this, as well as some std /// functions (like ostream functions). So consumers should be aware of /// this. /// /// There are all sorts of "abnormal" behaviors you can see in code, and /// whenever we fail at identifying what's going on, we prefer to create a /// new tree. /// /// Function call forest: /// A single tree would suffice if a trace didn't contain errors nor /// abnormal behaviors that made our algorithms fail. Sadly these /// anomalies exist and we prefer not to use too many heuristics and /// probably end up lying to the user. So we create a new tree from the /// point we can't continue using the previous tree. This results in /// having a forest instead of a single tree. This is probably the best we /// can do if we consumers want to use this data to perform performance /// analysis or reverse debugging. /// /// Non-functions: /// Not everything in a program is a function. There are blocks of /// instructions that are simply labeled or even regions without symbol /// information that we don't what they are. We treat all of them as /// functions for simplicity. /// /// Errors: /// Whenever an error is found, a new tree with a single segment is /// created. All consecutive errors after the original one are then /// appended to this segment. As a note, something that GDB does is to use /// some heuristics to merge trees that were interrupted by errors. We are /// leaving that out of scope until a feature like that one is really /// needed. /// Forward declaration class FunctionCall; using FunctionCallUP = std::unique_ptr; class FunctionCall { public: class TracedSegment { public: /// \param[in] cursor_sp /// A cursor pointing to the beginning of the segment. /// /// \param[in] symbol_info /// The symbol information of the first instruction of the segment. /// /// \param[in] call /// The FunctionCall object that owns this segment. TracedSegment(const lldb::TraceCursorSP &cursor_sp, const SymbolInfo &symbol_info, FunctionCall &owning_call) : m_first_insn_id(cursor_sp->GetId()), m_last_insn_id(cursor_sp->GetId()), m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info), m_owning_call(owning_call) {} /// \return /// The chronologically first instruction ID in this segment. lldb::user_id_t GetFirstInstructionID() const; /// \return /// The chronologically last instruction ID in this segment. lldb::user_id_t GetLastInstructionID() const; /// \return /// The symbol information of the chronologically first instruction ID /// in this segment. const SymbolInfo &GetFirstInstructionSymbolInfo() const; /// \return /// The symbol information of the chronologically last instruction ID in /// this segment. const SymbolInfo &GetLastInstructionSymbolInfo() const; /// \return /// Get the call that owns this segment. const FunctionCall &GetOwningCall() const; /// Append a new instruction to this segment. /// /// \param[in] cursor_sp /// A cursor pointing to the new instruction. /// /// \param[in] symbol_info /// The symbol information of the new instruction. void AppendInsn(const lldb::TraceCursorSP &cursor_sp, const SymbolInfo &symbol_info); /// Create a nested call at the end of this segment. /// /// \param[in] cursor_sp /// A cursor pointing to the first instruction of the nested call. /// /// \param[in] symbol_info /// The symbol information of the first instruction of the nested call. FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp, const SymbolInfo &symbol_info); /// Executed the given callback if there's a nested call at the end of /// this segment. void IfNestedCall(std::function callback) const; private: TracedSegment(const TracedSegment &) = delete; TracedSegment &operator=(TracedSegment const &); /// Delimiting instruction IDs taken chronologically. /// \{ lldb::user_id_t m_first_insn_id; lldb::user_id_t m_last_insn_id; /// \} /// An optional nested call starting at the end of this segment. FunctionCallUP m_nested_call; /// The symbol information of the delimiting instructions /// \{ SymbolInfo m_first_symbol_info; SymbolInfo m_last_symbol_info; /// \} FunctionCall &m_owning_call; }; class UntracedPrefixSegment { public: /// Note: Untraced segments can only exist if have also seen a traced /// segment of the same function call. Thus, we can use those traced /// segments if we want symbol information and such. UntracedPrefixSegment(FunctionCallUP &&nested_call) : m_nested_call(std::move(nested_call)) {} const FunctionCall &GetNestedCall() const; private: UntracedPrefixSegment(const UntracedPrefixSegment &) = delete; UntracedPrefixSegment &operator=(UntracedPrefixSegment const &); FunctionCallUP m_nested_call; }; /// Create a new function call given an instruction. This will also create a /// segment for that instruction. /// /// \param[in] cursor_sp /// A cursor pointing to the first instruction of that function call. /// /// \param[in] symbol_info /// The symbol information of that first instruction. FunctionCall(const lldb::TraceCursorSP &cursor_sp, const SymbolInfo &symbol_info); /// Append a new traced segment to this function call. /// /// \param[in] cursor_sp /// A cursor pointing to the first instruction of the new segment. /// /// \param[in] symbol_info /// The symbol information of that first instruction. void AppendSegment(const lldb::TraceCursorSP &cursor_sp, const SymbolInfo &symbol_info); /// \return /// The symbol info of some traced instruction of this call. const SymbolInfo &GetSymbolInfo() const; /// \return /// \b true if and only if the instructions in this function call are /// trace errors, in which case this function call is a fake one. bool IsError() const; /// \return /// The list of traced segments of this call. const std::deque &GetTracedSegments() const; /// \return /// A non-const reference to the most-recent traced segment. TracedSegment &GetLastTracedSegment(); /// Create an untraced segment for this call that jumps to the provided /// nested call. void SetUntracedPrefixSegment(FunctionCallUP &&nested_call); /// \return /// A optional to the untraced prefix segment of this call. const std::optional & GetUntracedPrefixSegment() const; /// \return /// A pointer to the parent call. It may be \b nullptr. FunctionCall *GetParentCall() const; void SetParentCall(FunctionCall &parent_call); private: /// An optional untraced segment that precedes all the traced segments. std::optional m_untraced_prefix_segment; /// The traced segments in order. We used a deque to prevent moving these /// objects when appending to the list, which would happen with vector. std::deque m_traced_segments; /// The parent call, which might be null. Useful for reconstructing /// callstacks. FunctionCall *m_parent_call = nullptr; /// Whether this call represents a list of consecutive errors. bool m_is_error; }; /// Interface used to abstract away the format in which the instruction /// information will be dumped. class OutputWriter { public: virtual ~OutputWriter() = default; /// Notify this writer that the cursor ran out of data. virtual void NoMoreData() {} /// Dump a trace item (instruction, error or event). virtual void TraceItem(const TraceItem &item) = 0; /// Dump a function call forest. virtual void FunctionCallForest(const std::vector &forest) = 0; }; /// Create a instruction dumper for the cursor. /// /// \param[in] cursor /// The cursor whose instructions will be dumped. /// /// \param[in] s /// The stream where to dump the instructions to. /// /// \param[in] options /// Additional options for configuring the dumping. TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s, const TraceDumperOptions &options); /// Dump \a count instructions of the thread trace starting at the current /// cursor position. /// /// This effectively moves the cursor to the next unvisited position, so that /// a subsequent call to this method continues where it left off. /// /// \param[in] count /// The number of instructions to print. /// /// \return /// The instruction id of the last traversed instruction, or \b /// std::nullopt if no instructions were visited. std::optional DumpInstructions(size_t count); /// Dump all function calls forwards chronologically and hierarchically void DumpFunctionCalls(); private: /// Create a trace item for the current position without symbol information. TraceItem CreatRawTraceItem(); lldb::TraceCursorSP m_cursor_sp; TraceDumperOptions m_options; std::unique_ptr m_writer_up; }; } // namespace lldb_private #endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H