diff options
author | yelouis <yelouis@google.com> | 2024-01-31 12:12:03 -0800 |
---|---|---|
committer | shantuo <sturbo89@gmail.com> | 2024-02-13 10:21:13 -0800 |
commit | 334d1c982219fc1f0131ceba4889d9dacc46c421 (patch) | |
tree | 8ebdaa7ba99a100e6cff8ec1b499006ec3ae43ac | |
parent | 3ada358d7fb3f71233da590b578e8d24dc71f3cd (diff) | |
download | perf_data_converter-334d1c982219fc1f0131ceba4889d9dacc46c421.tar.gz |
Arm SPE support: convert SPE auxtrace into perf data samples
PiperOrigin-RevId: 603122760
-rw-r--r-- | src/BUILD | 4 | ||||
-rw-r--r-- | src/perf_data_converter_test.cc | 3 | ||||
-rw-r--r-- | src/perf_data_handler.cc | 126 | ||||
-rw-r--r-- | src/perf_data_handler_test.cc | 111 | ||||
-rw-r--r-- | src/quipper/BUILD | 3 | ||||
-rw-r--r-- | src/quipper/kernel/perf_internals.h | 10 |
6 files changed, 226 insertions, 31 deletions
@@ -17,11 +17,11 @@ cc_library( ], deps = [ ":intervalmap", + "//src/quipper:arm_spe_decoder", "//src/quipper:binary_data_utils", "//src/quipper:dso", "//src/quipper:kernel", "//src/quipper:perf_data_cc_proto", - "//src/quipper:perf_reader", ], ) @@ -35,6 +35,7 @@ cc_library( ":perf_data_handler", ":builder", ":profile_cc_proto", + "//src/quipper:arm_spe_decoder", "//src/quipper:kernel", "//src/quipper:perf_data_cc_proto", "//src/quipper:perf_parser", @@ -62,6 +63,7 @@ cc_test( "//src/quipper:binary_data_utils", "//src/quipper:kernel", "//src/quipper:perf_buildid", + "//src/quipper:test_utils", ], ) diff --git a/src/perf_data_converter_test.cc b/src/perf_data_converter_test.cc index 5c6b32c..12d4e39 100644 --- a/src/perf_data_converter_test.cc +++ b/src/perf_data_converter_test.cc @@ -11,6 +11,7 @@ #include <unistd.h> +#include <cstdint> #include <cstdlib> #include <cstring> #include <fstream> @@ -1114,7 +1115,7 @@ TEST_F(PerfDataConverterTest, BuildIdFromMmapEvents) { } // If we change buildid-mmap events back to normal mmap events, then we will - // see thoes two samples being assigned the same build ID (the injected one). + // see those two samples being assigned the same build ID (the injected one). { for (size_t i = 0; i < perf_data_proto.events().size(); ++i) { auto event = perf_data_proto.mutable_events()->Mutable(i); diff --git a/src/perf_data_handler.cc b/src/perf_data_handler.cc index a336624..b5bc75d 100644 --- a/src/perf_data_handler.cc +++ b/src/perf_data_handler.cc @@ -7,6 +7,7 @@ #include "src/perf_data_handler.h" +#include <cstdint> #include <cstring> #include <iomanip> #include <iostream> @@ -21,10 +22,11 @@ #include "src/intervalmap.h" #include "src/path_matching.h" +#include "src/quipper/arm_spe_decoder.h" #include "src/quipper/binary_data_utils.h" #include "src/quipper/dso.h" #include "src/quipper/kernel/perf_event.h" -#include "src/quipper/perf_reader.h" +#include "src/quipper/kernel/perf_internals.h" using quipper::PerfDataProto; using quipper::PerfDataProto_MMapEvent; @@ -50,9 +52,40 @@ bool HasSuffixString(const std::string& s, const char* substr) { s.compare(s_len - substr_len, substr_len, substr) == 0; } -// Normalizer processes a PerfDataProto and maintains tables to the -// current metadata for each process. It drives callbacks to -// PerfDataHandler with samples in a fully normalized form. +// Checks if the auxtrace events contain Arm SPE data. +bool HasArmSPEAuxtrace(const PerfDataProto& perf_proto) { + for (const auto& event_proto : perf_proto.events()) { + if (event_proto.has_auxtrace_info_event()) { + if (event_proto.auxtrace_info_event().type() == + quipper::PERF_AUXTRACE_ARM_SPE) { + return true; + } + } + } + return false; +} + +// Creates a tid->pid mapping through fork & comm events. +std::unordered_map<uint32_t, uint32_t> TidToPidMapping( + const PerfDataProto& perf_proto) { + std::unordered_map<uint32_t, uint32_t> t2p; + for (const auto& event_proto : perf_proto.events()) { + if (event_proto.has_fork_event()) { + const auto& fork = event_proto.fork_event(); + t2p[fork.tid()] = fork.pid(); + } else if (event_proto.has_comm_event()) { + const auto& comm = event_proto.comm_event(); + t2p[comm.tid()] = comm.pid(); + } + } + return t2p; +} + +// Normalizer iterates through the events and metadata of the given +// PerfDataProto to create its own tables and metadata for each process. During +// the iteration, it drives callbacks to PerfDataHandler with samples in a fully +// normalized form (e.g. samples with their corresponding metadata like their +// mappings, call chains, branch stacks etc.). class Normalizer { public: Normalizer(const PerfDataProto& perf_proto, PerfDataHandler* handler) @@ -135,6 +168,11 @@ class Normalizer { } current_event_index++; } + + has_spe_auxtrace_ = HasArmSPEAuxtrace(perf_proto_); + if (has_spe_auxtrace_) { + tid_to_pid_ = TidToPidMapping(perf_proto_); + } } Normalizer(const Normalizer&) = delete; @@ -142,7 +180,7 @@ class Normalizer { ~Normalizer() {} - // Convert to a protobuf using quipper and then aggregate the results. + // Converts to a protobuf using quipper and then aggregate the results. void Normalize(); private: @@ -165,8 +203,16 @@ class Normalizer { void UpdateMapsWithForkEvent(const quipper::PerfDataProto_ForkEvent& fork); void LogStats(); - // Normalize the sample_event in event_proto and call handler_->Sample - void InvokeHandleSample(const quipper::PerfDataProto::PerfEvent& event_proto); + // Handles the sample_event in event_proto and call handler_->Sample. + // TODO(b/277114009): replace use_first_file_attr with a proper file_attr + // index when we have feasible way to tell which file attribute is for certain + // event (e.g. Arm SPE). + void HandleSample(const quipper::PerfDataProto::PerfEvent& event_proto, + bool use_first_file_attr); + + // Handles the auxtrace event in event_proto that contains the Arm SPE + // records to parse potential samples. + void HandleSpeAuxtrace(const quipper::PerfDataProto::PerfEvent& event_proto); // Handles the perf LOST event or LOST_SAMPLE event. void HandleLost(const quipper::PerfDataProto::PerfEvent& event_proto); @@ -273,6 +319,13 @@ class Normalizer { // older perf data. bool use_lost_sample_ = false; + // Whether the following auxtrace events contain Arm SPE data. + bool has_spe_auxtrace_ = false; + + // map from thread ID to process ID. It is used for parsing SPE records into + // samples. + std::unordered_map<uint32_t, uint32_t> tid_to_pid_; + struct { int64_t samples = 0; int64_t samples_with_addr = 0; @@ -280,6 +333,7 @@ class Normalizer { int64_t missing_main_mmap = 0; int64_t missing_sample_mmap = 0; int64_t missing_addr_mmap = 0; + int64_t missing_pid = 0; int64_t callchain_ips = 0; int64_t missing_callchain_mmap = 0; @@ -387,20 +441,35 @@ void Normalizer::Normalize() { event_proto.has_lost_event()) { HandleLost(event_proto); } else if (event_proto.has_sample_event()) { - InvokeHandleSample(event_proto); + HandleSample(event_proto, false); + } else if (event_proto.has_auxtrace_event()) { + if (has_spe_auxtrace_) { + HandleSpeAuxtrace(event_proto); + } + } else if (event_proto.has_auxtrace_error_event()) { + LOG(WARNING) << "auxtrace_error event: " + << event_proto.auxtrace_error_event().msg(); } } LogStats(); } -void Normalizer::InvokeHandleSample( - const quipper::PerfDataProto::PerfEvent& event_proto) { +void Normalizer::HandleSample( + const quipper::PerfDataProto::PerfEvent& event_proto, + bool use_first_file_attribute) { CHECK(event_proto.has_sample_event()); const auto& sample = event_proto.sample_event(); PerfDataHandler::SampleContext context(event_proto.header(), event_proto.sample_event()); - context.file_attrs_index = GetEventIndexForSample(context.sample); + if (use_first_file_attribute) { + // This is for the situation like SPE-record generated sample, where we want + // to use the first file_attrs_index instead of finding it through ID, + // because such synthesized sample does not have sample.id. + context.file_attrs_index = 0; + } else { + context.file_attrs_index = GetEventIndexForSample(context.sample); + } if (context.file_attrs_index == -1) { ++stat_.no_event_errors; return; @@ -589,6 +658,7 @@ void Normalizer::LogStats() { "missing_callchain_mmap"); CheckStat(stat_.missing_branch_stack_mmap, stat_.branch_stack_ips, "missing_branch_stack_mmap"); + CheckStat(stat_.missing_pid, stat_.samples, "missing_pid"); CheckStat(stat_.no_event_errors, 1, "unknown event id"); } @@ -816,6 +886,40 @@ int64_t Normalizer::GetEventIndexForSample( } return it->second; } + +void Normalizer::HandleSpeAuxtrace( + const quipper::PerfDataProto::PerfEvent& event_proto) { + const quipper::PerfDataProto::AuxtraceEvent& auxtrace_event = + event_proto.auxtrace_event(); + if (!auxtrace_event.has_trace_data()) { + return; + } + + quipper::ArmSpeDecoder::Record record; + quipper::ArmSpeDecoder decoder(auxtrace_event.trace_data(), false); + while (decoder.NextRecord(&record)) { + // Synthesize a perf data sample with from the SPE record. + uint32_t tid = record.context.id; + uint32_t pid = 0; + if (tid != 0) { + auto pid_it = tid_to_pid_.find(tid); + if (pid_it == tid_to_pid_.end()) { + stat_.missing_pid++; + LOG(WARNING) << "tid->pid mapping does not contain tid " << tid; + } else { + pid = pid_it->second; + } + } + + quipper::PerfDataProto::PerfEvent event_proto; + auto& sample = *event_proto.mutable_sample_event(); + sample.set_tid(tid); + sample.set_pid(pid); + sample.set_ip(record.ip.addr); + HandleSample(event_proto, true); + } +} + } // namespace // Finds needle in haystack starting at cursor. It then returns the index diff --git a/src/perf_data_handler_test.cc b/src/perf_data_handler_test.cc index 10c85cf..248967e 100644 --- a/src/perf_data_handler_test.cc +++ b/src/perf_data_handler_test.cc @@ -7,8 +7,12 @@ #include "src/perf_data_handler.h" +#include <cstdint> +#include <memory> +#include <string> #include <unordered_map> #include <unordered_set> +#include <utility> #include <vector> #include <gmock/gmock.h> @@ -16,7 +20,9 @@ #include "src/path_matching.h" #include "src/quipper/binary_data_utils.h" #include "src/quipper/kernel/perf_event.h" +#include "src/quipper/kernel/perf_internals.h" #include "src/quipper/perf_buildid.h" +#include "src/quipper/test_utils.h" using BranchStackEntry = quipper::PerfDataProto::BranchStackEntry; @@ -81,8 +87,8 @@ class TestPerfDataHandler : public PerfDataHandler { TestPerfDataHandler(std::vector<BranchStackEntry> expected_branch_stack, std::unordered_map<std::string, std::string> expected_filename_to_build_id) - : _expected_branch_stack(std::move(expected_branch_stack)), - _expected_filename_to_build_id( + : expected_branch_stack_(std::move(expected_branch_stack)), + expected_filename_to_build_id_( std::move(expected_filename_to_build_id)) {} TestPerfDataHandler(const TestPerfDataHandler&) = delete; TestPerfDataHandler& operator=(const TestPerfDataHandler&) = delete; @@ -90,17 +96,18 @@ class TestPerfDataHandler : public PerfDataHandler { // Callbacks for PerfDataHandler void Sample(const SampleContext& sample) override { + seen_sample_events_.push_back(sample.sample); if (sample.addr_mapping != nullptr) { const Mapping* m = sample.addr_mapping; - _seen_addr_mappings.push_back(std::unique_ptr<Mapping>( + seen_addr_mappings_.push_back(std::unique_ptr<Mapping>( new Mapping(m->filename, m->build_id, m->start, m->limit, m->file_offset, m->filename_md5_prefix))); } else { - _seen_addr_mappings.push_back(nullptr); + seen_addr_mappings_.push_back(nullptr); } - EXPECT_EQ(_expected_branch_stack.size(), sample.branch_stack.size()); + EXPECT_EQ(expected_branch_stack_.size(), sample.branch_stack.size()); for (size_t i = 0; i < sample.branch_stack.size(); i++) { - CheckBranchEquality(_expected_branch_stack[i], sample.branch_stack[i]); + CheckBranchEquality(expected_branch_stack_[i], sample.branch_stack[i]); } } void Comm(const CommContext& comm) override {} @@ -108,24 +115,29 @@ class TestPerfDataHandler : public PerfDataHandler { std::string actual_build_id = mmap.mapping->build_id.value; std::string actual_filename = mmap.mapping->filename; const auto expected_build_id_it = - _expected_filename_to_build_id.find(actual_filename); - if (expected_build_id_it != _expected_filename_to_build_id.end()) { + expected_filename_to_build_id_.find(actual_filename); + if (expected_build_id_it != expected_filename_to_build_id_.end()) { EXPECT_EQ(actual_build_id, expected_build_id_it->second) << "Build ID mismatch for the filename " << actual_filename; - _seen_filenames.insert(actual_filename); + seen_filenames_.insert(actual_filename); } } void CheckSeenFilenames() { - EXPECT_EQ(_expected_filename_to_build_id.size(), _seen_filenames.size()); - for (auto const& filename : _seen_filenames) { - EXPECT_TRUE(_expected_filename_to_build_id.find(filename) != - _expected_filename_to_build_id.end()); + EXPECT_EQ(expected_filename_to_build_id_.size(), seen_filenames_.size()); + for (auto const& filename : seen_filenames_) { + EXPECT_TRUE(expected_filename_to_build_id_.find(filename) != + expected_filename_to_build_id_.end()); } } const std::vector<std::unique_ptr<Mapping>>& SeenAddrMappings() const { - return _seen_addr_mappings; + return seen_addr_mappings_; + } + + const std::vector<quipper::PerfDataProto::SampleEvent>& SeenSampleEvents() + const { + return seen_sample_events_; } private: @@ -141,10 +153,11 @@ class TestPerfDataHandler : public PerfDataHandler { EXPECT_EQ(expected.abort(), actual.abort); EXPECT_EQ(expected.cycles(), actual.cycles); } - std::vector<BranchStackEntry> _expected_branch_stack; - std::unordered_map<std::string, std::string> _expected_filename_to_build_id; - std::unordered_set<std::string> _seen_filenames; - std::vector<std::unique_ptr<Mapping>> _seen_addr_mappings; + std::vector<BranchStackEntry> expected_branch_stack_; + std::unordered_map<std::string, std::string> expected_filename_to_build_id_; + std::unordered_set<std::string> seen_filenames_; + std::vector<std::unique_ptr<Mapping>> seen_addr_mappings_; + std::vector<quipper::PerfDataProto::SampleEvent> seen_sample_events_; }; TEST(PerfDataHandlerTest, KernelBuildIdWithDifferentFilename) { @@ -475,6 +488,68 @@ TEST(PerfDataHandlerTest, LostEventsAreHandledInOlderPerf) { } } +TEST(PerfDataHandlerTest, SpeAuxtraceIntoSamples) { + quipper::PerfDataProto proto; + + // File attrs are required for sample event processing. + uint64_t file_attr_id = 0; + auto* file_attr = proto.add_file_attrs(); + file_attr->add_ids(file_attr_id); + + // Add a fork and a comm events for tid->pid mapping . + auto* fork = proto.add_events()->mutable_fork_event(); + fork->set_tid(0x5f80); + fork->set_pid(0x1); + auto* comm = proto.add_events()->mutable_comm_event(); + comm->set_tid(0xe); + comm->set_pid(2); + + // Add an auxtrace info event. + proto.add_events()->mutable_auxtrace_info_event()->set_type( + quipper::PERF_AUXTRACE_ARM_SPE); + + // Add an auxtrace event. + auto* auxtrace_event = proto.add_events()->mutable_auxtrace_event(); + std::string trace_data = quipper::GenerateBinaryTrace({ + ///////////////////////////////// record 0 + "b0 d0 c2 a1 ed 66 ba ff c0", // PC 0xffba66eda1c2d0 el2 ns=1 + "00 00 00 00 00", // PAD + "65 80 5f 00 00", // CONTEXT 0x5f80 el2 + "49 00", // LD GP-REG + "52 16 00", // EV RETIRED L1D-ACCESS TLB-ACCESS + "99 04 00", // LAT 4 ISSUE + "98 0c 00", // LAT 12 TOT + "b2 28 6b 09 03 37 0e ff 00", // VA 0xff0e3703096b28 + "9a 01 00", // LAT 1 XLAT + "00 00 00 00 00 00 00 00 00", // PAD + "43 00", // DATA-SOURCE 0 + "00 00", // PAD + "71 2e 65 2f 6a 0a 00 00 00", // TS 44731163950 + ///////////////////////////////// record 1 + "b0 e0 b0 ef ed 66 ba ff c0", // PC 0xffba66edefb0e0 el2 ns=1 + "00 00 00 00 00", // PAD + "65 0e 00 00 00", // CONTEXT 0xe el2 + "4a 01", // B COND + "52 42 00", // EV RETIRED NOT-TAKEN + "99 10 00", // LAT 16 ISSUE + "98 11 00", // LAT 17 TOT + "b1 e4 b0 ef ed 66 ba ff c0", // TGT 0xffba66edefb0e4 el2 ns=1 + "00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00", // PAD + "71 8d 65 2f 6a 0a 00 00 00", // TS 44731164045 + }); + auxtrace_event->set_trace_data(trace_data); + + TestPerfDataHandler handler(std::vector<BranchStackEntry>{}, + std::unordered_map<std::string, std::string>{}); + PerfDataHandler::Process(proto, &handler); + + const auto& sample_events = handler.SeenSampleEvents(); + ASSERT_EQ(sample_events.size(), 2); + // Match to the correct pids. + EXPECT_EQ(sample_events[0].pid(), 1); + EXPECT_EQ(sample_events[1].pid(), 2); +} + } // namespace perftools int main(int argc, char** argv) { diff --git a/src/quipper/BUILD b/src/quipper/BUILD index 2f49d3e..3d550d0 100644 --- a/src/quipper/BUILD +++ b/src/quipper/BUILD @@ -409,6 +409,7 @@ cc_library( "test_perf_data.h", "test_utils.h", ], + visibility = ["//src:__subpackages__"], deps = [ ":binary_data_utils", ":compat", @@ -515,6 +516,8 @@ cc_binary( srcs = ["quipper.cc"], visibility = ["//src:__subpackages__"], deps = [ + ":arm_spe_decoder", + ":compat", ":file_utils", ":perf_protobuf_io", ":perf_recorder", diff --git a/src/quipper/kernel/perf_internals.h b/src/quipper/kernel/perf_internals.h index 7834bdd..d6b64ee 100644 --- a/src/quipper/kernel/perf_internals.h +++ b/src/quipper/kernel/perf_internals.h @@ -548,6 +548,16 @@ enum { PERF_STAT_ROUND_TYPE__FINAL = 1, }; +enum auxtrace_type { + PERF_AUXTRACE_UNKNOWN, + PERF_AUXTRACE_INTEL_PT, + PERF_AUXTRACE_INTEL_BTS, + PERF_AUXTRACE_CS_ETM, + PERF_AUXTRACE_ARM_SPE, + PERF_AUXTRACE_S390_CPUMSF, + PERF_AUXTRACE_HISI_PTT, +}; + struct stat_round_event { struct perf_event_header header; u64 type; |