diff options
author | shantuo <shantuo@google.com> | 2023-09-11 11:16:10 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-11 11:16:10 -0700 |
commit | 380f048e0e51e5b1cc22b9e3962a3430fe397e81 (patch) | |
tree | 69e9778fa00ad4c98c259b76f552aef0d57f5b57 | |
parent | 9a5254a0bce3d86c6d5d29bd29bfbc1daa0bfa9a (diff) | |
download | perf_data_converter-380f048e0e51e5b1cc22b9e3962a3430fe397e81.tar.gz |
Add data source label support (#147)
PiperOrigin-RevId: 560196881
Co-authored-by: xliuprof <xliuprof@google.com>
-rw-r--r-- | src/BUILD | 1 | ||||
-rw-r--r-- | src/perf_data_converter.cc | 44 | ||||
-rw-r--r-- | src/perf_data_converter.h | 4 | ||||
-rw-r--r-- | src/perf_data_converter_test.cc | 44 | ||||
-rw-r--r-- | src/quipper/kernel/perf_event.h | 77 | ||||
-rw-r--r-- | src/testdata/perf-datasrc.textproto | 141 |
6 files changed, 266 insertions, 45 deletions
@@ -80,6 +80,7 @@ cc_test( "//src/testdata:perf-comm-and-task-comm.textproto", "//src/testdata:perf-cpu.textproto", "//src/testdata:perf-cros-kernel-3_18-mapping.textproto", + "//src/testdata:perf-datasrc.textproto", "//src/testdata:perf-include-comm-md5-prefix.textproto", "//src/testdata:perf-java-classes-jsa.textproto", "//src/testdata:perf-kernel-mapping-by-name.textproto", diff --git a/src/perf_data_converter.cc b/src/perf_data_converter.cc index 781a2d1..b4a9631 100644 --- a/src/perf_data_converter.cc +++ b/src/perf_data_converter.cc @@ -111,6 +111,7 @@ struct SampleKey { uint64_t data_page_size = 0; uint32_t cpu = 0; uint64_t weight = 0; + uint64_t data_src = 0; LocationIdVector stack; }; @@ -122,7 +123,8 @@ struct SampleKeyEqualityTester { (a.thread_comm == b.thread_comm) && (a.cgroup == b.cgroup) && (a.code_page_size == b.code_page_size) && (a.data_page_size == b.data_page_size) && (a.cpu == b.cpu) && - (a.weight == b.weight) && (a.stack == b.stack)); + (a.weight == b.weight) && (a.data_src == b.data_src) && + (a.stack == b.stack)); } }; @@ -141,6 +143,7 @@ struct SampleKeyHasher { hash ^= std::hash<uint64_t>()(k.data_page_size); hash ^= std::hash<uint32_t>()(k.cpu); hash ^= std::hash<uint64_t>()(k.weight); + hash ^= std::hash<uint64_t>()(k.data_src); for (const auto& id : k.stack) { hash ^= std::hash<uint64_t>()(id); } @@ -301,6 +304,9 @@ class PerfDataConverter : public PerfDataHandler { bool IncludeCacheLatencyLabel() const { return (sample_labels_ & kCacheLatencyLabel); } + // Returns whether data source labels were requested for inclusion in the + // profile.proto's Sample.DataSrc field. + bool IncludeDataSrcLabels() const { return (sample_labels_ & kDataSrcLabel); } SampleKey MakeSampleKey(const PerfDataHandler::SampleContext& sample, ProfileBuilder* builder); @@ -390,6 +396,37 @@ SampleKey PerfDataConverter::MakeSampleKey( sample_key.weight = sample.sample.weight(); } } + // If sample has a data_src, we decode it to find the data source. + if (IncludeDataSrcLabels() && sample.sample.has_data_src()) { + quipper::perf_mem_data_src ds; + std::string cache_lvl; + ds.val = static_cast<uint64_t>(sample.sample.data_src()); + if (ds.mem_lvl & quipper::PERF_MEM_LVL_HIT) { + if (ds.mem_lvl & quipper::PERF_MEM_LVL_L1) + cache_lvl = "L1"; + else if (ds.mem_lvl & quipper::PERF_MEM_LVL_LFB) + cache_lvl = "LFB"; + else if (ds.mem_lvl & quipper::PERF_MEM_LVL_L2) + cache_lvl = "L2"; + else if (ds.mem_lvl & quipper::PERF_MEM_LVL_L3) + cache_lvl = "L3"; + else if (ds.mem_lvl & quipper::PERF_MEM_LVL_LOC_RAM) + cache_lvl = "Local DRAM"; + else if (ds.mem_lvl & (quipper::PERF_MEM_LVL_REM_RAM1 | + quipper::PERF_MEM_LVL_REM_RAM2)) + cache_lvl = "Remote DRAM"; + else if (ds.mem_lvl & (quipper::PERF_MEM_LVL_REM_CCE1 | + quipper::PERF_MEM_LVL_REM_CCE2)) + cache_lvl = "Remote Cache"; + else if (ds.mem_lvl & quipper::PERF_MEM_LVL_IO) + cache_lvl = "IO Memory"; + else if (ds.mem_lvl & quipper::PERF_MEM_LVL_UNC) + cache_lvl = "Uncached Memory"; + else + cache_lvl = "Unknown Level"; + sample_key.data_src = UTF8StringId(cache_lvl, builder); + } + } return sample_key; } @@ -595,6 +632,11 @@ void PerfDataConverter::AddOrUpdateSample( label->set_num(sample_key.weight); label->set_num_unit(builder->StringId("cycles")); } + if (IncludeDataSrcLabels() && sample_key.data_src != 0) { + auto* label = sample->add_label(); + label->set_key(builder->StringId(DataSrcLabelKey)); + label->set_str(sample_key.data_src); + } // Two values per collected event: the first is sample counts, the second is // event counts (unsampled weight for each sample). for (int event_id = 0; event_id < perf_data_.file_attrs_size(); diff --git a/src/perf_data_converter.h b/src/perf_data_converter.h index a3a64e5..c9beb38 100644 --- a/src/perf_data_converter.h +++ b/src/perf_data_converter.h @@ -63,6 +63,9 @@ enum SampleLabels { // Adds a label with CacheLatencyLabelKey and number value set to the cache // latency. kCacheLatencyLabel = 1 << 11, + // Adds a label with DataSrcLabelKey and string value set to the level of + // caches. + kDataSrcLabel = 1 << 12, }; // Sample label key names. @@ -78,6 +81,7 @@ const char CodePageSizeLabelKey[] = "code_page_size"; const char DataPageSizeLabelKey[] = "data_page_size"; const char CpuLabelKey[] = "cpu"; const char CacheLatencyLabelKey[] = "cache_latency"; +const char DataSrcLabelKey[] = "data_src"; // Execution mode label values. const char ExecutionModeHostKernel[] = "Host Kernel"; diff --git a/src/perf_data_converter_test.cc b/src/perf_data_converter_test.cc index 5aacab0..1d2e79c 100644 --- a/src/perf_data_converter_test.cc +++ b/src/perf_data_converter_test.cc @@ -981,6 +981,50 @@ TEST_F(PerfDataConverterTest, ConvertsNoWeight) { EXPECT_THAT(weight_counts, IsEmpty()); } +TEST_F(PerfDataConverterTest, DataSrcBitSet) { + const std::string ascii_pb( + GetContents(GetResource("perf-datasrc.textproto"))); + ASSERT_FALSE(ascii_pb.empty()); + PerfDataProto perf_data_proto; + ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(ascii_pb, &perf_data_proto)); + // Round-trip deserialize/serialize, so we can make sure the test case + // represents a valid perf.data file. + std::string str; + quipper::PerfReader reader; + ASSERT_TRUE(reader.Deserialize(perf_data_proto)); + ASSERT_TRUE(reader.WriteToString(&str)); +} + +TEST_F(PerfDataConverterTest, ConvertsDataSrc) { + const std::string ascii_pb( + GetContents(GetResource("perf-datasrc.textproto"))); + ASSERT_FALSE(ascii_pb.empty()); + PerfDataProto perf_data_proto; + ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(ascii_pb, &perf_data_proto)); + + const ProcessProfiles pps = + PerfDataProtoToProfiles(&perf_data_proto, kDataSrcLabel); + ASSERT_EQ(pps.size(), 1); + + std::unordered_map<std::string, uint64_t> counts_by_datasrc; + const auto& p = pps[0]->data; + for (const auto& sample : p.sample()) { + std::string datasrc; + for (const auto& label : sample.label()) { + if (p.string_table(label.key()) == DataSrcLabelKey) { + datasrc = p.string_table(label.str()); + counts_by_datasrc[datasrc]++; + } + } + } + const std::unordered_map<std::string, uint64_t> expected_counts{ + {"L1", 1}, + {"L2", 1}, + {"L3", 1}, + }; + EXPECT_THAT(counts_by_datasrc, UnorderedPointwise(Eq(), expected_counts)); +} + TEST_F(PerfDataConverterTest, HandlesAlternateKernelNames) { std::string ascii_pb = GetContents(GetResource("perf-kernel-mapping-by-name.textproto")); diff --git a/src/quipper/kernel/perf_event.h b/src/quipper/kernel/perf_event.h index c531dbf..f3755e2 100644 --- a/src/quipper/kernel/perf_event.h +++ b/src/quipper/kernel/perf_event.h @@ -1040,59 +1040,48 @@ union perf_mem_data_src { }; }; -/* -// The below macros are not used in quipper. They are commented out for future -// reference. // type of opcode (load/store/prefetch,code) -#define PERF_MEM_OP_NA 0x01 // not available -#define PERF_MEM_OP_LOAD 0x02 // load instruction -#define PERF_MEM_OP_STORE 0x04 // store instruction -#define PERF_MEM_OP_PFETCH 0x08 // prefetch -#define PERF_MEM_OP_EXEC 0x10 // code (execution) -#define PERF_MEM_OP_SHIFT 0 +const u64 PERF_MEM_OP_NA = 0x01; // not available +const u64 PERF_MEM_OP_LOAD = 0x02; // load instruction +const u64 PERF_MEM_OP_STORE = 0x04; // store instruction +const u64 PERF_MEM_OP_PFETCH = 0x08; // prefetch +const u64 PERF_MEM_OP_EXEC = 0x10; // code (execution) // memory hierarchy (memory level, hit or miss) -#define PERF_MEM_LVL_NA 0x01 // not available -#define PERF_MEM_LVL_HIT 0x02 // hit level -#define PERF_MEM_LVL_MISS 0x04 // miss level -#define PERF_MEM_LVL_L1 0x08 // L1 -#define PERF_MEM_LVL_LFB 0x10 // Line Fill Buffer -#define PERF_MEM_LVL_L2 0x20 // L2 -#define PERF_MEM_LVL_L3 0x40 // L3 -#define PERF_MEM_LVL_LOC_RAM 0x80 // Local DRAM -#define PERF_MEM_LVL_REM_RAM1 0x100 // Remote DRAM (1 hop) -#define PERF_MEM_LVL_REM_RAM2 0x200 // Remote DRAM (2 hops) -#define PERF_MEM_LVL_REM_CCE1 0x400 // Remote Cache (1 hop) -#define PERF_MEM_LVL_REM_CCE2 0x800 // Remote Cache (2 hops) -#define PERF_MEM_LVL_IO 0x1000 // I/O memory -#define PERF_MEM_LVL_UNC 0x2000 // Uncached memory -#define PERF_MEM_LVL_SHIFT 5 +const u64 PERF_MEM_LVL_NA = 0x01; // not available +const u64 PERF_MEM_LVL_HIT = 0x02; // hit level +const u64 PERF_MEM_LVL_MISS = 0x04; // miss level +const u64 PERF_MEM_LVL_L1 = 0x08; // L1 +const u64 PERF_MEM_LVL_LFB = 0x10; // Line Fill Buffer +const u64 PERF_MEM_LVL_L2 = 0x20; // L2 +const u64 PERF_MEM_LVL_L3 = 0x40; // L3 +const u64 PERF_MEM_LVL_LOC_RAM = 0x80; // Local DRAM +const u64 PERF_MEM_LVL_REM_RAM1 = 0x100; // Remote DRAM (1 hop) +const u64 PERF_MEM_LVL_REM_RAM2 = 0x200; // Remote DRAM (2 hops) +const u64 PERF_MEM_LVL_REM_CCE1 = 0x400; // Remote Cache (1 hop) +const u64 PERF_MEM_LVL_REM_CCE2 = 0x800; // Remote Cache (2 hops) +const u64 PERF_MEM_LVL_IO = 0x1000; // I/O memory +const u64 PERF_MEM_LVL_UNC = 0x2000; // Uncached memory // snoop mode -#define PERF_MEM_SNOOP_NA 0x01 // not available -#define PERF_MEM_SNOOP_NONE 0x02 // no snoop -#define PERF_MEM_SNOOP_HIT 0x04 // snoop hit -#define PERF_MEM_SNOOP_MISS 0x08 // snoop miss -#define PERF_MEM_SNOOP_HITM 0x10 // snoop hit modified -#define PERF_MEM_SNOOP_SHIFT 19 +const u64 PERF_MEM_SNOOP_NA = 0x01; // not available +const u64 PERF_MEM_SNOOP_NONE = 0x02; // no snoop +const u64 PERF_MEM_SNOOP_HIT = 0x04; // snoop hit +const u64 PERF_MEM_SNOOP_MISS = 0x08; // snoop miss +const u64 PERF_MEM_SNOOP_HITM = 0x10; // snoop hit modified // locked instruction -#define PERF_MEM_LOCK_NA 0x01 // not available -#define PERF_MEM_LOCK_LOCKED 0x02 // locked transaction -#define PERF_MEM_LOCK_SHIFT 24 +const u64 PERF_MEM_LOCK_NA = 0x01; // not available +const u64 PERF_MEM_LOCK_LOCKED = 0x02; // locked transaction // TLB access -#define PERF_MEM_TLB_NA 0x01 // not available -#define PERF_MEM_TLB_HIT 0x02 // hit level -#define PERF_MEM_TLB_MISS 0x04 // miss level -#define PERF_MEM_TLB_L1 0x08 // L1 -#define PERF_MEM_TLB_L2 0x10 // L2 -#define PERF_MEM_TLB_WK 0x20 // Hardware Walker -#define PERF_MEM_TLB_OS 0x40 // OS fault handler -#define PERF_MEM_TLB_SHIFT 26 - -#define PERF_MEM_S(a, s) (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) -*/ +const u64 PERF_MEM_TLB_NA = 0x01; // not available +const u64 PERF_MEM_TLB_HIT = 0x02; // hit level +const u64 PERF_MEM_TLB_MISS = 0x04; // miss level +const u64 PERF_MEM_TLB_L1 = 0x08; // L1 +const u64 PERF_MEM_TLB_L2 = 0x10; // L2 +const u64 PERF_MEM_TLB_WK = 0x20; // Hardware Walker +const u64 PERF_MEM_TLB_OS = 0x40; // OS fault handler /* * single taken branch record layout: diff --git a/src/testdata/perf-datasrc.textproto b/src/testdata/perf-datasrc.textproto new file mode 100644 index 0000000..b674dcb --- /dev/null +++ b/src/testdata/perf-datasrc.textproto @@ -0,0 +1,141 @@ +# proto-file: src/quipper/perf_data.proto +# proto-message: quipper.PerfDataProto +file_attrs { + attr { + type: 0 + size: 96 + config: 0 + sample_period: 1000 + # PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU | + # PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_PERIOD + sample_type: 49543 + # PERF_FORMAT_ID + read_format: 4 + disabled: false + inherit: true + pinned: false + exclusive: false + exclude_user: false + exclude_kernel: false + exclude_hv: false + exclude_idle: false + mmap: true + comm: true + freq: true + inherit_stat: false + enable_on_exec: false + task: false + watermark: false + precise_ip: 0 + mmap_data: false + sample_id_all: true + exclude_host: false + exclude_guest: true + wakeup_events: 0 + bp_type: 0 + bp_addr: 0 + bp_len: 0 + branch_sample_type: 0 + exclude_callchain_kernel: false + exclude_callchain_user: false + mmap2: false + comm_exec: false + sample_regs_user: 0 + sample_stack_user: 0 + } + ids: 1 +} + +events { + header { + type: 9 + misc: 1 + size: 64 + } + sample_event { + ip: 0xffffffff8bc002c8 + pid: 100 + tid: 100 + sample_time_ns: 100000000000 + period: 3170393 + cpu: 0 + weight: 352 + # Set hit level bit and L1 bit + data_src: 320 + } +} +events { + header { + type: 9 + misc: 1 + size: 64 + } + sample_event { + ip: 0x5dc + pid: 100 + tid: 100 + sample_time_ns: 100000000010 + period: 3170393 + cpu: 0 + weight: 146 + # Set hit level bit and L2 bit + data_src: 1088 + } +} +events { + header { + type: 9 + misc: 1 + size: 64 + } + sample_event { + ip: 0x1f4 + pid: 100 + tid: 100 + sample_time_ns: 100000000020 + period: 3170393 + cpu: 0 + weight: 352 + # Set hit level bit and L3 bit + data_src: 2112 + } +} +timestamp_sec: 0 +stats { + num_sample_events: 4 + num_mmap_events: 0 + num_fork_events: 0 + num_exit_events: 0 + num_sample_events_mapped: 0 + did_remap: false +} +# HEADER_BUILD_ID | HEADER_NRCPUS | HEADER_TOTAL_MEM +metadata_mask: 9348 +build_ids { + # Use PERF_RECORD_MISC_CPUMODE_UNKNOWN so we have to match by filename. + misc: 0 + pid: 100 + build_id_hash: "12345678901234567890" + filename: "[kernel.kallsyms]" +} +build_ids { + misc: 2 + pid: 100 + build_id_hash: "09876543210987654321" + filename: "/bin/ls" +} +# nr_cpus +uint32_metadata { + type: 7 + data: 4 + data: 4 +} +# total memory +uint64_metadata { + type: 10 + data: 3990112 +} +event_types { + id: 1 + name: "cycles" +} |