aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorshantuo <shantuo@google.com>2023-09-11 11:16:10 -0700
committerGitHub <noreply@github.com>2023-09-11 11:16:10 -0700
commit380f048e0e51e5b1cc22b9e3962a3430fe397e81 (patch)
tree69e9778fa00ad4c98c259b76f552aef0d57f5b57
parent9a5254a0bce3d86c6d5d29bd29bfbc1daa0bfa9a (diff)
downloadperf_data_converter-380f048e0e51e5b1cc22b9e3962a3430fe397e81.tar.gz
Add data source label support (#147)
PiperOrigin-RevId: 560196881 Co-authored-by: xliuprof <xliuprof@google.com>
-rw-r--r--src/BUILD1
-rw-r--r--src/perf_data_converter.cc44
-rw-r--r--src/perf_data_converter.h4
-rw-r--r--src/perf_data_converter_test.cc44
-rw-r--r--src/quipper/kernel/perf_event.h77
-rw-r--r--src/testdata/perf-datasrc.textproto141
6 files changed, 266 insertions, 45 deletions
diff --git a/src/BUILD b/src/BUILD
index ecbb3bb..b6dafdd 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -80,6 +80,7 @@ cc_test(
"//src/testdata:perf-comm-and-task-comm.textproto",
"//src/testdata:perf-cpu.textproto",
"//src/testdata:perf-cros-kernel-3_18-mapping.textproto",
+ "//src/testdata:perf-datasrc.textproto",
"//src/testdata:perf-include-comm-md5-prefix.textproto",
"//src/testdata:perf-java-classes-jsa.textproto",
"//src/testdata:perf-kernel-mapping-by-name.textproto",
diff --git a/src/perf_data_converter.cc b/src/perf_data_converter.cc
index 781a2d1..b4a9631 100644
--- a/src/perf_data_converter.cc
+++ b/src/perf_data_converter.cc
@@ -111,6 +111,7 @@ struct SampleKey {
uint64_t data_page_size = 0;
uint32_t cpu = 0;
uint64_t weight = 0;
+ uint64_t data_src = 0;
LocationIdVector stack;
};
@@ -122,7 +123,8 @@ struct SampleKeyEqualityTester {
(a.thread_comm == b.thread_comm) && (a.cgroup == b.cgroup) &&
(a.code_page_size == b.code_page_size) &&
(a.data_page_size == b.data_page_size) && (a.cpu == b.cpu) &&
- (a.weight == b.weight) && (a.stack == b.stack));
+ (a.weight == b.weight) && (a.data_src == b.data_src) &&
+ (a.stack == b.stack));
}
};
@@ -141,6 +143,7 @@ struct SampleKeyHasher {
hash ^= std::hash<uint64_t>()(k.data_page_size);
hash ^= std::hash<uint32_t>()(k.cpu);
hash ^= std::hash<uint64_t>()(k.weight);
+ hash ^= std::hash<uint64_t>()(k.data_src);
for (const auto& id : k.stack) {
hash ^= std::hash<uint64_t>()(id);
}
@@ -301,6 +304,9 @@ class PerfDataConverter : public PerfDataHandler {
bool IncludeCacheLatencyLabel() const {
return (sample_labels_ & kCacheLatencyLabel);
}
+ // Returns whether data source labels were requested for inclusion in the
+ // profile.proto's Sample.Label field.
+ bool IncludeDataSrcLabels() const { return (sample_labels_ & kDataSrcLabel); }
SampleKey MakeSampleKey(const PerfDataHandler::SampleContext& sample,
ProfileBuilder* builder);
@@ -390,6 +396,37 @@ SampleKey PerfDataConverter::MakeSampleKey(
sample_key.weight = sample.sample.weight();
}
}
+ // If the sample has a data_src, decode its memory-level bits to find the data
+ // source. Only accesses flagged as a hit are given a data source label.
+ if (IncludeDataSrcLabels() && sample.sample.has_data_src()) {
+ quipper::perf_mem_data_src ds;
+ std::string cache_lvl;
+ ds.val = static_cast<uint64_t>(sample.sample.data_src());
+ if (ds.mem_lvl & quipper::PERF_MEM_LVL_HIT) {
+ if (ds.mem_lvl & quipper::PERF_MEM_LVL_L1)
+ cache_lvl = "L1";
+ else if (ds.mem_lvl & quipper::PERF_MEM_LVL_LFB)
+ cache_lvl = "LFB";
+ else if (ds.mem_lvl & quipper::PERF_MEM_LVL_L2)
+ cache_lvl = "L2";
+ else if (ds.mem_lvl & quipper::PERF_MEM_LVL_L3)
+ cache_lvl = "L3";
+ else if (ds.mem_lvl & quipper::PERF_MEM_LVL_LOC_RAM)
+ cache_lvl = "Local DRAM";
+ else if (ds.mem_lvl & (quipper::PERF_MEM_LVL_REM_RAM1 |
+ quipper::PERF_MEM_LVL_REM_RAM2))
+ cache_lvl = "Remote DRAM";
+ else if (ds.mem_lvl & (quipper::PERF_MEM_LVL_REM_CCE1 |
+ quipper::PERF_MEM_LVL_REM_CCE2))
+ cache_lvl = "Remote Cache";
+ else if (ds.mem_lvl & quipper::PERF_MEM_LVL_IO)
+ cache_lvl = "IO Memory";
+ else if (ds.mem_lvl & quipper::PERF_MEM_LVL_UNC)
+ cache_lvl = "Uncached Memory";
+ else
+ cache_lvl = "Unknown Level";
+ sample_key.data_src = UTF8StringId(cache_lvl, builder);
+ }
+ }
return sample_key;
}
@@ -595,6 +632,11 @@ void PerfDataConverter::AddOrUpdateSample(
label->set_num(sample_key.weight);
label->set_num_unit(builder->StringId("cycles"));
}
+ if (IncludeDataSrcLabels() && sample_key.data_src != 0) {
+ auto* label = sample->add_label();
+ label->set_key(builder->StringId(DataSrcLabelKey));
+ label->set_str(sample_key.data_src);
+ }
// Two values per collected event: the first is sample counts, the second is
// event counts (unsampled weight for each sample).
for (int event_id = 0; event_id < perf_data_.file_attrs_size();
diff --git a/src/perf_data_converter.h b/src/perf_data_converter.h
index a3a64e5..c9beb38 100644
--- a/src/perf_data_converter.h
+++ b/src/perf_data_converter.h
@@ -63,6 +63,9 @@ enum SampleLabels {
// Adds a label with CacheLatencyLabelKey and number value set to the cache
// latency.
kCacheLatencyLabel = 1 << 11,
+ // Adds a label with DataSrcLabelKey and string value set to the memory
+ // hierarchy level at which the access hit (e.g. "L1", "Local DRAM").
+ kDataSrcLabel = 1 << 12,
};
// Sample label key names.
@@ -78,6 +81,7 @@ const char CodePageSizeLabelKey[] = "code_page_size";
const char DataPageSizeLabelKey[] = "data_page_size";
const char CpuLabelKey[] = "cpu";
const char CacheLatencyLabelKey[] = "cache_latency";
+const char DataSrcLabelKey[] = "data_src";
// Execution mode label values.
const char ExecutionModeHostKernel[] = "Host Kernel";
diff --git a/src/perf_data_converter_test.cc b/src/perf_data_converter_test.cc
index 5aacab0..1d2e79c 100644
--- a/src/perf_data_converter_test.cc
+++ b/src/perf_data_converter_test.cc
@@ -981,6 +981,50 @@ TEST_F(PerfDataConverterTest, ConvertsNoWeight) {
EXPECT_THAT(weight_counts, IsEmpty());
}
+TEST_F(PerfDataConverterTest, DataSrcBitSet) {
+ const std::string ascii_pb(
+ GetContents(GetResource("perf-datasrc.textproto")));
+ ASSERT_FALSE(ascii_pb.empty());
+ PerfDataProto perf_data_proto;
+ ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(ascii_pb, &perf_data_proto));
+ // Round-trip deserialize/serialize, so we can make sure the test case
+ // represents a valid perf.data file.
+ std::string str;
+ quipper::PerfReader reader;
+ ASSERT_TRUE(reader.Deserialize(perf_data_proto));
+ ASSERT_TRUE(reader.WriteToString(&str));
+}
+
+TEST_F(PerfDataConverterTest, ConvertsDataSrc) {
+ const std::string ascii_pb(
+ GetContents(GetResource("perf-datasrc.textproto")));
+ ASSERT_FALSE(ascii_pb.empty());
+ PerfDataProto perf_data_proto;
+ ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString(ascii_pb, &perf_data_proto));
+
+ const ProcessProfiles pps =
+ PerfDataProtoToProfiles(&perf_data_proto, kDataSrcLabel);
+ ASSERT_EQ(pps.size(), 1);
+
+ std::unordered_map<std::string, uint64_t> counts_by_datasrc;
+ const auto& p = pps[0]->data;
+ for (const auto& sample : p.sample()) {
+ std::string datasrc;
+ for (const auto& label : sample.label()) {
+ if (p.string_table(label.key()) == DataSrcLabelKey) {
+ datasrc = p.string_table(label.str());
+ counts_by_datasrc[datasrc]++;
+ }
+ }
+ }
+ const std::unordered_map<std::string, uint64_t> expected_counts{
+ {"L1", 1},
+ {"L2", 1},
+ {"L3", 1},
+ };
+ EXPECT_THAT(counts_by_datasrc, UnorderedPointwise(Eq(), expected_counts));
+}
+
TEST_F(PerfDataConverterTest, HandlesAlternateKernelNames) {
std::string ascii_pb =
GetContents(GetResource("perf-kernel-mapping-by-name.textproto"));
diff --git a/src/quipper/kernel/perf_event.h b/src/quipper/kernel/perf_event.h
index c531dbf..f3755e2 100644
--- a/src/quipper/kernel/perf_event.h
+++ b/src/quipper/kernel/perf_event.h
@@ -1040,59 +1040,48 @@ union perf_mem_data_src {
};
};
-/*
-// The below macros are not used in quipper. They are commented out for future
-// reference.
// type of opcode (load/store/prefetch,code)
-#define PERF_MEM_OP_NA 0x01 // not available
-#define PERF_MEM_OP_LOAD 0x02 // load instruction
-#define PERF_MEM_OP_STORE 0x04 // store instruction
-#define PERF_MEM_OP_PFETCH 0x08 // prefetch
-#define PERF_MEM_OP_EXEC 0x10 // code (execution)
-#define PERF_MEM_OP_SHIFT 0
+const u64 PERF_MEM_OP_NA = 0x01; // not available
+const u64 PERF_MEM_OP_LOAD = 0x02; // load instruction
+const u64 PERF_MEM_OP_STORE = 0x04; // store instruction
+const u64 PERF_MEM_OP_PFETCH = 0x08; // prefetch
+const u64 PERF_MEM_OP_EXEC = 0x10; // code (execution)
// memory hierarchy (memory level, hit or miss)
-#define PERF_MEM_LVL_NA 0x01 // not available
-#define PERF_MEM_LVL_HIT 0x02 // hit level
-#define PERF_MEM_LVL_MISS 0x04 // miss level
-#define PERF_MEM_LVL_L1 0x08 // L1
-#define PERF_MEM_LVL_LFB 0x10 // Line Fill Buffer
-#define PERF_MEM_LVL_L2 0x20 // L2
-#define PERF_MEM_LVL_L3 0x40 // L3
-#define PERF_MEM_LVL_LOC_RAM 0x80 // Local DRAM
-#define PERF_MEM_LVL_REM_RAM1 0x100 // Remote DRAM (1 hop)
-#define PERF_MEM_LVL_REM_RAM2 0x200 // Remote DRAM (2 hops)
-#define PERF_MEM_LVL_REM_CCE1 0x400 // Remote Cache (1 hop)
-#define PERF_MEM_LVL_REM_CCE2 0x800 // Remote Cache (2 hops)
-#define PERF_MEM_LVL_IO 0x1000 // I/O memory
-#define PERF_MEM_LVL_UNC 0x2000 // Uncached memory
-#define PERF_MEM_LVL_SHIFT 5
+const u64 PERF_MEM_LVL_NA = 0x01; // not available
+const u64 PERF_MEM_LVL_HIT = 0x02; // hit level
+const u64 PERF_MEM_LVL_MISS = 0x04; // miss level
+const u64 PERF_MEM_LVL_L1 = 0x08; // L1
+const u64 PERF_MEM_LVL_LFB = 0x10; // Line Fill Buffer
+const u64 PERF_MEM_LVL_L2 = 0x20; // L2
+const u64 PERF_MEM_LVL_L3 = 0x40; // L3
+const u64 PERF_MEM_LVL_LOC_RAM = 0x80; // Local DRAM
+const u64 PERF_MEM_LVL_REM_RAM1 = 0x100; // Remote DRAM (1 hop)
+const u64 PERF_MEM_LVL_REM_RAM2 = 0x200; // Remote DRAM (2 hops)
+const u64 PERF_MEM_LVL_REM_CCE1 = 0x400; // Remote Cache (1 hop)
+const u64 PERF_MEM_LVL_REM_CCE2 = 0x800; // Remote Cache (2 hops)
+const u64 PERF_MEM_LVL_IO = 0x1000; // I/O memory
+const u64 PERF_MEM_LVL_UNC = 0x2000; // Uncached memory
// snoop mode
-#define PERF_MEM_SNOOP_NA 0x01 // not available
-#define PERF_MEM_SNOOP_NONE 0x02 // no snoop
-#define PERF_MEM_SNOOP_HIT 0x04 // snoop hit
-#define PERF_MEM_SNOOP_MISS 0x08 // snoop miss
-#define PERF_MEM_SNOOP_HITM 0x10 // snoop hit modified
-#define PERF_MEM_SNOOP_SHIFT 19
+const u64 PERF_MEM_SNOOP_NA = 0x01; // not available
+const u64 PERF_MEM_SNOOP_NONE = 0x02; // no snoop
+const u64 PERF_MEM_SNOOP_HIT = 0x04; // snoop hit
+const u64 PERF_MEM_SNOOP_MISS = 0x08; // snoop miss
+const u64 PERF_MEM_SNOOP_HITM = 0x10; // snoop hit modified
// locked instruction
-#define PERF_MEM_LOCK_NA 0x01 // not available
-#define PERF_MEM_LOCK_LOCKED 0x02 // locked transaction
-#define PERF_MEM_LOCK_SHIFT 24
+const u64 PERF_MEM_LOCK_NA = 0x01; // not available
+const u64 PERF_MEM_LOCK_LOCKED = 0x02; // locked transaction
// TLB access
-#define PERF_MEM_TLB_NA 0x01 // not available
-#define PERF_MEM_TLB_HIT 0x02 // hit level
-#define PERF_MEM_TLB_MISS 0x04 // miss level
-#define PERF_MEM_TLB_L1 0x08 // L1
-#define PERF_MEM_TLB_L2 0x10 // L2
-#define PERF_MEM_TLB_WK 0x20 // Hardware Walker
-#define PERF_MEM_TLB_OS 0x40 // OS fault handler
-#define PERF_MEM_TLB_SHIFT 26
-
-#define PERF_MEM_S(a, s) (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
-*/
+const u64 PERF_MEM_TLB_NA = 0x01; // not available
+const u64 PERF_MEM_TLB_HIT = 0x02; // hit level
+const u64 PERF_MEM_TLB_MISS = 0x04; // miss level
+const u64 PERF_MEM_TLB_L1 = 0x08; // L1
+const u64 PERF_MEM_TLB_L2 = 0x10; // L2
+const u64 PERF_MEM_TLB_WK = 0x20; // Hardware Walker
+const u64 PERF_MEM_TLB_OS = 0x40; // OS fault handler
/*
* single taken branch record layout:
diff --git a/src/testdata/perf-datasrc.textproto b/src/testdata/perf-datasrc.textproto
new file mode 100644
index 0000000..b674dcb
--- /dev/null
+++ b/src/testdata/perf-datasrc.textproto
@@ -0,0 +1,141 @@
+# proto-file: src/quipper/perf_data.proto
+# proto-message: quipper.PerfDataProto
+file_attrs {
+ attr {
+ type: 0
+ size: 96
+ config: 0
+ sample_period: 1000
+ # PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU |
+ # PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_PERIOD
+ sample_type: 49543
+ # PERF_FORMAT_ID
+ read_format: 4
+ disabled: false
+ inherit: true
+ pinned: false
+ exclusive: false
+ exclude_user: false
+ exclude_kernel: false
+ exclude_hv: false
+ exclude_idle: false
+ mmap: true
+ comm: true
+ freq: true
+ inherit_stat: false
+ enable_on_exec: false
+ task: false
+ watermark: false
+ precise_ip: 0
+ mmap_data: false
+ sample_id_all: true
+ exclude_host: false
+ exclude_guest: true
+ wakeup_events: 0
+ bp_type: 0
+ bp_addr: 0
+ bp_len: 0
+ branch_sample_type: 0
+ exclude_callchain_kernel: false
+ exclude_callchain_user: false
+ mmap2: false
+ comm_exec: false
+ sample_regs_user: 0
+ sample_stack_user: 0
+ }
+ ids: 1
+}
+
+events {
+ header {
+ type: 9
+ misc: 1
+ size: 64
+ }
+ sample_event {
+ ip: 0xffffffff8bc002c8
+ pid: 100
+ tid: 100
+ sample_time_ns: 100000000000
+ period: 3170393
+ cpu: 0
+ weight: 352
+ # Set hit level bit and L1 bit: (HIT 0x02 | L1 0x08) << 5 (mem_lvl shift)
+ data_src: 320
+ }
+}
+events {
+ header {
+ type: 9
+ misc: 1
+ size: 64
+ }
+ sample_event {
+ ip: 0x5dc
+ pid: 100
+ tid: 100
+ sample_time_ns: 100000000010
+ period: 3170393
+ cpu: 0
+ weight: 146
+ # Set hit level bit and L2 bit: (HIT 0x02 | L2 0x20) << 5 (mem_lvl shift)
+ data_src: 1088
+ }
+}
+events {
+ header {
+ type: 9
+ misc: 1
+ size: 64
+ }
+ sample_event {
+ ip: 0x1f4
+ pid: 100
+ tid: 100
+ sample_time_ns: 100000000020
+ period: 3170393
+ cpu: 0
+ weight: 352
+ # Set hit level bit and L3 bit: (HIT 0x02 | L3 0x40) << 5 (mem_lvl shift)
+ data_src: 2112
+ }
+}
+timestamp_sec: 0
+stats {
+ num_sample_events: 4
+ num_mmap_events: 0
+ num_fork_events: 0
+ num_exit_events: 0
+ num_sample_events_mapped: 0
+ did_remap: false
+}
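+# HEADER_BUILD_ID (bit 2) | HEADER_NRCPUS (bit 7) | HEADER_TOTAL_MEM (bit 10)
+# NOTE(review): 9348 also has bit 13 set, which looks like HEADER_CPU_TOPOLOGY
+# in the Linux perf header feature enum; confirm whether that is intended.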
+metadata_mask: 9348
+build_ids {
+ # Use PERF_RECORD_MISC_CPUMODE_UNKNOWN so we have to match by filename.
+ misc: 0
+ pid: 100
+ build_id_hash: "12345678901234567890"
+ filename: "[kernel.kallsyms]"
+}
+build_ids {
+ misc: 2
+ pid: 100
+ build_id_hash: "09876543210987654321"
+ filename: "/bin/ls"
+}
+# nr_cpus
+uint32_metadata {
+ type: 7
+ data: 4
+ data: 4
+}
+# total memory
+uint64_metadata {
+ type: 10
+ data: 3990112
+}
+event_types {
+ id: 1
+ name: "cycles"
+}