aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Maennich <maennich@google.com>2021-11-22 17:11:00 +0000
committerMatthias Maennich <maennich@google.com>2021-11-24 12:29:14 +0000
commit29479b2b604c0723be3fc5d147c7e747c22e82b3 (patch)
tree67ae69e61cebcde41c2946db0f305884a5f594d7
parent7f5dee8478515c09f671b91396e118fc9ac0e2af (diff)
downloadinterceptor-29479b2b604c0723be3fc5d147c7e747c22e82b3.tar.gz
interceptor: replace custom Command class by proto class
Though this removes the lazy evaluation of args and env, which is now always done eagerly, that evaluation was hardly ever truly omitted. Hence, this is not a regression. Instead, the code could be tidied up to have only one data format that also serializes. No functional change intended. Bug: 205577427 Signed-off-by: Matthias Maennich <maennich@google.com> Change-Id: Ia3790d36c5477eb39dd6980d35b638803726d49d
-rw-r--r--analysis.cc10
-rw-r--r--interceptor.cc125
-rw-r--r--interceptor.h47
-rw-r--r--log.proto9
-rw-r--r--main.cc4
5 files changed, 66 insertions, 129 deletions
diff --git a/analysis.cc b/analysis.cc
index 49013bd..05564c7 100644
--- a/analysis.cc
+++ b/analysis.cc
@@ -89,8 +89,8 @@ static Options parse_args(int argc, char* argv[]) {
return result;
}
-interceptor::log::Log read_log(const fs::path& log_file) {
- interceptor::log::Log result;
+interceptor::Log read_log(const fs::path& log_file) {
+ interceptor::Log result;
std::ifstream input(log_file);
if (!input) {
std::cerr << "Could not open input file for reading.\n";
@@ -100,7 +100,7 @@ interceptor::log::Log read_log(const fs::path& log_file) {
return result;
}
-void text_to_file(const interceptor::log::Log& log, const fs::path& output) {
+void text_to_file(const interceptor::Log& log, const fs::path& output) {
std::string content;
google::protobuf::TextFormat::PrintToString(log, &content);
std::ofstream os(output);
@@ -115,7 +115,7 @@ void text_to_file(const interceptor::log::Log& log, const fs::path& output) {
}
}
-void compdb_to_file(const interceptor::log::Log& log, const fs::path& output) {
+void compdb_to_file(const interceptor::Log& log, const fs::path& output) {
static const std::unordered_set<std::string_view> COMPILE_EXTENSIONS = {
".c", ".cc", ".cpp", ".cxx", ".S",
};
@@ -126,7 +126,7 @@ void compdb_to_file(const interceptor::log::Log& log, const fs::path& output) {
"g++",
};
- interceptor::log::CompilationDatabase compdb;
+ interceptor::CompilationDatabase compdb;
for (const auto& command : log.commands()) {
// skip anything that is not a compiler invocation
diff --git a/interceptor.cc b/interceptor.cc
index f31b343..1920167 100644
--- a/interceptor.cc
+++ b/interceptor.cc
@@ -48,7 +48,7 @@ static void process_command(const char* filename, char* const argv[], char* cons
static void log(const interceptor::Command&);
// execute potentially modified command
-static void exec(const interceptor::Command&);
+static void exec(const interceptor::Command&, char* const envp[]);
// OVERLOADS for LD_PRELOAD USE
@@ -68,36 +68,26 @@ int execve(const char* filename, char* const argv[], char* const envp[]) {
namespace interceptor {
-Command::Command(const char* program, char* const argv[], char* const envp[])
- : program_(program), cwd_(fs::current_path()), argv_(argv), envp_(envp) {}
+static Command instantiate_command(const char* program, char* const argv[], char* const envp[]) {
+ Command result;
+ result.set_program(program);
+ result.set_current_dir(fs::current_path());
-const ArgVec& Command::args() const {
- if (!args_.has_value()) {
- args_ = ArgVec();
- for (auto current_arg = argv_; *current_arg; ++current_arg) {
- args_->emplace_back(*current_arg);
- }
+ for (auto current_arg = argv; *current_arg; ++current_arg) {
+ result.add_args(*current_arg);
}
- return *args_;
-}
-const EnvMap& Command::env() const {
- if (!env_.has_value()) {
- env_ = EnvMap();
- for (auto current_env = envp_; *current_env; ++current_env) {
- const std::string_view s(*current_env);
- const auto pos = s.find('=');
- if (pos == EnvMap::key_type::npos) {
- continue;
- }
- env_->emplace(s.substr(0, pos), s.substr(pos + 1));
+ for (auto current_env = envp; *current_env; ++current_env) {
+ const std::string s(*current_env);
+ const auto pos = s.find('=');
+ if (pos == std::string::npos) {
+ continue;
}
+
+ (*result.mutable_env_vars())[s.substr(0, pos)] = s.substr(pos + 1);
}
- return *env_;
-}
-const std::string& Command::program() const {
- return program_;
+ return result;
}
// TODO: chain output iterators instead and find a common expression
@@ -107,8 +97,8 @@ static std::string escape(std::string in) {
return in;
}
-template <typename T>
-static void dump_vector(std::ostream& os, const char* key, const std::vector<T>& vec) {
+template <typename V>
+static void dump_vector(std::ostream& os, const char* key, const V& vec) {
os << std::quoted(key) << ": [";
bool comma = false;
for (const auto& e : vec) {
@@ -121,47 +111,31 @@ static void dump_vector(std::ostream& os, const char* key, const std::vector<T>&
os << "]";
}
-std::string Command::command() const {
+static std::string repr(const Command& command) {
std::ostringstream cmd;
- cmd << program();
- if (args().size() > 1) cmd << ' ';
- std::transform(args().cbegin() + 1, args().cend(), std::ostream_iterator<std::string>(cmd, " "),
- escape);
- return cmd.str();
-}
+ cmd << command.program();
+ if (command.args().size() > 1) cmd << ' ';
+ std::transform(command.args().cbegin() + 1, command.args().cend(),
+ std::ostream_iterator<std::string>(cmd, " "), escape);
-std::string Command::repr() const {
std::ostringstream os;
- os << R"({"cmd": )" << std::quoted(command());
+ os << R"({"cmd": )" << std::quoted(cmd.str());
os << ", ";
- dump_vector(os, "in", inputs());
+ dump_vector(os, "in", command.inputs());
os << ", ";
- dump_vector(os, "out", outputs());
+ dump_vector(os, "out", command.outputs());
- os << R"(, "cwd": )" << std::quoted(cwd_);
+ os << R"(, "cwd": )" << std::quoted(command.current_dir());
os << "}";
return os.str();
}
-log::Message Command::message() const {
- log::Message result;
- auto& command = *result.mutable_command();
-
- command.set_program(program_);
- *command.mutable_args() = {args().cbegin(), args().cend()};
- command.set_current_dir(cwd_);
- *command.mutable_outputs() = {outputs().cbegin(), outputs().cend()};
- *command.mutable_inputs() = {inputs().cbegin(), inputs().cend()};
-
- return result;
-}
-
-void Command::make_relative() {
+static void make_relative(Command* command) {
// determine the ROOT_DIR
std::string root_dir;
- if (auto it = env().find(ENV_root_dir); it != env().cend()) {
+ if (auto it = command->env_vars().find(ENV_root_dir); it != command->env_vars().cend()) {
root_dir = it->second;
if (root_dir[root_dir.size() - 1] != '/') root_dir += '/';
} else {
@@ -171,7 +145,7 @@ void Command::make_relative() {
// determine the relative path to ROOT_DIR from the current working dir
std::string rel_root = fs::relative(root_dir);
if (rel_root[rel_root.size() - 1] != '/') rel_root += '/';
- if (rel_root == "./") rel_root = "";
+ if (rel_root == "./") rel_root.clear();
// TODO: This is generally bad as this means we can't make anything relative.
// This happens if the out dir is outside of the root.
@@ -179,7 +153,7 @@ void Command::make_relative() {
return;
}
- cwd_ = fs::relative(cwd_, root_dir);
+ command->set_current_dir(fs::relative(command->current_dir(), root_dir));
// replacement functor
const auto replace_all = [&](auto& str) {
@@ -189,17 +163,15 @@ void Command::make_relative() {
}
};
- if (!args_.has_value()) args();
-
// now go and replace everything
- replace_all(program_);
- std::for_each(args_->begin(), args_->end(), replace_all);
+ replace_all(*command->mutable_program());
+ std::for_each(command->mutable_args()->begin(), command->mutable_args()->end(), replace_all);
}
static AnalysisResult analyze_command(const interceptor::Command& command);
-void Command::analyze() {
- auto [inputs, outputs] = analyze_command(*this);
+static void analyze(Command* command) {
+ auto [inputs, outputs] = analyze_command(*command);
// TODO: this sanitizing should be done during make_relative
for (auto& input : inputs) {
@@ -215,13 +187,13 @@ void Command::analyze() {
for (const auto& input : inputs) {
if (!fs::is_regular_file(input)) {
std::cerr << "missing input: " << input << "\n";
- std::cerr << Command::repr() << "\n";
+ std::cerr << repr(*command) << "\n";
exit(1);
}
}
- inputs_ = std::move(inputs);
- outputs_ = std::move(outputs);
+ *command->mutable_inputs() = {inputs.cbegin(), inputs.cend()};
+ *command->mutable_outputs() = {outputs.cbegin(), outputs.cend()};
}
/// COMMAND ANALYSIS
@@ -300,7 +272,7 @@ static const std::initializer_list<std::pair<std::regex, Analyzer>> analyzers{
static AnalysisResult analyze_command(const Command& command) {
for (const auto& [regex, analyzer] : analyzers) {
if (std::regex_match(command.args()[0], regex)) {
- return analyzer(command.program(), command.args(), command.env());
+ return analyzer(command.program(), command.args(), command.env_vars());
}
}
return {};
@@ -320,35 +292,38 @@ static void process_command(const char* filename, char* const argv[], char* cons
// Ok, we can handle that one, let's transform it.
- interceptor::Command command(filename, argv, envp);
+ auto command = interceptor::instantiate_command(filename, argv, envp);
// rewrite all command line arguments (including the program itself) to use
// paths relative to ROOT_DIR. This is essential for reproducible builds and
// furthermore necessary to produce cache hits in RBE.
- command.make_relative();
+ make_relative(&command);
- command.analyze();
+ analyze(&command);
log(command);
// pass down the transformed command to execve
- exec(command);
+ exec(command, envp);
}
static void log(const interceptor::Command& command) {
- const auto& env = command.env();
+ const auto& env = command.env_vars();
if (const auto env_it = env.find(ENV_command_log); env_it != env.cend()) {
std::ofstream file;
file.open(std::string(env_it->second),
std::ofstream::out | std::ofstream::app | std::ofstream::binary);
+ interceptor::Message message;
+ *message.mutable_command() = command;
+ message.mutable_command()->clear_env_vars();
if (file.is_open()) {
- google::protobuf::util::SerializeDelimitedToOstream(command.message(), &file);
+ google::protobuf::util::SerializeDelimitedToOstream(message, &file);
}
}
}
-static void exec(const interceptor::Command& command) {
+static void exec(const interceptor::Command& command, char* const envp[]) {
std::vector<const char*> c_args;
c_args.reserve(command.args().size() + 1);
c_args[command.args().size()] = nullptr;
@@ -358,5 +333,7 @@ static void exec(const interceptor::Command& command) {
// TODO: at this point, we could free some memory that is held in Command.
// While the args vector is reused for args, we could free the EnvMap
// and the original args.
- old_execve(command.program().c_str(), const_cast<char**>(c_args.data()), command.envp());
+
+ // does not return
+ old_execve(command.program().c_str(), const_cast<char**>(c_args.data()), envp);
}
diff --git a/interceptor.h b/interceptor.h
index 9e9a1e8..9d0157d 100644
--- a/interceptor.h
+++ b/interceptor.h
@@ -17,6 +17,7 @@
#include <functional>
#include <optional>
#include <string>
+#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -29,54 +30,12 @@ constexpr static auto ENV_root_dir = "INTERCEPTOR_root_dir";
namespace interceptor {
// Some type definitions to gain some type safety
-using ArgVec = std::vector<std::string>;
-using EnvMap = std::unordered_map<std::string, std::string>;
+using ArgVec = std::remove_pointer_t<decltype(Command().mutable_args())>;
+using EnvMap = std::remove_pointer_t<decltype(Command().mutable_env_vars())>;
using Inputs = std::vector<std::string>;
using Outputs = Inputs;
-// Command abstraction
-//
-// This is a utility container to keep program, args and env in an accessible
-// fashion. Most data structures are created lazily.
-class Command {
- public:
- Command(const char* program, char* const argv[], char* const envp[]);
-
- const std::string& program() const;
- const ArgVec& args() const;
- const EnvMap& env() const;
-
- char* const* envp() const { return envp_; };
-
- const Inputs& inputs() const { return inputs_; }
- const Outputs& outputs() const { return outputs_; }
-
- std::string repr() const;
- log::Message message() const;
-
- // make command line calls relative to ROOT_DIR
- void make_relative();
-
- // determine inputs/outputs
- void analyze();
-
- private:
- std::string command() const;
-
- std::string program_;
- std::string cwd_;
-
- char* const* argv_;
- char* const* envp_;
-
- mutable std::optional<ArgVec> args_;
- mutable std::optional<EnvMap> env_;
-
- Inputs inputs_;
- Outputs outputs_;
-};
-
// Command analysis
struct AnalysisResult {
diff --git a/log.proto b/log.proto
index a3650ec..8e721b3 100644
--- a/log.proto
+++ b/log.proto
@@ -16,15 +16,16 @@
syntax = "proto3";
-package interceptor.log;
+package interceptor;
// A Command as traced by intercepting an execve() invocation.
message Command {
string program = 1;
repeated string args = 2;
- string current_dir = 3;
- repeated string inputs = 4;
- repeated string outputs = 5;
+ map<string, string> env_vars = 3;
+ string current_dir = 4;
+ repeated string inputs = 5;
+ repeated string outputs = 6;
};
// A single message as emitted by an intercepted process. It can contain any of
diff --git a/main.cc b/main.cc
index e46300c..49d33d2 100644
--- a/main.cc
+++ b/main.cc
@@ -121,13 +121,13 @@ class CommandLog {
if (command_log_file_) {
// compact the log by re-reading the individual log::Message's to combine
// them to a log::Log
- interceptor::log::Log log;
+ interceptor::Log log;
log.set_root_dir(root_dir_);
{
std::ifstream command_log(command_log_file_->c_str(), std::ios_base::binary);
google::protobuf::io::IstreamInputStream input_stream(&command_log);
- interceptor::log::Message message;
+ interceptor::Message message;
while (true) {
if (!google::protobuf::util::ParseDelimitedFromZeroCopyStream(&message, &input_stream,
nullptr))