Diffstat (limited to 'debuginfod/debuginfod.cxx')
-rw-r--r--  debuginfod/debuginfod.cxx | 460
1 file changed, 358 insertions(+), 102 deletions(-)
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 99b1f2b9..c11aeda1 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -44,6 +44,12 @@ extern "C" {
}
#endif
+#ifdef HAVE_EXECINFO_H
+extern "C" {
+#include <execinfo.h>
+}
+#endif
+
extern "C" {
#include "printversion.h"
#include "system.h"
@@ -85,6 +91,7 @@ extern "C" {
#include <cstring>
#include <vector>
#include <set>
+#include <unordered_set>
#include <map>
#include <string>
#include <iostream>
@@ -94,6 +101,7 @@ extern "C" {
#include <mutex>
#include <deque>
#include <condition_variable>
+#include <exception>
#include <thread>
// #include <regex> // on rhel7 gcc 4.8, not competent
#include <regex.h>
@@ -138,7 +146,7 @@ string_endswith(const string& haystack, const string& needle)
// Roll this identifier for every sqlite schema incompatibility.
-#define BUILDIDS "buildids9"
+#define BUILDIDS "buildids10"
#if SQLITE_VERSION_NUMBER >= 3008000
#define WITHOUT_ROWID "without rowid"
@@ -157,10 +165,23 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
// NB: all these are overridable with -D option
// Normalization table for interning file names
- "create table if not exists " BUILDIDS "_files (\n"
+ "create table if not exists " BUILDIDS "_fileparts (\n"
" id integer primary key not null,\n"
" name text unique not null\n"
" );\n"
+ "create table if not exists " BUILDIDS "_files (\n"
+ " id integer primary key not null,\n"
+ " dirname integer not null,\n"
+ " basename integer not null,\n"
+ " unique (dirname, basename),\n"
+ " foreign key (dirname) references " BUILDIDS "_fileparts(id) on delete cascade,\n"
+ " foreign key (basename) references " BUILDIDS "_fileparts(id) on delete cascade\n"
+ " );\n"
+ "create view if not exists " BUILDIDS "_files_v as\n" // a
+ " select f.id, n1.name || '/' || n2.name as name\n"
+ " from " BUILDIDS "_files f, " BUILDIDS "_fileparts n1, " BUILDIDS "_fileparts n2\n"
+ " where f.dirname = n1.id and f.basename = n2.id;\n"
+
// Normalization table for interning buildids
"create table if not exists " BUILDIDS "_buildids (\n"
" id integer primary key not null,\n"
@@ -230,33 +251,33 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
"create view if not exists " BUILDIDS "_query_d as \n"
"select\n"
" b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
- " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
+ " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
" where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
"union all select\n"
" b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
- " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
+ " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
" where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
";"
// ... and for E queries
"create view if not exists " BUILDIDS "_query_e as \n"
"select\n"
" b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
- " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
+ " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
" where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
"union all select\n"
" b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
- " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
+ " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
" where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
";"
// ... and for S queries
"create view if not exists " BUILDIDS "_query_s as \n"
"select\n"
" b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
- " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
+ " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v fs, " BUILDIDS "_f_s n\n"
" where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
"union all select\n"
" b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
- " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
+ " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_files_v fsref, "
" " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
" where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
" and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
@@ -271,6 +292,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
"union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
"union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
"union all select 'filenames',count(*) from " BUILDIDS "_files\n"
+ "union all select 'fileparts',count(*) from " BUILDIDS "_fileparts\n"
"union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
"union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
#if SQLITE_VERSION_NUMBER >= 3016000
@@ -281,10 +303,26 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
// schema change history & garbage collection
//
// XXX: we could have migration queries here to bring prior-schema
-// data over instead of just dropping it.
+// data over instead of just dropping it. But that could incur
+// doubled storage costs.
//
-// buildids9: widen the mtime_scanned table
+// buildids10: split the _files table into _fileparts
"" // <<< we are here
+// buildids9: widen the mtime_scanned table
+ "DROP VIEW IF EXISTS buildids9_stats;\n"
+ "DROP INDEX IF EXISTS buildids9_r_de_idx;\n"
+ "DROP INDEX IF EXISTS buildids9_f_de_idx;\n"
+ "DROP VIEW IF EXISTS buildids9_query_s;\n"
+ "DROP VIEW IF EXISTS buildids9_query_e;\n"
+ "DROP VIEW IF EXISTS buildids9_query_d;\n"
+ "DROP TABLE IF EXISTS buildids9_r_sdef;\n"
+ "DROP TABLE IF EXISTS buildids9_r_sref;\n"
+ "DROP TABLE IF EXISTS buildids9_r_de;\n"
+ "DROP TABLE IF EXISTS buildids9_f_s;\n"
+ "DROP TABLE IF EXISTS buildids9_f_de;\n"
+ "DROP TABLE IF EXISTS buildids9_file_mtime_scanned;\n"
+ "DROP TABLE IF EXISTS buildids9_buildids;\n"
+ "DROP TABLE IF EXISTS buildids9_files;\n"
// buildids8: slim the sref table
"drop table if exists buildids8_f_de;\n"
"drop table if exists buildids8_f_s;\n"
@@ -398,6 +436,8 @@ static const struct argp_option options[] =
{ "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
#define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
{ "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
+#define ARGP_SCAN_CHECKPOINT 0x100A
+ { "scan-checkpoint", ARGP_SCAN_CHECKPOINT, "NUM", 0, "Number of files scanned before a WAL checkpoint.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 },
};
@@ -452,6 +492,7 @@ static unsigned forwarded_ttl_limit = 8;
static bool scan_source_info = true;
static string tmpdir;
static bool passive_p = false;
+static long scan_checkpoint = 256;
static void set_metric(const string& key, double value);
// static void inc_metric(const string& key);
@@ -653,6 +694,11 @@ parse_opt (int key, char *arg,
case ARGP_KEY_DISABLE_SOURCE_SCAN:
scan_source_info = false;
break;
+ case ARGP_SCAN_CHECKPOINT:
+ scan_checkpoint = atol (arg);
+ if (scan_checkpoint < 0)
+ argp_failure(state, 1, EINVAL, "scan checkpoint");
+ break;
// case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
default: return ARGP_ERR_UNKNOWN;
}
@@ -736,7 +782,7 @@ struct elfutils_exception: public reportable_exception
template <typename Payload>
class workq
{
- set<Payload> q; // eliminate duplicates
+ unordered_set<Payload> q; // eliminate duplicates
mutex mtx;
condition_variable cv;
bool dead;
@@ -825,6 +871,24 @@ inline bool operator< (const scan_payload& a, const scan_payload& b)
{
return a.first < b.first; // don't bother compare the stat fields
}
+
+namespace std { // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480
+ template<> struct hash<::scan_payload>
+ {
+ std::size_t operator() (const ::scan_payload& p) const noexcept
+ {
+ return hash<string>()(p.first);
+ }
+ };
+ template<> struct equal_to<::scan_payload>
+ {
+ std::size_t operator() (const ::scan_payload& a, const ::scan_payload& b) const noexcept
+ {
+ return a.first == b.first;
+ }
+ };
+}
+
static workq<scan_payload> scanq; // just a single one
// producer & idler: thread_main_fts_source_paths()
// consumer: thread_main_scanner()
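Moving workq from set to unordered_set trades red-black-tree ordering for hashing, but the payload type then needs hash and equality functors; because the compilers of interest reject specializing std templates from outside namespace std (the linked GCC bug), the specializations above open namespace std directly. A standalone sketch of the same workaround, using a stand-in payload type rather than the server's scan_payload:

#include <cassert>
#include <string>
#include <unordered_set>

struct demo_payload { std::string name; int generation; };

namespace std { // same workaround: specializations live inside namespace std
  template<> struct hash<::demo_payload>
  {
    size_t operator() (const ::demo_payload& p) const noexcept
    { return hash<string>()(p.name); } // key on the name only
  };
  template<> struct equal_to<::demo_payload>
  {
    bool operator() (const ::demo_payload& a, const ::demo_payload& b) const noexcept
    { return a.name == b.name; }
  };
}

int main ()
{
  std::unordered_set<demo_payload> q;
  q.insert ({ "/bin/ls", 1 });
  q.insert ({ "/bin/ls", 2 }); // same name, different metadata: deduplicated
  assert (q.size () == 1);
  return 0;
}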
@@ -882,6 +946,72 @@ public:
////////////////////////////////////////////////////////////////////////
+// periodic_barrier is a concurrency control object that lets N threads
+// periodically (based on counter value) agree to wait at a barrier,
+// let one of them carry out some work, then be set free
+
+class periodic_barrier
+{
+private:
+ unsigned period; // number of count() reports to trigger barrier activation
+ unsigned threads; // number of threads participating
+ mutex mtx; // protects all the following fields
+ unsigned counter; // count of count() reports in the current generation
+ unsigned generation; // barrier activation generation
+ unsigned waiting; // number of threads waiting for barrier
+ bool dead; // bring out your dead
+ condition_variable cv;
+public:
+ periodic_barrier(unsigned t, unsigned p):
+ period(p), threads(t), counter(0), generation(0), waiting(0), dead(false) { }
+ virtual ~periodic_barrier() {}
+
+ virtual void periodic_barrier_work() noexcept = 0;
+ void nuke() {
+ unique_lock<mutex> lock(mtx);
+ dead = true;
+ cv.notify_all();
+ }
+
+ void count()
+ {
+ unique_lock<mutex> lock(mtx);
+ unsigned prev_generation = this->generation;
+ if (counter < period-1) // normal case: counter just freely running
+ {
+ counter ++;
+ return;
+ }
+ else if (counter == period-1) // we're the doer
+ {
+ counter = period; // entering barrier holding phase
+ cv.notify_all();
+ while (waiting < threads-1 && !dead)
+ cv.wait(lock);
+ // all other threads are now stuck in the barrier
+ this->periodic_barrier_work(); // NB: we're holding the mutex the whole time
+ // reset for next barrier, releasing other waiters
+ counter = 0;
+ generation ++;
+ cv.notify_all();
+ return;
+ }
+ else if (counter == period) // we're a waiter, in holding phase
+ {
+ waiting ++;
+ cv.notify_all();
+ while (counter == period && generation == prev_generation && !dead)
+ cv.wait(lock);
+ waiting --;
+ return;
+ }
+ }
+};
+
+
+
+////////////////////////////////////////////////////////////////////////
+
// Print a standard timestamp.
static ostream&
@@ -1026,6 +1156,24 @@ public:
////////////////////////////////////////////////////////////////////////
+
+struct sqlite_checkpoint_pb: public periodic_barrier
+{
+ // NB: don't use sqlite_ps since it can throw exceptions during ctor etc.
+ sqlite_checkpoint_pb(unsigned t, unsigned p):
+ periodic_barrier(t, p) { }
+
+ void periodic_barrier_work() noexcept
+ {
+ (void) sqlite3_exec (db, "pragma wal_checkpoint(truncate);", NULL, NULL, NULL);
+ }
+};
+
+static periodic_barrier* scan_barrier = 0; // initialized in main()
+
+
+////////////////////////////////////////////////////////////////////////
+
// RAII style templated autocloser
template <class Payload, class Ignore>
@@ -1635,13 +1783,14 @@ extract_section (int elf_fd, int64_t parent_mtime,
throw libc_exception (errno, "cannot write to temporary file");
/* Set mtime to be the same as the parent file's mtime. */
- struct timeval tvs[2];
+ struct timespec tvs[2];
if (fstat (elf_fd, &fs) != 0)
throw libc_exception (errno, "cannot fstat file");
- tvs[0].tv_sec = tvs[1].tv_sec = fs.st_mtime;
- tvs[0].tv_usec = tvs[1].tv_usec = 0;
- (void) futimes (fd, tvs);
+ tvs[0].tv_sec = 0;
+ tvs[0].tv_nsec = UTIME_OMIT;
+ tvs[1] = fs.st_mtim;
+ (void) futimens (fd, tvs);
/* Add to fdcache. */
fdcache.intern (b_source, section, tmppath, data->d_size, true);
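The switch from futimes to futimens matters for two reasons: timespec carries nanoseconds where timeval stops at microseconds, and UTIME_OMIT lets the call leave the access time alone instead of clobbering it with zero. A minimal sketch of the idiom (the helper name is this example's own):

#include <sys/stat.h>

int copy_mtime (int from_fd, int to_fd)
{
  struct stat fs;
  if (fstat (from_fd, &fs) != 0)
    return -1;
  struct timespec tvs[2];
  tvs[0].tv_sec = 0;
  tvs[0].tv_nsec = UTIME_OMIT; // tvs[0] = atime: leave untouched
  tvs[1] = fs.st_mtim;         // tvs[1] = mtime: nanosecond-exact copy
  return futimens (to_fd, tvs);
}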
@@ -1727,11 +1876,10 @@ handle_buildid_f_match (bool internal_req_t,
}
else
{
- std::string file = b_source0.substr(b_source0.find_last_of("/")+1, b_source0.length());
add_mhd_response_header (r, "Content-Type", "application/octet-stream");
add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
to_string(s.st_size).c_str());
- add_mhd_response_header (r, "X-DEBUGINFOD-FILE", file.c_str());
+ add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source0.c_str());
add_mhd_last_modified (r, s.st_mtime);
if (verbose > 1)
obatched(clog) << "serving file " << b_source0 << " section=" << section << endl;
@@ -1951,10 +2099,12 @@ handle_buildid_r_match (bool internal_req_p,
// Set the mtime so the fdcache file mtimes, even prefetched ones,
// propagate to future webapi clients.
- struct timeval tvs[2];
- tvs[0].tv_sec = tvs[1].tv_sec = archive_entry_mtime(e);
- tvs[0].tv_usec = tvs[1].tv_usec = 0;
- (void) futimes (fd, tvs); /* best effort */
+ struct timespec tvs[2];
+ tvs[0].tv_sec = 0;
+ tvs[0].tv_nsec = UTIME_OMIT;
+ tvs[1].tv_sec = archive_entry_mtime(e);
+ tvs[1].tv_nsec = archive_entry_mtime_nsec(e);
+ (void) futimens (fd, tvs); /* best effort */
if (r != 0) // stage 3
{
@@ -2013,14 +2163,12 @@ handle_buildid_r_match (bool internal_req_p,
}
else
{
- std::string file = b_source1.substr(b_source1.find_last_of("/")+1, b_source1.length());
add_mhd_response_header (r, "Content-Type",
"application/octet-stream");
add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
to_string(archive_entry_size(e)).c_str());
- add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE",
- b_source0.c_str());
- add_mhd_response_header (r, "X-DEBUGINFOD-FILE", file.c_str());
+ add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
+ add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
add_mhd_last_modified (r, archive_entry_mtime(e));
if (verbose > 1)
obatched(clog) << "serving archive " << b_source0
@@ -3037,10 +3185,65 @@ elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, se
}
+// Intern the given file name in two parts (dirname & basename) and
+// return the resulting file's id.
+static int64_t
+register_file_name(sqlite_ps& ps_upsert_fileparts,
+ sqlite_ps& ps_upsert_file,
+ sqlite_ps& ps_lookup_file,
+ const string& name)
+{
+ std::size_t slash = name.rfind('/');
+ string dirname, basename;
+ if (slash == std::string::npos)
+ {
+ dirname = "";
+ basename = name;
+ }
+ else
+ {
+ dirname = name.substr(0, slash);
+ basename = name.substr(slash+1);
+ }
+
+ // intern the two substrings
+ ps_upsert_fileparts
+ .reset()
+ .bind(1, dirname)
+ .step_ok_done();
+ ps_upsert_fileparts
+ .reset()
+ .bind(1, basename)
+ .step_ok_done();
+
+ // intern the tuple
+ ps_upsert_file
+ .reset()
+ .bind(1, dirname)
+ .bind(2, basename)
+ .step_ok_done();
+
+ // look up the tuple's id
+ ps_lookup_file
+ .reset()
+ .bind(1, dirname)
+ .bind(2, basename);
+ int rc = ps_lookup_file.step();
+ if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
+
+ int64_t id = sqlite3_column_int64 (ps_lookup_file, 0);
+ ps_lookup_file.reset();
+ return id;
+}
+
+
+
static void
scan_source_file (const string& rps, const stat_t& st,
sqlite_ps& ps_upsert_buildids,
- sqlite_ps& ps_upsert_files,
+ sqlite_ps& ps_upsert_fileparts,
+ sqlite_ps& ps_upsert_file,
+ sqlite_ps& ps_lookup_file,
sqlite_ps& ps_upsert_de,
sqlite_ps& ps_upsert_s,
sqlite_ps& ps_query,
@@ -3050,10 +3253,12 @@ scan_source_file (const string& rps, const stat_t& st,
unsigned& fts_debuginfo,
unsigned& fts_sourcefiles)
{
+ int64_t fileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
+
/* See if we know of it already. */
int rc = ps_query
.reset()
- .bind(1, rps)
+ .bind(1, fileid)
.bind(2, st.st_mtime)
.step();
ps_query.reset();
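The split semantics of register_file_name in miniature, as a standalone check. Note the one wrinkle that a slashless name and the same name with a bare leading '/' intern identically; this is harmless here, since scanned names are absolute paths:

#include <cassert>
#include <string>

static void split (const std::string& name, std::string& dirname, std::string& basename)
{
  std::size_t slash = name.rfind ('/');
  if (slash == std::string::npos)
    { dirname = ""; basename = name; }
  else
    { dirname = name.substr (0, slash); basename = name.substr (slash + 1); }
}

int main ()
{
  std::string d, b;
  split ("/usr/bin/ls", d, b); assert (d == "/usr/bin" && b == "ls");
  split ("/ls", d, b);         assert (d == "" && b == "ls");
  split ("ls", d, b);          assert (d == "" && b == "ls"); // collides with "/ls"
  return 0;
}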
@@ -3093,12 +3298,6 @@ scan_source_file (const string& rps, const stat_t& st,
if (fd >= 0)
close (fd);
- // register this file name in the interning table
- ps_upsert_files
- .reset()
- .bind(1, rps)
- .step_ok_done();
-
if (buildid == "")
{
// no point storing an elf file without buildid
@@ -3125,7 +3324,7 @@ scan_source_file (const string& rps, const stat_t& st,
.bind(1, buildid)
.bind(2, debuginfo_p ? 1 : 0)
.bind(3, executable_p ? 1 : 0)
- .bind(4, rps)
+ .bind(4, fileid)
.bind(5, st.st_mtime)
.step_ok_done();
}
@@ -3157,11 +3356,6 @@ scan_source_file (const string& rps, const stat_t& st,
<< " mtime=" << sfs.st_mtime
<< " as source " << dwarfsrc << endl;
- ps_upsert_files
- .reset()
- .bind(1, srps)
- .step_ok_done();
-
// PR25548: store canonicalized dwarfsrc path
string dwarfsrc_canon = canon_pathname (dwarfsrc);
if (dwarfsrc_canon != dwarfsrc)
@@ -3170,16 +3364,14 @@ scan_source_file (const string& rps, const stat_t& st,
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
- ps_upsert_files
- .reset()
- .bind(1, dwarfsrc_canon)
- .step_ok_done();
+ int64_t fileid1 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, dwarfsrc_canon);
+ int64_t fileid2 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, srps);
ps_upsert_s
.reset()
.bind(1, buildid)
- .bind(2, dwarfsrc_canon)
- .bind(3, srps)
+ .bind(2, fileid1)
+ .bind(3, fileid2)
.bind(4, sfs.st_mtime)
.step_ok_done();
@@ -3189,7 +3381,7 @@ scan_source_file (const string& rps, const stat_t& st,
ps_scan_done
.reset()
- .bind(1, rps)
+ .bind(1, fileid)
.bind(2, st.st_mtime)
.bind(3, st.st_size)
.step_ok_done();
@@ -3208,8 +3400,9 @@ scan_source_file (const string& rps, const stat_t& st,
// Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
// constituent files with given upsert statements.
static void
-archive_classify (const string& rps, string& archive_extension,
- sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
+archive_classify (const string& rps, string& archive_extension, int64_t archiveid,
+ sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_fileparts, sqlite_ps& ps_upsert_file,
+ sqlite_ps& ps_lookup_file,
sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
time_t mtime,
unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
@@ -3263,8 +3456,9 @@ archive_classify (const string& rps, string& archive_extension,
}
if (verbose > 3)
- obatched(clog) << "libarchive scanning " << rps << endl;
+ obatched(clog) << "libarchive scanning " << rps << " id " << archiveid << endl;
+ bool any_exceptions = false;
while(1) // parse archive entries
{
if (interrupted)
@@ -3316,10 +3510,7 @@ archive_classify (const string& rps, string& archive_extension,
.step_ok_done();
}
- ps_upsert_files // register this rpm constituent file name in interning table
- .reset()
- .bind(1, fn)
- .step_ok_done();
+ int64_t fileid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, fn);
if (sourcefiles.size() > 0) // sref records needed
{
@@ -3348,15 +3539,13 @@ archive_classify (const string& rps, string& archive_extension,
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
- ps_upsert_files
- .reset()
- .bind(1, dwarfsrc_canon)
- .step_ok_done();
-
+ int64_t srcfileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
+ dwarfsrc_canon);
+
ps_upsert_sref
.reset()
.bind(1, buildid)
- .bind(2, dwarfsrc_canon)
+ .bind(2, srcfileid)
.step_ok_done();
fts_sref ++;
@@ -3375,9 +3564,9 @@ archive_classify (const string& rps, string& archive_extension,
.bind(1, buildid)
.bind(2, debuginfo_p ? 1 : 0)
.bind(3, executable_p ? 1 : 0)
- .bind(4, rps)
+ .bind(4, archiveid)
.bind(5, mtime)
- .bind(6, fn)
+ .bind(6, fileid)
.step_ok_done();
}
else // potential source - sdef record
@@ -3385,9 +3574,9 @@ archive_classify (const string& rps, string& archive_extension,
fts_sdef ++;
ps_upsert_sdef
.reset()
- .bind(1, rps)
+ .bind(1, archiveid)
.bind(2, mtime)
- .bind(3, fn)
+ .bind(3, fileid)
.step_ok_done();
}
@@ -3402,8 +3591,17 @@ archive_classify (const string& rps, string& archive_extension,
catch (const reportable_exception& e)
{
e.report(clog);
+ any_exceptions = true;
+ // NB: but we allow the libarchive iteration to continue, in
+ // case we can still gather some useful information. That
+ // would allow some webapi queries to work, until later when
+ // this archive is rescanned. (Its vitals won't go into the
+ // _file_mtime_scanned table until after a successful scan.)
}
}
+
+ if (any_exceptions)
+ throw reportable_exception("exceptions encountered during archive scan");
}
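The error-handling shape introduced here, in miniature: per-entry failures are logged and remembered but do not abort the walk, so partial results can serve webapi queries immediately; only at the end does the function throw, which (as scan_archive_file shows below) keeps the archive out of _file_mtime_scanned so a later pass retries it. A standalone sketch:

#include <iostream>
#include <stdexcept>
#include <vector>

void scan_all (const std::vector<int>& entries)
{
  bool any_exceptions = false;
  for (int e : entries)
    try
      {
        if (e < 0)
          throw std::runtime_error ("bad entry");
        // ... index the entry; rows inserted so far are kept ...
      }
    catch (const std::exception& ex)
      {
        std::clog << ex.what () << std::endl;
        any_exceptions = true; // note it, keep walking the archive
      }
  if (any_exceptions) // only now refuse to mark the whole scan complete
    throw std::runtime_error ("exceptions encountered during archive scan");
}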
@@ -3412,7 +3610,9 @@ archive_classify (const string& rps, string& archive_extension,
static void
scan_archive_file (const string& rps, const stat_t& st,
sqlite_ps& ps_upsert_buildids,
- sqlite_ps& ps_upsert_files,
+ sqlite_ps& ps_upsert_fileparts,
+ sqlite_ps& ps_upsert_file,
+ sqlite_ps& ps_lookup_file,
sqlite_ps& ps_upsert_de,
sqlite_ps& ps_upsert_sref,
sqlite_ps& ps_upsert_sdef,
@@ -3424,10 +3624,13 @@ scan_archive_file (const string& rps, const stat_t& st,
unsigned& fts_sref,
unsigned& fts_sdef)
{
+ // intern the archive file name
+ int64_t archiveid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
+
/* See if we know of it already. */
int rc = ps_query
.reset()
- .bind(1, rps)
+ .bind(1, archiveid)
.bind(2, st.st_mtime)
.step();
ps_query.reset();
@@ -3441,20 +3644,15 @@ scan_archive_file (const string& rps, const stat_t& st,
return;
}
- // intern the archive file name
- ps_upsert_files
- .reset()
- .bind(1, rps)
- .step_ok_done();
-
// extract the archive contents
unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
bool my_fts_sref_complete_p = true;
+ bool any_exceptions = false;
try
{
string archive_extension;
- archive_classify (rps, archive_extension,
- ps_upsert_buildids, ps_upsert_files,
+ archive_classify (rps, archive_extension, archiveid,
+ ps_upsert_buildids, ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
st.st_mtime,
my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
@@ -3472,6 +3670,7 @@ scan_archive_file (const string& rps, const stat_t& st,
catch (const reportable_exception& e)
{
e.report(clog);
+ any_exceptions = true;
}
if (verbose > 2)
@@ -3481,6 +3680,7 @@ scan_archive_file (const string& rps, const stat_t& st,
<< " debuginfos=" << my_fts_debuginfo
<< " srefs=" << my_fts_sref
<< " sdefs=" << my_fts_sdef
+ << " exceptions=" << any_exceptions
<< endl;
fts_executable += my_fts_executable;
@@ -3488,10 +3688,13 @@ scan_archive_file (const string& rps, const stat_t& st,
fts_sref += my_fts_sref;
fts_sdef += my_fts_sdef;
+ if (any_exceptions)
+ throw reportable_exception("exceptions encountered during archive scan");
+
if (my_fts_sref_complete_p) // leave incomplete?
ps_scan_done
.reset()
- .bind(1, rps)
+ .bind(1, archiveid)
.bind(2, st.st_mtime)
.bind(3, st.st_size)
.step_ok_done();
@@ -3506,57 +3709,63 @@ scan_archive_file (const string& rps, const stat_t& st,
// The thread that consumes file names off of the scanq. We hold
// the persistent sqlite_ps's at this level and delegate file/archive
// scanning to other functions.
-static void*
-thread_main_scanner (void* arg)
+static void
+scan ()
{
- (void) arg;
-
// all the prepared statements fit to use, the _f_ set:
sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
- sqlite_ps ps_f_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
+ sqlite_ps ps_f_upsert_fileparts (db, "file-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
+ sqlite_ps ps_f_upsert_file (db, "file-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
+ "(select id from " BUILDIDS "_fileparts where name = ?),\n"
+ "(select id from " BUILDIDS "_fileparts where name = ?));");
+ sqlite_ps ps_f_lookup_file (db, "file-file-lookup",
+ "select f.id\n"
+ " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
+ " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
"insert or ignore into " BUILDIDS "_f_de "
"(buildid, debuginfo_p, executable_p, file, mtime) "
"values ((select id from " BUILDIDS "_buildids where hex = ?),"
- " ?,?,"
- " (select id from " BUILDIDS "_files where name = ?), ?);");
+ " ?,?,?,?);");
sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
"insert or ignore into " BUILDIDS "_f_s "
"(buildid, artifactsrc, file, mtime) "
"values ((select id from " BUILDIDS "_buildids where hex = ?),"
- " (select id from " BUILDIDS "_files where name = ?),"
- " (select id from " BUILDIDS "_files where name = ?),"
- " ?);");
+ " ?,?,?);");
sqlite_ps ps_f_query (db, "file-negativehit-find",
"select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
- "and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
+ "and file = ? and mtime = ?;");
sqlite_ps ps_f_scan_done (db, "file-scanned",
"insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
- "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
+ "values ('F', ?,?,?);");
// and now for the _r_ set
sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
- sqlite_ps ps_r_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
+ sqlite_ps ps_r_upsert_fileparts (db, "rpm-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
+ sqlite_ps ps_r_upsert_file (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
+ "(select id from " BUILDIDS "_fileparts where name = ?),\n"
+ "(select id from " BUILDIDS "_fileparts where name = ?));");
+ sqlite_ps ps_r_lookup_file (db, "rpm-file-lookup",
+ "select f.id\n"
+ " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
+ " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
"insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
- "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
- "(select id from " BUILDIDS "_files where name = ?), ?, "
- "(select id from " BUILDIDS "_files where name = ?));");
+ "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, ?, ?, ?);");
sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
"insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
"(select id from " BUILDIDS "_buildids where hex = ?), "
- "(select id from " BUILDIDS "_files where name = ?));");
+ "?);");
sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
"insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
- "(select id from " BUILDIDS "_files where name = ?), ?,"
- "(select id from " BUILDIDS "_files where name = ?));");
+ "?, ?, ?);");
sqlite_ps ps_r_query (db, "rpm-negativehit-query",
"select 1 from " BUILDIDS "_file_mtime_scanned where "
- "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
+ "sourcetype = 'R' and file = ? and mtime = ?;");
sqlite_ps ps_r_scan_done (db, "rpm-scanned",
"insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
- "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
-
+ "values ('R', ?, ?, ?);");
+
unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
unsigned fts_sref = 0, fts_sdef = 0;
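The effect of the statement rewrite above is that the hot-path upserts no longer run a correlated name-lookup subselect per row; the integer ids that register_file_name() resolved once are bound directly. A sketch of the new shape in raw sqlite3 API terms (the helper and its use of an already-prepared _r_sdef insert are assumptions of this example):

extern "C" {
#include <sqlite3.h>
}
#include <cstdint>
#include <ctime>

void upsert_sdef (sqlite3_stmt* ps, int64_t archiveid, time_t mtime, int64_t fileid)
{
  sqlite3_reset (ps);
  sqlite3_bind_int64 (ps, 1, archiveid); // was: (select id from ..._files where name = ?)
  sqlite3_bind_int64 (ps, 2, (sqlite3_int64) mtime);
  sqlite3_bind_int64 (ps, 3, fileid);    // was: (select id from ..._files where name = ?)
  (void) sqlite3_step (ps);              // SQLITE_DONE expected for an upsert
}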
@@ -3568,6 +3777,9 @@ thread_main_scanner (void* arg)
scan_payload p;
add_metric("thread_busy", "role", "scan", -1);
+ // NB: threads may be blocked within either of these two waiting
+ // states, if the work queue happens to run dry. That's OK.
+ if (scan_barrier) scan_barrier->count();
bool gotone = scanq.wait_front(p);
add_metric("thread_busy", "role", "scan", 1);
@@ -3583,7 +3795,9 @@ thread_main_scanner (void* arg)
if (scan_archive)
scan_archive_file (p.first, p.second,
ps_r_upsert_buildids,
- ps_r_upsert_files,
+ ps_r_upsert_fileparts,
+ ps_r_upsert_file,
+ ps_r_lookup_file,
ps_r_upsert_de,
ps_r_upsert_sref,
ps_r_upsert_sdef,
@@ -3598,7 +3812,9 @@ thread_main_scanner (void* arg)
if (scan_files) // NB: maybe "else if" ?
scan_source_file (p.first, p.second,
ps_f_upsert_buildids,
- ps_f_upsert_files,
+ ps_f_upsert_fileparts,
+ ps_f_upsert_file,
+ ps_f_lookup_file,
ps_f_upsert_de,
ps_f_upsert_s,
ps_f_query,
@@ -3622,8 +3838,25 @@ thread_main_scanner (void* arg)
inc_metric("thread_work_total","role","scan");
}
-
add_metric("thread_busy", "role", "scan", -1);
+}
+
+
+// Use this function as the thread entry point, so it can catch our
+// fleet of exceptions (incl. the sqlite_ps ctors) and report.
+static void*
+thread_main_scanner (void* arg)
+{
+ (void) arg;
+ while (! interrupted)
+ try
+ {
+ scan();
+ }
+ catch (const reportable_exception& e)
+ {
+ e.report(cerr);
+ }
return 0;
}
@@ -3836,7 +4069,7 @@ void groom()
// scan for files that have disappeared
sqlite_ps files (db, "check old files",
"select distinct s.mtime, s.file, f.name from "
- BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
+ BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files_v f "
"where f.id = s.file");
// NB: Because _file_mtime_scanned can contain both F and
// R records for the same file, this query would return duplicates if the
@@ -3874,7 +4107,7 @@ void groom()
{
bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
- regex_file_drop = reg_exclude && !reg_include;
+ regex_file_drop = !reg_include || reg_exclude; // match logic of scan_source_paths
}
rc = stat(filename, &s);
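The corrected grooming predicate, isolated: a file is kept only if it matches the include regex and does not match the exclude regex; anything else is dropped, exactly as scan_source_paths decides what to scan. (regexec() returns 0 on a match, hence the negations.) A sketch:

#include <regex.h>

bool file_drop_p (const regex_t* incl, const regex_t* excl, const char* filename)
{
  bool reg_include = ! regexec (incl, filename, 0, NULL, 0);
  bool reg_exclude = ! regexec (excl, filename, 0, NULL, 0);
  return !reg_include || reg_exclude; // previously: reg_exclude && !reg_include
}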
@@ -4136,6 +4369,20 @@ default_concurrency() // guaranteed >= 1
}
+// 30879: Something to help out in case of an uncaught exception.
+void my_terminate_handler()
+{
+#if defined(__GLIBC__)
+ void *array[40];
+ int size = backtrace (array, 40);
+ backtrace_symbols_fd (array, size, STDERR_FILENO);
+#endif
+#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
+ __gnu_cxx::__verbose_terminate_handler();
+#endif
+ abort();
+}
+
int
main (int argc, char *argv[])
@@ -4144,6 +4391,8 @@ main (int argc, char *argv[])
(void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
(void) textdomain (PACKAGE_TARNAME);
+ std::set_terminate(& my_terminate_handler);
+
/* Tell the library which version we are expecting. */
elf_version (EV_CURRENT);
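A standalone sketch of the terminate handler in action, assuming glibc (for <execinfo.h>) and libstdc++ (whose <cxxabi.h> declares the verbose terminate handler): an exception escaping main reaches std::terminate, a raw backtrace goes to stderr, then the verbose handler prints the exception's type and what() before aborting:

#include <cxxabi.h>     // declares __gnu_cxx::__verbose_terminate_handler
#include <exception>
#include <stdexcept>
#include <unistd.h>     // STDERR_FILENO
extern "C" {
#include <execinfo.h>   // backtrace, backtrace_symbols_fd (glibc)
}

static void demo_terminate_handler ()
{
  void *array[40];
  int size = backtrace (array, 40);                   // raw return addresses
  backtrace_symbols_fd (array, size, STDERR_FILENO);  // symbolize to stderr
  __gnu_cxx::__verbose_terminate_handler ();          // prints type + what(), aborts
}

int main ()
{
  std::set_terminate (demo_terminate_handler);
  throw std::runtime_error ("uncaught"); // no handler anywhere: terminate fires
}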
@@ -4364,8 +4613,10 @@ main (int argc, char *argv[])
obatched(clog) << "search concurrency " << concurrency << endl;
obatched(clog) << "webapi connection pool " << connection_pool
<< (connection_pool ? "" : " (unlimited)") << endl;
- if (! passive_p)
+ if (! passive_p) {
obatched(clog) << "rescan time " << rescan_s << endl;
+ obatched(clog) << "scan checkpoint " << scan_checkpoint << endl;
+ }
obatched(clog) << "fdcache fds " << fdcache_fds << endl;
obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
@@ -4407,6 +4658,9 @@ main (int argc, char *argv[])
if (scan_files || scan_archives.size() > 0)
{
+ if (scan_checkpoint > 0)
+ scan_barrier = new sqlite_checkpoint_pb(concurrency, (unsigned) scan_checkpoint);
+
rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
if (rc)
error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
@@ -4433,6 +4687,7 @@ main (int argc, char *argv[])
while (! interrupted)
pause ();
scanq.nuke(); // wake up any remaining scanq-related threads, let them die
+ if (scan_barrier) scan_barrier->nuke(); // ... in case they're stuck in a barrier
set_metric("ready", 0);
if (verbose)
@@ -4458,6 +4713,7 @@ main (int argc, char *argv[])
}
debuginfod_pool_groom ();
+ delete scan_barrier;
// NB: no problem with unconditional free here - an earlier failed regcomp would exit program
(void) regfree (& file_include_regex);