xref: /aosp_15_r20/external/elfutils/debuginfod/debuginfod.cxx (revision 7304104da70ce23c86437a01be71edd1a2d7f37e)
1 /* Debuginfo-over-http server.
2    Copyright (C) 2019-2024 Red Hat, Inc.
3    Copyright (C) 2021, 2022 Mark J. Wielaard <[email protected]>
4    This file is part of elfutils.
5 
6    This file is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    elfutils is distributed in the hope that it will be useful, but
12    WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18 
19 
20 /* cargo-cult from libdwfl linux-kernel-modules.c */
21 /* In case we have a bad fts we include this before config.h because it
22    can't handle _FILE_OFFSET_BITS.
23    Everything we need here is fine if its declarations just come first.
24    Also, include sys/types.h before fts. On some systems fts.h is not self
25    contained. */
26 #ifdef BAD_FTS
27   #include <sys/types.h>
28   #include <fts.h>
29 #endif
30 
31 #ifdef HAVE_CONFIG_H
32   #include "config.h"
33 #endif
34 
35 // #define _GNU_SOURCE
36 #ifdef HAVE_SCHED_H
37 extern "C" {
38 #include <sched.h>
39 }
40 #endif
41 #ifdef HAVE_SYS_RESOURCE_H
42 extern "C" {
43 #include <sys/resource.h>
44 }
45 #endif
46 
47 #ifdef HAVE_EXECINFO_H
48 extern "C" {
49 #include <execinfo.h>
50 }
51 #endif
52 
53 #include "debuginfod.h"
54 #include <dwarf.h>
55 
56 #include <argp.h>
57 #ifdef __GNUC__
58 #undef __attribute__ /* glibc bug - rhbz 1763325 */
59 #endif
60 
61 #include <unistd.h>
62 #include <stdlib.h>
63 #include <locale.h>
64 #include <pthread.h>
65 #include <signal.h>
66 #include <sys/stat.h>
67 #include <sys/time.h>
68 #include <sys/vfs.h>
69 #include <unistd.h>
70 #include <fcntl.h>
71 #include <netdb.h>
72 #include <math.h>
73 #include <float.h>
74 
75 
76 /* If fts.h is included before config.h, its indirect inclusions may not
77    give us the right LFS aliases of these functions, so map them manually.  */
78 #ifdef BAD_FTS
79   #ifdef _FILE_OFFSET_BITS
80     #define open open64
81     #define fopen fopen64
82   #endif
83 #else
84   #include <sys/types.h>
85   #include <fts.h>
86 #endif
87 
88 #include <cstring>
89 #include <vector>
90 #include <set>
91 #include <unordered_set>
92 #include <map>
93 #include <string>
94 #include <iostream>
95 #include <iomanip>
96 #include <ostream>
97 #include <sstream>
98 #include <mutex>
99 #include <deque>
100 #include <condition_variable>
101 #include <exception>
102 #include <thread>
103 // #include <regex> // on rhel7 gcc 4.8, not competent
104 #include <regex.h>
105 // #include <algorithm>
106 using namespace std;
107 
108 #include <gelf.h>
109 #include <libdwelf.h>
110 
111 #include <microhttpd.h>
112 
113 #if MHD_VERSION >= 0x00097002
114 // libmicrohttpd 0.9.71 broke API
115 #define MHD_RESULT enum MHD_Result
116 #else
117 #define MHD_RESULT int
118 #endif
119 
120 #include <curl/curl.h>
121 #include <archive.h>
122 #include <archive_entry.h>
123 #include <sqlite3.h>
124 
125 #ifdef __linux__
126 #include <sys/syscall.h>
127 #endif
128 
129 #ifdef __linux__
130 #define tid() syscall(SYS_gettid)
131 #else
132 #define tid() pthread_self()
133 #endif
134 
135 extern "C" {
136 #include "printversion.h"
137 #include "system.h"
138 }
139 
140 
// Return true iff HAYSTACK ends with NEEDLE.  An empty NEEDLE matches
// every HAYSTACK, including the empty one.
inline bool
string_endswith(const string& haystack, const string& needle)
{
  const size_t hay_len = haystack.size();
  const size_t needle_len = needle.size();

  // A suffix can never be longer than the string it terminates.
  if (needle_len > hay_len)
    return false;

  return haystack.compare(hay_len - needle_len, needle_len, needle) == 0;
}
148 
149 
150 // Roll this identifier for every sqlite schema incompatibility.
151 #define BUILDIDS "buildids10"
152 
153 #if SQLITE_VERSION_NUMBER >= 3008000
154 #define WITHOUT_ROWID "without rowid"
155 #else
156 #define WITHOUT_ROWID ""
157 #endif
158 
// SQL text describing the index database for the current schema
// generation (all object names are prefixed with BUILDIDS).  In order:
//   1. connection pragmas,
//   2. interning tables for file name parts, file names and buildids,
//   3. per-sourcetype fact tables ('F' = plain file, 'R' = archive),
//   4. the _query_d / _query_e / _query_s views plus a _stats view,
//   5. drop statements that garbage-collect every earlier schema generation.
// Every create uses "if not exists" and every drop uses "if exists".
static const char DEBUGINFOD_SQLITE_DDL[] =
  "pragma foreign_keys = on;\n"
  "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
  "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
  "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
  "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
  "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
  "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
  // NB: all these are overridable with -D option

  // Normalization table for interning file names
  "create table if not exists " BUILDIDS "_fileparts (\n"
  "        id integer primary key not null,\n"
  "        name text unique not null\n"
  "        );\n"
  // A file name is stored as a (dirname, basename) pair of _fileparts ids.
  "create table if not exists " BUILDIDS "_files (\n"
  "        id integer primary key not null,\n"
  "        dirname integer not null,\n"
  "        basename integer not null,\n"
  "        unique (dirname, basename),\n"
  "        foreign key (dirname) references " BUILDIDS "_fileparts(id) on delete cascade,\n"
  "        foreign key (basename) references " BUILDIDS "_fileparts(id) on delete cascade\n"
  "        );\n"
  "create view if not exists " BUILDIDS "_files_v as\n" // a view that rebuilds "dirname/basename" names from the interned parts
  "        select f.id, n1.name || '/' || n2.name as name\n"
  "        from " BUILDIDS "_files f, " BUILDIDS "_fileparts n1, " BUILDIDS "_fileparts n2\n"
  "        where f.dirname = n1.id and f.basename = n2.id;\n"

  // Normalization table for interning buildids
  "create table if not exists " BUILDIDS "_buildids (\n"
  "        id integer primary key not null,\n"
  "        hex text unique not null);\n"
  // Track the completion of scanning of a given file & sourcetype at given time
  "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
  "        mtime integer not null,\n"
  "        file integer not null,\n"
  "        size integer not null,\n" // in bytes
  "        sourcetype text(1) not null\n"
  "            check (sourcetype IN ('F', 'R')),\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        primary key (file, mtime, sourcetype)\n"
  "        ) " WITHOUT_ROWID ";\n"
  // 'F' sourcetype: buildid -> file rows, flagged as debuginfo and/or executable
  "create table if not exists " BUILDIDS "_f_de (\n"
  "        buildid integer not null,\n"
  "        debuginfo_p integer not null,\n"
  "        executable_p integer not null,\n"
  "        file integer not null,\n"
  "        mtime integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, file, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  // Index for faster delete by file identifier
  "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
  // 'F' sourcetype: buildid + source name (artifactsrc) -> file rows
  "create table if not exists " BUILDIDS "_f_s (\n"
  "        buildid integer not null,\n"
  "        artifactsrc integer not null,\n"
  "        file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
  "        mtime integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, artifactsrc, file, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  // 'R' sourcetype: like _f_de, with an extra "content" file name for the member inside the archive "file"
  "create table if not exists " BUILDIDS "_r_de (\n"
  "        buildid integer not null,\n"
  "        debuginfo_p integer not null,\n"
  "        executable_p integer not null,\n"
  "        file integer not null,\n"
  "        mtime integer not null,\n"
  "        content integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  // Index for faster delete by archive file identifier
  "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
  "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
  "        buildid integer not null,\n"
  "        artifactsrc integer not null,\n"
  "        foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
  "        primary key (buildid, artifactsrc)\n"
  "        ) " WITHOUT_ROWID ";\n"
  "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
  "        file integer not null,\n"
  "        mtime integer not null,\n"
  "        content integer not null,\n"
  "        foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
  "        primary key (content, file, mtime)\n"
  "        ) " WITHOUT_ROWID ";\n"
  // create views to glue together some of the above tables, for webapi D queries
  // (rows with debuginfo_p = 1 from both the 'F' and 'R' tables)
  "create view if not exists " BUILDIDS "_query_d as \n"
  "select\n"
  "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
  "union all select\n"
  "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
  ";"
  // ... and for E queries (same shape, filtered on executable_p = 1)
  "create view if not exists " BUILDIDS "_query_e as \n"
  "select\n"
  "        b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
  "union all select\n"
  "        b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
  "        where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
  ";"
  // ... and for S queries
  // ('F' half reads _f_s directly; 'R' half joins _r_sref with _r_sdef on the source name and with _r_de on the buildid)
  "create view if not exists " BUILDIDS "_query_s as \n"
  "select\n"
  "        b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v fs, " BUILDIDS "_f_s n\n"
  "        where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
  "union all select\n"
  "        b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
  "        from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_files_v fsref, "
  "        " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
  "        where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
  "        and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
  ";"
  // and for startup overview counts
  // (the view is dropped first, so its definition is always recreated from the text below)
  "drop view if exists " BUILDIDS "_stats;\n"
  "create view if not exists " BUILDIDS "_stats as\n"
  "          select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
  "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
  "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
  "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
  "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
  "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
  "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
  "union all select 'fileparts',count(*) from " BUILDIDS "_fileparts\n"
  "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
  "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
#if SQLITE_VERSION_NUMBER >= 3016000
  // the pragma_page_count()/pragma_page_size() row is only compiled in for sqlite >= 3.16.0
  "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
#endif
  ";\n"

// schema change history & garbage collection
//
// XXX: we could have migration queries here to bring prior-schema
// data over instead of just dropping it.  But that could incur
// doubled storage costs.
//
// buildids10: split the _files table into _parts
  "" // <<< we are here
// buildids9: widen the mtime_scanned table
  "DROP VIEW IF EXISTS buildids9_stats;\n"
  "DROP INDEX IF EXISTS buildids9_r_de_idx;\n"
  "DROP INDEX IF EXISTS buildids9_f_de_idx;\n"
  "DROP VIEW IF EXISTS buildids9_query_s;\n"
  "DROP VIEW IF EXISTS buildids9_query_e;\n"
  "DROP VIEW IF EXISTS buildids9_query_d;\n"
  "DROP TABLE IF EXISTS buildids9_r_sdef;\n"
  "DROP TABLE IF EXISTS buildids9_r_sref;\n"
  "DROP TABLE IF EXISTS buildids9_r_de;\n"
  "DROP TABLE IF EXISTS buildids9_f_s;\n"
  "DROP TABLE IF EXISTS buildids9_f_de;\n"
  "DROP TABLE IF EXISTS buildids9_file_mtime_scanned;\n"
  "DROP TABLE IF EXISTS buildids9_buildids;\n"
  "DROP TABLE IF EXISTS buildids9_files;\n"
// buildids8: slim the sref table
  "drop table if exists buildids8_f_de;\n"
  "drop table if exists buildids8_f_s;\n"
  "drop table if exists buildids8_r_de;\n"
  "drop table if exists buildids8_r_sref;\n"
  "drop table if exists buildids8_r_sdef;\n"
  "drop table if exists buildids8_file_mtime_scanned;\n"
  "drop table if exists buildids8_files;\n"
  "drop table if exists buildids8_buildids;\n"
// buildids7: separate _norm table into dense subtype tables
  "drop table if exists buildids7_f_de;\n"
  "drop table if exists buildids7_f_s;\n"
  "drop table if exists buildids7_r_de;\n"
  "drop table if exists buildids7_r_sref;\n"
  "drop table if exists buildids7_r_sdef;\n"
  "drop table if exists buildids7_file_mtime_scanned;\n"
  "drop table if exists buildids7_files;\n"
  "drop table if exists buildids7_buildids;\n"
// buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
  "drop table if exists buildids6_norm;\n"
  "drop table if exists buildids6_files;\n"
  "drop table if exists buildids6_buildids;\n"
  "drop view if exists buildids6;\n"
// buildids5: redefine srcfile1 column to be '.'-less (for rpms)
  "drop table if exists buildids5_norm;\n"
  "drop table if exists buildids5_files;\n"
  "drop table if exists buildids5_buildids;\n"
  "drop table if exists buildids5_bolo;\n"
  "drop table if exists buildids5_rfolo;\n"
  "drop view if exists buildids5;\n"
// buildids4: introduce rpmfile RFOLO
  "drop table if exists buildids4_norm;\n"
  "drop table if exists buildids4_files;\n"
  "drop table if exists buildids4_buildids;\n"
  "drop table if exists buildids4_bolo;\n"
  "drop table if exists buildids4_rfolo;\n"
  "drop view if exists buildids4;\n"
// buildids3*: split out srcfile BOLO
  "drop table if exists buildids3_norm;\n"
  "drop table if exists buildids3_files;\n"
  "drop table if exists buildids3_buildids;\n"
  "drop table if exists buildids3_bolo;\n"
  "drop view if exists buildids3;\n"
// buildids2: normalized buildid and filenames into interning tables;
  "drop table if exists buildids2_norm;\n"
  "drop table if exists buildids2_files;\n"
  "drop table if exists buildids2_buildids;\n"
  "drop view if exists buildids2;\n"
  // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
//           lookups from sources, e.g. files or rpms that contain no buildid-indexable content
  "drop table if exists buildids1;\n"
// buildids: original
  "drop table if exists buildids;\n"
  ;
382 
// SQL text for cleanup: a single WAL checkpoint in truncate mode.
// NOTE(review): presumably executed at shutdown; the call site is not in this part of the file.
static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
  "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
  ;
386 
387 
388 
389 
390 /* Name and version of program.  */
391 ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
392 
393 /* Bug report address.  */
394 ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
395 
/* Definitions of arguments for argp functions.
   Each entry reads: long name, key, argument placeholder, flags, help
   text, group.  Entries with a NULL name and key 0 are group headings;
   the all-zero entry terminates the table.  Options without a short
   letter use the ARGP_KEY_* / ARGP_SCAN_* keys (0x1001 and up) that are
   defined inline next to the entry using them.  Keys are dispatched in
   parse_opt().  */
static const struct argp_option options[] =
  {
   { NULL, 0, NULL, 0, "Scanners:", 1 },
   { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
   { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
   { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
   { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
   // "source-oci-imageregistry"  ...

   { NULL, 0, NULL, 0, "Options:", 2 },
   { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
   { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
   { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
   { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
   { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM, default=#CPUs.", 0 },
   // -C takes an optional argument: parse_opt() may be called with arg == NULL for it
   { "connection-pool", 'C', "NUM", OPTION_ARG_OPTIONAL,
     "Use webapi connection pool with NUM threads, default=unlim.", 0 },
   { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
   { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
   { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
   { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
   { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
   { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
   { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
   // hidden: still accepted, but parse_opt() treats it as deprecated and ignores the value
#define ARGP_KEY_FDCACHE_FDS 0x1001
   { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", OPTION_HIDDEN, NULL, 0 },
#define ARGP_KEY_FDCACHE_MBS 0x1002
   { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
   { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MINTMP 0x1004
   { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
   // hidden: deprecated, value ignored by parse_opt()
#define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
   { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", OPTION_HIDDEN, NULL, 0},
   // hidden: deprecated, value ignored by parse_opt()
#define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
   { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", OPTION_HIDDEN, NULL, 0},
#define ARGP_KEY_FORWARDED_TTL_LIMIT 0x1007
   {"forwarded-ttl-limit", ARGP_KEY_FORWARDED_TTL_LIMIT, "NUM", 0, "Limit of X-Forwarded-For hops, default 8.", 0},
#define ARGP_KEY_PASSIVE 0x1008
   { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
#define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
   { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
#define ARGP_SCAN_CHECKPOINT 0x100A
   { "scan-checkpoint", ARGP_SCAN_CHECKPOINT, "NUM", 0, "Number of files scanned before a WAL checkpoint.", 0 },
   { NULL, 0, NULL, 0, NULL, 0 },
  };
443 
444 /* Short description of program.  */
445 static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
446 
447 /* Strings for arguments in help texts.  */
448 static const char args_doc[] = "[PATH ...]";
449 
450 /* Prototype for option handler.  */
451 static error_t parse_opt (int key, char *arg, struct argp_state *state);
452 
453 static unsigned default_concurrency();
454 
455 /* Data structure to communicate with argp functions.  */
456 static struct argp argp =
457   {
458    options, parse_opt, args_doc, doc, NULL, NULL, NULL
459   };
460 
461 
// Process-wide configuration and state.  Unless noted otherwise, each
// setting is assigned by the matching option case in parse_opt().
static string db_path; // -d; ":memory:" is rewritten to a shared-cache URI
static sqlite3 *db;  // single connection, serialized across all our threads!
static sqlite3 *dbq; // webapi query-servicing readonly connection, serialized ditto!
static unsigned verbose; // incremented once per -v
// NOTE(review): the sig_atomic_t values below are presumably written from
// signal handlers defined elsewhere in the file - confirm there.
static volatile sig_atomic_t interrupted = 0;
static volatile sig_atomic_t forced_rescan_count = 0;
static volatile sig_atomic_t sigusr1 = 0;
static volatile sig_atomic_t forced_groom_count = 0;
static volatile sig_atomic_t sigusr2 = 0;
static unsigned http_port = 8002; // -p; 0 and values above 65535 are rejected
static unsigned rescan_s = 300; // -t, seconds
static unsigned groom_s = 86400; // -g, seconds
static bool maxigroom = false; // -G
static unsigned concurrency = default_concurrency(); // -c, at least 1
static int connection_pool = 0; // -C NUM (NUM >= 2); stays 0 when -C is absent or given without NUM
static set<string> source_paths; // positional PATH arguments
static bool scan_files = false; // -F
static map<string,string> scan_archives; // file extension -> command string; filled by -R, -U and -Z
static vector<string> extra_ddl; // one entry per -D
static regex_t file_include_regex; // -I (recompiled on each occurrence)
static regex_t file_exclude_regex; // -X (recompiled on each occurrence)
static bool regex_groom = false; // -r
static bool traverse_logical; // -L
static long fdcache_mbs; // --fdcache-mbs
static long fdcache_prefetch; // --fdcache-prefetch
static long fdcache_mintmp; // --fdcache-mintmp, percent in 0..100
static unsigned forwarded_ttl_limit = 8; // --forwarded-ttl-limit
static bool scan_source_info = true; // cleared by --disable-source-scan
static string tmpdir; // not assigned by parse_opt()
static bool passive_p = false; // --passive
static long scan_checkpoint = 256; // --scan-checkpoint, non-negative
493 
// Forward declarations of the metric helpers; the definitions are not in
// this part of the file.  The overloads differ in how many label
// name/value pairs they take: none, one (lname/lvalue), or two
// (lname/lvalue plus rname/rvalue).
// NOTE(review): judging by the names, set_ assigns a value, inc_ adds one
// and add_ adds VALUE - confirm against the definitions.
static void set_metric(const string& key, double value);
static void inc_metric(const string& key);
static void add_metric(const string& metric,
                       double value);
// one-label variants
static void set_metric(const string& metric,
                       const string& lname, const string& lvalue,
                       double value);
static void inc_metric(const string& metric,
                       const string& lname, const string& lvalue);
static void add_metric(const string& metric,
                       const string& lname, const string& lvalue,
                       double value);
// two-label variants
static void inc_metric(const string& metric,
                       const string& lname, const string& lvalue,
                       const string& rname, const string& rvalue);
static void add_metric(const string& metric,
                       const string& lname, const string& lvalue,
                       const string& rname, const string& rvalue,
                       double value);
513 
514 
515 class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
516   string m, n, v;
517 public:
tmp_inc_metric(const string & mname,const string & lname,const string & lvalue)518   tmp_inc_metric(const string& mname, const string& lname, const string& lvalue):
519     m(mname), n(lname), v(lvalue)
520   {
521     add_metric (m, n, v, 1);
522   }
~tmp_inc_metric()523   ~tmp_inc_metric()
524   {
525     add_metric (m, n, v, -1);
526   }
527 };
528 
529 class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
530   string m, n, v;
531   struct timespec ts_start;
532 public:
tmp_ms_metric(const string & mname,const string & lname,const string & lvalue)533   tmp_ms_metric(const string& mname, const string& lname, const string& lvalue):
534     m(mname), n(lname), v(lvalue)
535   {
536     clock_gettime (CLOCK_MONOTONIC, & ts_start);
537   }
~tmp_ms_metric()538   ~tmp_ms_metric()
539   {
540     struct timespec ts_end;
541     clock_gettime (CLOCK_MONOTONIC, & ts_end);
542     double deltas = (ts_end.tv_sec - ts_start.tv_sec)
543       + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
544 
545     add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
546     inc_metric (m + "_milliseconds_count", n, v);
547   }
548 };
549 
550 
551 /* Handle program arguments.  */
552 static error_t
parse_opt(int key,char * arg,struct argp_state * state)553 parse_opt (int key, char *arg,
554 	   struct argp_state *state __attribute__ ((unused)))
555 {
556   int rc;
557   switch (key)
558     {
559     case 'v': verbose ++; break;
560     case 'd':
561       /* When using the in-memory database make sure it is shareable,
562 	 so we can open it twice as read/write and read-only.  */
563       if (strcmp (arg, ":memory:") == 0)
564 	db_path = "file::memory:?cache=shared";
565       else
566 	db_path = string(arg);
567       break;
568     case 'p': http_port = (unsigned) atoi(arg);
569       if (http_port == 0 || http_port > 65535)
570         argp_failure(state, 1, EINVAL, "port number");
571       break;
572     case 'F': scan_files = true; break;
573     case 'R':
574       scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
575       break;
576     case 'U':
577       scan_archives[".deb"]="(bsdtar -O -x -f - data.tar\\*)<";
578       scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar\\*)<";
579       scan_archives[".ipk"]="(bsdtar -O -x -f - data.tar\\*)<";
580       // .udeb too?
581       break;
582     case 'Z':
583       {
584         char* extension = strchr(arg, '=');
585         if (arg[0] == '\0')
586           argp_failure(state, 1, EINVAL, "missing EXT");
587         else if (extension)
588           scan_archives[string(arg, (extension-arg))]=string(extension+1);
589         else
590           scan_archives[string(arg)]=string("cat");
591       }
592       break;
593     case 'L':
594       if (passive_p)
595         argp_failure(state, 1, EINVAL, "-L option inconsistent with passive mode");
596       traverse_logical = true;
597       break;
598     case 'D':
599       if (passive_p)
600         argp_failure(state, 1, EINVAL, "-D option inconsistent with passive mode");
601       extra_ddl.push_back(string(arg));
602       break;
603     case 't':
604       if (passive_p)
605         argp_failure(state, 1, EINVAL, "-t option inconsistent with passive mode");
606       rescan_s = (unsigned) atoi(arg);
607       break;
608     case 'g':
609       if (passive_p)
610         argp_failure(state, 1, EINVAL, "-g option inconsistent with passive mode");
611       groom_s = (unsigned) atoi(arg);
612       break;
613     case 'G':
614       if (passive_p)
615         argp_failure(state, 1, EINVAL, "-G option inconsistent with passive mode");
616       maxigroom = true;
617       break;
618     case 'c':
619       if (passive_p)
620         argp_failure(state, 1, EINVAL, "-c option inconsistent with passive mode");
621       concurrency = (unsigned) atoi(arg);
622       if (concurrency < 1) concurrency = 1;
623       break;
624     case 'C':
625       if (arg)
626         {
627           connection_pool = atoi(arg);
628           if (connection_pool < 2)
629             argp_failure(state, 1, EINVAL, "-C NUM minimum 2");
630         }
631       break;
632     case 'I':
633       // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
634       if (passive_p)
635         argp_failure(state, 1, EINVAL, "-I option inconsistent with passive mode");
636       regfree (&file_include_regex);
637       rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
638       if (rc != 0)
639         argp_failure(state, 1, EINVAL, "regular expression");
640       break;
641     case 'X':
642       if (passive_p)
643         argp_failure(state, 1, EINVAL, "-X option inconsistent with passive mode");
644       regfree (&file_exclude_regex);
645       rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
646       if (rc != 0)
647         argp_failure(state, 1, EINVAL, "regular expression");
648       break;
649     case 'r':
650       if (passive_p)
651         argp_failure(state, 1, EINVAL, "-r option inconsistent with passive mode");
652       regex_groom = true;
653       break;
654     case ARGP_KEY_FDCACHE_FDS:
655       // deprecated
656       break;
657     case ARGP_KEY_FDCACHE_MBS:
658       fdcache_mbs = atol (arg);
659       break;
660     case ARGP_KEY_FDCACHE_PREFETCH:
661       fdcache_prefetch = atol (arg);
662       break;
663     case ARGP_KEY_FDCACHE_MINTMP:
664       fdcache_mintmp = atol (arg);
665       if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
666         argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
667       break;
668     case ARGP_KEY_FORWARDED_TTL_LIMIT:
669       forwarded_ttl_limit = (unsigned) atoi(arg);
670       break;
671     case ARGP_KEY_ARG:
672       source_paths.insert(string(arg));
673       break;
674     case ARGP_KEY_FDCACHE_PREFETCH_FDS:
675       // deprecated
676       break;
677     case ARGP_KEY_FDCACHE_PREFETCH_MBS:
678       // deprecated
679       break;
680     case ARGP_KEY_PASSIVE:
681       passive_p = true;
682       if (source_paths.size() > 0
683           || maxigroom
684           || extra_ddl.size() > 0
685           || traverse_logical)
686         // other conflicting options tricky to check
687         argp_failure(state, 1, EINVAL, "inconsistent options with passive mode");
688       break;
689     case ARGP_KEY_DISABLE_SOURCE_SCAN:
690       scan_source_info = false;
691       break;
692     case ARGP_SCAN_CHECKPOINT:
693       scan_checkpoint = atol (arg);
694       if (scan_checkpoint < 0)
695         argp_failure(state, 1, EINVAL, "scan checkpoint");
696       break;
697       // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
698     default: return ARGP_ERR_UNKNOWN;
699     }
700 
701   return 0;
702 }
703 
704 
705 ////////////////////////////////////////////////////////////////////////
706 
707 
708 static void add_mhd_response_header (struct MHD_Response *r,
709 				     const char *h, const char *v);
710 
711 // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
712 
713 struct reportable_exception
714 {
715   int code;
716   string message;
717 
reportable_exceptionreportable_exception718   reportable_exception(int c, const string& m): code(c), message(m) {}
reportable_exceptionreportable_exception719   reportable_exception(const string& m): code(503), message(m) {}
reportable_exceptionreportable_exception720   reportable_exception(): code(503), message() {}
721 
722   void report(ostream& o) const; // defined under obatched() class below
723 
mhd_send_responsereportable_exception724   MHD_RESULT mhd_send_response(MHD_Connection* c) const {
725     MHD_Response* r = MHD_create_response_from_buffer (message.size(),
726                                                        (void*) message.c_str(),
727                                                        MHD_RESPMEM_MUST_COPY);
728     add_mhd_response_header (r, "Content-Type", "text/plain");
729     MHD_RESULT rc = MHD_queue_response (c, code, r);
730     MHD_destroy_response (r);
731     return rc;
732   }
733 };
734 
735 
736 struct sqlite_exception: public reportable_exception
737 {
sqlite_exceptionsqlite_exception738   sqlite_exception(int rc, const string& msg):
739     reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {
740     inc_metric("error_count","sqlite3",sqlite3_errstr(rc));
741   }
742 };
743 
744 struct libc_exception: public reportable_exception
745 {
libc_exceptionlibc_exception746   libc_exception(int rc, const string& msg):
747     reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
748     inc_metric("error_count","libc",strerror(rc));
749   }
750 };
751 
752 
753 struct archive_exception: public reportable_exception
754 {
archive_exceptionarchive_exception755   archive_exception(const string& msg):
756     reportable_exception(string("libarchive error: ") + msg) {
757       inc_metric("error_count","libarchive",msg);
758   }
archive_exceptionarchive_exception759   archive_exception(struct archive* a, const string& msg):
760     reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
761     inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
762   }
763 };
764 
765 
766 struct elfutils_exception: public reportable_exception
767 {
elfutils_exceptionelfutils_exception768   elfutils_exception(int rc, const string& msg):
769     reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
770     inc_metric("error_count","elfutils",elf_errmsg(rc));
771   }
772 };
773 
774 
775 ////////////////////////////////////////////////////////////////////////
776 
// A de-duplicating work queue shared between three kinds of threads:
// producers (push_back), consumers ("fronters": wait_front/done_front)
// and idlers (wait_idle/done_idle), which only run while the queue is
// empty and no consumer is busy.
template <typename Payload>
class workq
{
  unordered_set<Payload> q;  // pending items; set semantics eliminate duplicates
  mutex mtx;                 // guards all the fields of this object
  condition_variable cv;     // signalled on every relevant state change
  bool dead = false;         // set by nuke(); releases all waiters
  unsigned idlers = 0;       // threads that passed wait_idle() but not yet done_idle()
  unsigned fronters = 0;     // threads that passed wait_front() but not yet done_front()

  // Publish the current queue depth as a metric.  Caller must hold mtx.
  void publish_depth()
  {
    set_metric("thread_work_pending","role","scan", q.size());
  }

public:
  workq() {}
  ~workq() {}

  // Enqueue p (a no-op for the set if an equal item is already pending)
  // and wake everybody up.
  void push_back(const Payload& p)
  {
    lock_guard<mutex> hold(mtx);
    q.insert (p);
    publish_depth();
    cv.notify_all();
  }

  // kill this workqueue, wake up all idlers / scanners
  void nuke()
  {
    lock_guard<mutex> hold(mtx);
    // optional: q.clear();
    dead = true;
    cv.notify_all();
  }

  // clear the workqueue, when scanning is interrupted with USR2
  void clear()
  {
    lock_guard<mutex> hold(mtx);
    q.clear();
    publish_depth();
    // NB: there may still be some live fronters
    cv.notify_all(); // maybe wake up waiting idlers
  }

  // Block this scanner thread until there is work to do and no active
  // idler.  On success, move one item into p and return true; return
  // false once the queue has been nuked.
  bool wait_front (Payload& p)
  {
    unique_lock<mutex> hold(mtx);
    cv.wait(hold, [this] { return dead || (!q.empty() && idlers == 0); });
    if (dead)
      return false;

    auto first = q.begin();
    p = *first;
    q.erase (first);
    fronters ++; // prevent idlers from starting awhile, even if empty q
    publish_depth();
    // NB: don't wake up idlers yet!  The consumer is busy
    // processing this element until it calls done_front().
    return true;
  }

  // notify waitq that scanner thread is done with that last item
  void done_front ()
  {
    lock_guard<mutex> hold(mtx);
    fronters --;
    if (q.empty() && fronters == 0)
      cv.notify_all(); // maybe wake up waiting idlers
  }

  // Block this idler thread until there is no work to do (empty queue
  // and no busy scanner) or the queue has been nuked; then register it
  // as an active idler.
  void wait_idle ()
  {
    unique_lock<mutex> hold(mtx);
    cv.notify_all(); // maybe wake up waiting scanners
    cv.wait(hold, [this] { return dead || (q.empty() && fronters == 0); });
    idlers ++;
  }

  // Unregister this idler thread, letting scanners proceed again.
  void done_idle ()
  {
    lock_guard<mutex> hold(mtx);
    idlers --;
    cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
  }
};
862 
using stat_t = struct stat;
using scan_payload = pair<string,stat_t>; // (path name, its stat data)

// Order scan payloads by path name only; don't bother comparing the
// stat fields.
inline bool operator< (const scan_payload& a, const scan_payload& b)
{
  return a.first.compare(b.first) < 0;
}
869 
870 namespace std { // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480
871   template<> struct hash<::scan_payload>
872   {
operator ()std::hash873     std::size_t operator() (const ::scan_payload& p) const noexcept
874     {
875       return hash<string>()(p.first);
876     }
877   };
878   template<> struct equal_to<::scan_payload>
879   {
operator ()std::equal_to880     std::size_t operator() (const ::scan_payload& a, const ::scan_payload& b) const noexcept
881     {
882       return a.first == b.first;
883     }
884   };
885 }
886 
// The file-wide work queue of scan payloads (path name + stat data).
static workq<scan_payload> scanq; // just a single one
// producer & idler: thread_main_fts_source_paths()
// consumer: thread_main_scanner()
// idler: thread_main_groom()
891 
892 
893 ////////////////////////////////////////////////////////////////////////
894 
895 // Unique set is a thread-safe structure that lends 'ownership' of a value
896 // to a thread.  Other threads requesting the same thing are made to wait.
897 // It's like a semaphore-on-demand.
template <typename T>
class unique_set
{
private:
  set<T> values;          // values currently held by some thread
  mutex mtx;              // guards values
  condition_variable cv;  // signalled whenever a value is released
public:
  unique_set() {}
  ~unique_set() {}

  // Block until no other holder has an equal value, then record value
  // as held.
  void acquire(const T& value)
  {
    unique_lock<mutex> hold(mtx);
    cv.wait(hold, [&] { return values.count(value) == 0; });
    values.insert(value);
  }

  // Drop value from the held set and wake all waiters so that they can
  // re-check whether the value they want is free now.
  void release(const T& value)
  {
    lock_guard<mutex> hold(mtx);
    // assert (values.count(value) != 0);
    values.erase(value);
    cv.notify_all();
  }
};
925 
926 
927 // This is the object that's instantiate to uniquely hold a value in a
928 // RAII-pattern way.
929 template <typename T>
930 class unique_set_reserver
931 {
932 private:
933   unique_set<T>& please_hold;
934   T mine;
935 public:
unique_set_reserver(unique_set<T> & t,const T & value)936   unique_set_reserver(unique_set<T>& t, const T& value):
937     please_hold(t), mine(value)  { please_hold.acquire(mine); }
~unique_set_reserver()938   ~unique_set_reserver() { please_hold.release(mine); }
939 };
940 
941 
942 ////////////////////////////////////////////////////////////////////////
943 
944 // periodic_barrier is a concurrency control object that lets N threads
945 // periodically (based on counter value) agree to wait at a barrier,
946 // let one of them carry out some work, then be set free
947 
class periodic_barrier
{
private:
  unsigned period; // number of count() reports to trigger barrier activation
  unsigned threads; // number of threads participating
  mutex mtx; // protects all the following fields
  unsigned counter; // count of count() reports in the current generation
  unsigned generation; // barrier activation generation
  unsigned waiting; // number of threads waiting for barrier
  bool dead; // set by nuke(); stops all waiting ("bring out your dead")
  condition_variable cv;
public:
  // t: number of participating threads; p: number of count() calls per
  // barrier activation.
  // NOTE(review): p == 0 would make the "period-1" comparisons in count()
  // wrap around; presumably callers never pass 0 -- confirm.
  periodic_barrier(unsigned t, unsigned p):
    period(p), threads(t), counter(0), generation(0), waiting(0), dead(false) { }
  virtual ~periodic_barrier() {}

  // The work carried out by one thread while the others are held at the
  // barrier.  Called from count() with mtx held.
  virtual void periodic_barrier_work() noexcept = 0;

  // Mark the barrier dead and wake every thread blocked in count().
  void nuke() {
    unique_lock<mutex> lock(mtx);
    dead = true;
    cv.notify_all();
  }

  // Report one unit of progress.  Usually this just increments the
  // counter.  The call that completes a period becomes the "doer": it
  // waits for the other threads to arrive, runs periodic_barrier_work(),
  // then starts a new generation.  Calls arriving in the meantime block
  // as "waiters" until the generation changes or the barrier is nuked.
  void count()
  {
    unique_lock<mutex> lock(mtx);
    // remember the generation we arrived in, to detect the doer's reset
    unsigned prev_generation = this->generation;
    if (counter < period-1) // normal case: counter just freely running
      {
        counter ++;
        return;
      }
    else if (counter == period-1) // we're the doer
      {
        counter = period; // entering barrier holding phase
        cv.notify_all();
        while (waiting < threads-1 && !dead)
          cv.wait(lock);
        // all other threads are now stuck in the barrier
        // (or the barrier was nuked, in which case we proceed regardless)
        this->periodic_barrier_work(); // NB: we're holding the mutex the whole time
        // reset for next barrier, releasing other waiters
        counter = 0;
        generation ++;
        cv.notify_all();
        return;
      }
    else if (counter == period) // we're a waiter, in holding phase
      {
        waiting ++;
        cv.notify_all(); // let the doer re-check its head count
        while (counter == period && generation == prev_generation && !dead)
          cv.wait(lock);
        waiting --;
        return;
      }
  }
};
1005 
1006 
1007 
1008 ////////////////////////////////////////////////////////////////////////
1009 
1010 
1011 // Print a standard timestamp.
1012 static ostream&
timestamp(ostream & o)1013 timestamp (ostream &o)
1014 {
1015   char datebuf[80];
1016   char *now2 = NULL;
1017   time_t now_t = time(NULL);
1018   struct tm now;
1019   struct tm *nowp = gmtime_r (&now_t, &now);
1020   if (nowp)
1021     {
1022       (void) strftime (datebuf, sizeof (datebuf), "%c", nowp);
1023       now2 = datebuf;
1024     }
1025 
1026   return o << "[" << (now2 ? now2 : "") << "] "
1027            << "(" << getpid () << "/" << tid() << "): ";
1028 }
1029 
1030 
1031 // A little class that impersonates an ostream to the extent that it can
1032 // take << streaming operations.  It batches up the bits into an internal
1033 // stringstream until it is destroyed; then flushes to the original ostream.
1034 // It adds a timestamp
1035 class obatched
1036 {
1037 private:
1038   ostream& o;
1039   stringstream stro;
1040   static mutex lock;
1041 public:
obatched(ostream & oo,bool timestamp_p=true)1042   obatched(ostream& oo, bool timestamp_p = true): o(oo)
1043   {
1044     if (timestamp_p)
1045       timestamp(stro);
1046   }
~obatched()1047   ~obatched()
1048   {
1049     unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
1050     o << stro.str();
1051     o.flush();
1052   }
operator ostream&()1053   operator ostream& () { return stro; }
operator <<(const T & t)1054   template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
1055 };
1056 mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
1057 
1058 
report(ostream & o) const1059 void reportable_exception::report(ostream& o) const {
1060   obatched(o) << message << endl;
1061 }
1062 
1063 
1064 ////////////////////////////////////////////////////////////////////////
1065 
1066 
1067 // RAII style sqlite prepared-statement holder that matches { } block lifetime
1068 
1069 struct sqlite_ps
1070 {
1071 private:
1072   sqlite3* db;
1073   const string nickname;
1074   const string sql;
1075   sqlite3_stmt *pp;
1076 
1077   sqlite_ps(const sqlite_ps&); // make uncopyable
1078   sqlite_ps& operator=(const sqlite_ps &); // make unassignable
1079 
1080 public:
sqlite_pssqlite_ps1081   sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
1082     // tmp_ms_metric tick("sqlite3","prep",nickname);
1083     if (verbose > 4)
1084       obatched(clog) << nickname << " prep " << sql << endl;
1085     int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
1086     if (rc != SQLITE_OK)
1087       throw sqlite_exception(rc, "prepare " + sql);
1088   }
1089 
resetsqlite_ps1090   sqlite_ps& reset()
1091   {
1092     tmp_ms_metric tick("sqlite3","reset",nickname);
1093     sqlite3_reset(this->pp);
1094     return *this;
1095   }
1096 
bindsqlite_ps1097   sqlite_ps& bind(int parameter, const string& str)
1098   {
1099     if (verbose > 4)
1100       obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
1101     int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
1102     if (rc != SQLITE_OK)
1103       throw sqlite_exception(rc, "sqlite3 bind");
1104     return *this;
1105   }
1106 
bindsqlite_ps1107   sqlite_ps& bind(int parameter, int64_t value)
1108   {
1109     if (verbose > 4)
1110       obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
1111     int rc = sqlite3_bind_int64 (this->pp, parameter, value);
1112     if (rc != SQLITE_OK)
1113       throw sqlite_exception(rc, "sqlite3 bind");
1114     return *this;
1115   }
1116 
bindsqlite_ps1117   sqlite_ps& bind(int parameter)
1118   {
1119     if (verbose > 4)
1120       obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
1121     int rc = sqlite3_bind_null (this->pp, parameter);
1122     if (rc != SQLITE_OK)
1123       throw sqlite_exception(rc, "sqlite3 bind");
1124     return *this;
1125   }
1126 
1127 
step_ok_donesqlite_ps1128   void step_ok_done() {
1129     tmp_ms_metric tick("sqlite3","step_done",nickname);
1130     int rc = sqlite3_step (this->pp);
1131     if (verbose > 4)
1132       obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
1133     if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
1134       throw sqlite_exception(rc, "sqlite3 step");
1135     (void) sqlite3_reset (this->pp);
1136   }
1137 
1138 
stepsqlite_ps1139   int step() {
1140     tmp_ms_metric tick("sqlite3","step",nickname);
1141     int rc = sqlite3_step (this->pp);
1142     if (verbose > 4)
1143       obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
1144     return rc;
1145   }
1146 
~sqlite_pssqlite_ps1147   ~sqlite_ps () { sqlite3_finalize (this->pp); }
operator sqlite3_stmt*sqlite_ps1148   operator sqlite3_stmt* () { return this->pp; }
1149 };
1150 
1151 
1152 ////////////////////////////////////////////////////////////////////////
1153 
1154 
1155 struct sqlite_checkpoint_pb: public periodic_barrier
1156 {
1157   // NB: don't use sqlite_ps since it can throw exceptions during ctor etc.
sqlite_checkpoint_pbsqlite_checkpoint_pb1158   sqlite_checkpoint_pb(unsigned t, unsigned p):
1159     periodic_barrier(t, p) { }
1160 
periodic_barrier_worksqlite_checkpoint_pb1161   void periodic_barrier_work() noexcept
1162   {
1163     (void) sqlite3_exec (db, "pragma wal_checkpoint(truncate);", NULL, NULL, NULL);
1164   }
1165 };
1166 
// The periodic barrier object used during scanning; null until set up.
static periodic_barrier* scan_barrier = 0; // initialized in main()
1168 
1169 
1170 ////////////////////////////////////////////////////////////////////////
1171 
1172 // RAII style templated autocloser
1173 
// Holds a payload together with a cleanup function, and calls the
// function on the payload when this object is destroyed.  The function's
// return value (of type Ignore) is discarded.
template <class Payload, class Ignore>
struct defer_dtor
{
public:
  typedef Ignore (*dtor_fn) (Payload);

private:
  Payload held;    // the value to clean up
  dtor_fn cleanup; // called exactly once, from the destructor

  defer_dtor(const defer_dtor<Payload,Ignore>&); // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &); // make unassignable

public:
  defer_dtor(Payload _p, dtor_fn _fn): held(_p), cleanup(_fn) {}
  ~defer_dtor() { (void) cleanup(held); }
};
1192 
1193 
1194 
1195 ////////////////////////////////////////////////////////////////////////
1196 
1197 
// Reduce a header value to a conservative character set: keep only
// alphanumerics and the characters / . , _ : and drop everything else
// (spaces, control characters, quotes, ...).  Returns the filtered copy.
static string
header_censor(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // The <ctype.h> functions require a value representable as
      // unsigned char (or EOF); a plain char holding a byte >= 0x80 may
      // be negative, so convert first.
      if (isalnum((unsigned char) x) || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
        y += x;
    }
  return y;
}
1209 
1210 
1211 static string
conninfo(struct MHD_Connection * conn)1212 conninfo (struct MHD_Connection * conn)
1213 {
1214   char hostname[256]; // RFC1035
1215   char servname[256];
1216   int sts = -1;
1217 
1218   if (conn == 0)
1219     return "internal";
1220 
1221   /* Look up client address data. */
1222   const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
1223                                                                MHD_CONNECTION_INFO_CLIENT_ADDRESS);
1224   struct sockaddr *so = u ? u->client_addr : 0;
1225 
1226   if (so && so->sa_family == AF_INET) {
1227     sts = getnameinfo (so, sizeof (struct sockaddr_in),
1228                        hostname, sizeof (hostname),
1229                        servname, sizeof (servname),
1230                        NI_NUMERICHOST | NI_NUMERICSERV);
1231   } else if (so && so->sa_family == AF_INET6) {
1232     struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
1233     if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
1234       struct sockaddr_in addr4;
1235       memset (&addr4, 0, sizeof(addr4));
1236       addr4.sin_family = AF_INET;
1237       addr4.sin_port = addr6->sin6_port;
1238       memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
1239       sts = getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
1240                          hostname, sizeof (hostname),
1241                          servname, sizeof (servname),
1242                          NI_NUMERICHOST | NI_NUMERICSERV);
1243     } else {
1244       sts = getnameinfo (so, sizeof (struct sockaddr_in6),
1245                          hostname, sizeof (hostname),
1246                          servname, sizeof (servname),
1247                          NI_NUMERICHOST | NI_NUMERICSERV);
1248     }
1249   }
1250 
1251   if (sts != 0) {
1252     hostname[0] = servname[0] = '\0';
1253   }
1254 
1255   // extract headers relevant to administration
1256   const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
1257   const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
1258   // NB: these are untrustworthy, beware if machine-processing log files
1259 
1260   return string(hostname) + string(":") + string(servname) +
1261     string(" UA:") + header_censor(string(user_agent)) +
1262     string(" XFF:") + header_censor(string(x_forwarded_for));
1263 }
1264 
1265 
1266 
1267 ////////////////////////////////////////////////////////////////////////
1268 
1269 /* Wrapper for MHD_add_response_header that logs an error if we
1270    couldn't add the specified header.  */
1271 static void
add_mhd_response_header(struct MHD_Response * r,const char * h,const char * v)1272 add_mhd_response_header (struct MHD_Response *r,
1273 			 const char *h, const char *v)
1274 {
1275   if (MHD_add_response_header (r, h, v) == MHD_NO)
1276     obatched(clog) << "Error: couldn't add '" << h << "' header" << endl;
1277 }
1278 
1279 static void
add_mhd_last_modified(struct MHD_Response * resp,time_t mtime)1280 add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
1281 {
1282   struct tm now;
1283   struct tm *nowp = gmtime_r (&mtime, &now);
1284   if (nowp != NULL)
1285     {
1286       char datebuf[80];
1287       size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT",
1288                             nowp);
1289       if (rc > 0 && rc < sizeof (datebuf))
1290         add_mhd_response_header (resp, "Last-Modified", datebuf);
1291     }
1292 
1293   add_mhd_response_header (resp, "Cache-Control", "public");
1294 }
1295 
// quote all questionable characters of str for safe passage through a sh -c expansion.
// Every character other than an alphanumeric or '/' gets a backslash
// in front of it.  Returns the escaped copy.
static string
shell_escape(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // The <ctype.h> functions require a value representable as
      // unsigned char (or EOF); a plain char holding a byte >= 0x80 may
      // be negative, so convert first.
      if (! isalnum((unsigned char) x) && x != '/')
        y += "\\";
      y += x;
    }
  return y;
}
1309 
1310 
1311 // PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
1312 //
1313 // Namely:
1314 //    //         ->   /
1315 //    /foo/../   ->   /
1316 //    /./        ->   /
1317 //
1318 // This mapping is done on dwarf-side source path names, which may
1319 // include these constructs, so we can deal with debuginfod clients
1320 // that accidentally canonicalize the paths.
1321 //
1322 // realpath(3) is close but not quite right, because it also resolves
1323 // symbolic links.  Symlinks at the debuginfod server have nothing to
1324 // do with the build-time symlinks, thus they must not be considered.
1325 //
1326 // see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
1327 // see also libc __realpath()
1328 // see also llvm llvm::sys::path::remove_dots()
static string
canon_pathname (const string& input)
{
  string rest = input; // 5.2.4 (1): the part of the input not yet consumed
  string out;          // the canonicalized path built so far

  // Each round consumes a nonempty prefix of the remaining input, so the
  // loop terminates.
  while (! rest.empty())
    {
      // 5.2.4 (2) A: drop a leading "../" or "./"
      if (rest.compare(0, 3, "../") == 0)
        rest.erase(0, 3);
      else if (rest.compare(0, 2, "./") == 0)
        rest.erase(0, 2);

      // 5.2.4 (2) B: "/./" becomes "/"
      else if (rest.compare(0, 3, "/./") == 0)
        rest.erase(0, 2);
      else if (rest == "/.")
        rest.clear(); // no need to handle "/." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) C: "/../" becomes "/" and cancels the last output segment
      else if (rest.compare(0, 4, "/../") == 0)
        {
          rest.erase(0, 3);
          const string::size_type last_slash = out.rfind('/');
          if (last_slash == string::npos)
            out.clear();
          else
            out.erase(last_slash);
        }
      else if (rest == "/..")
        rest.clear(); // no need to handle "/.." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) D
      // no need to handle these cases; we're dealing with file names
      else if (rest == "." || rest == "..")
        rest.clear();

      // POSIX special: map // to /
      else if (rest.compare(0, 2, "//") == 0)
        rest.erase(0, 1);

      // 5.2.4 (2) E: move one segment (with its leading slash, if any)
      // from the input to the output
      else
        {
          const string::size_type start = (rest[0] == '/') ? 1 : 0; // skip first slash
          const string::size_type next_slash = rest.find('/', start);
          // with next_slash == npos, both calls take the whole remainder
          out.append(rest, 0, next_slash);
          rest.erase(0, next_slash);
        }
    }

  return out;
}
1384 
1385 
1386 // Estimate available free space for a given filesystem via statfs(2).
1387 // Return true if the free fraction is known to be smaller than the
1388 // given minimum percentage.  Also update a related metric.
statfs_free_enough_p(const string & path,const string & label,long minfree=0)1389 bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
1390 {
1391   struct statfs sfs;
1392   int rc = statfs(path.c_str(), &sfs);
1393   if (rc == 0)
1394     {
1395       double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
1396       set_metric("filesys_free_ratio","purpose",label, s);
1397       return ((s * 100.0) < minfree);
1398     }
1399   return false;
1400 }
1401 
1402 
1403 
1404 // A map-like class that owns a cache of file descriptors (indexed by
1405 // file / content names).
1406 //
1407 // If only it could use fd's instead of file names ... but we can't
1408 // dup(2) to create independent descriptors for the same unlinked
1409 // files, so would have to use some goofy linux /proc/self/fd/%d
1410 // hack such as the following
1411 
1412 #if 0
1413 int superdup(int fd)
1414 {
1415 #ifdef __linux__
1416   char *fdpath = NULL;
1417   int rc = asprintf(& fdpath, "/proc/self/fd/%d", fd);
1418   int newfd;
1419   if (rc >= 0)
1420     newfd = open(fdpath, O_RDONLY);
1421   else
1422     newfd = -1;
1423   free (fdpath);
1424   return newfd;
1425 #else
1426   return -1;
1427 #endif
1428 }
1429 #endif
1430 
1431 class libarchive_fdcache
1432 {
1433 private:
1434   mutex fdcache_lock;
1435 
1436   typedef pair<string,string> key; // archive, entry
1437   struct fdcache_entry
1438   {
1439     string fd; // file name (probably in $TMPDIR), not an actual open fd (EMFILE)
1440     double fd_size_mb; // slightly rounded up megabytes
1441     time_t freshness; // when was this entry created or requested last
1442     unsigned request_count; // how many requests were made; or 0=prefetch only
1443     double latency; // how many seconds it took to extract the file
1444   };
1445 
1446   map<key,fdcache_entry> entries; // optimized for lookup
1447   time_t last_cleaning;
1448   long max_fds;
1449   long max_mbs;
1450 
1451 public:
set_metrics()1452   void set_metrics()
1453   {
1454     double fdcache_mb = 0.0;
1455     double prefetch_mb = 0.0;
1456     unsigned fdcache_count = 0;
1457     unsigned prefetch_count = 0;
1458     for (auto &i : entries) {
1459       if (i.second.request_count) {
1460         fdcache_mb += i.second.fd_size_mb;
1461         fdcache_count ++;
1462       } else {
1463         prefetch_mb += i.second.fd_size_mb;
1464         prefetch_count ++;
1465       }
1466     }
1467     set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
1468     set_metric("fdcache_count", fdcache_count);
1469     set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
1470     set_metric("fdcache_prefetch_count", prefetch_count);
1471   }
1472 
intern(const string & a,const string & b,string fd,off_t sz,bool requested_p,double lat)1473   void intern(const string& a, const string& b, string fd, off_t sz,
1474               bool requested_p, double lat)
1475   {
1476     {
1477       unique_lock<mutex> lock(fdcache_lock);
1478       time_t now = time(NULL);
1479       // there is a chance it's already in here, just wasn't found last time
1480       // if so, there's nothing to do but count our luck
1481       auto i = entries.find(make_pair(a,b));
1482       if (i != entries.end())
1483         {
1484           inc_metric("fdcache_op_count","op","redundant_intern");
1485           if (requested_p) i->second.request_count ++; // repeat prefetch doesn't count
1486           i->second.freshness = now;
1487           // We need to nuke the temp file, since interning passes
1488           // responsibility over the path to this structure.  It is
1489           // possible that the caller still has an fd open, but that's
1490           // OK.
1491           unlink (fd.c_str());
1492           return;
1493         }
1494       double mb = (sz+65535)/1048576.0; // round up to 64K block
1495       fdcache_entry n = { .fd=fd, .fd_size_mb=mb,
1496                           .freshness=now, .request_count = requested_p?1U:0U,
1497                           .latency=lat};
1498       entries.insert(make_pair(make_pair(a,b),n));
1499 
1500       if (requested_p)
1501         inc_metric("fdcache_op_count","op","enqueue");
1502       else
1503         inc_metric("fdcache_op_count","op","prefetch_enqueue");
1504 
1505       if (verbose > 3)
1506         obatched(clog) << "fdcache interned a=" << a << " b=" << b
1507                        << " fd=" << fd << " mb=" << mb << " front=" << requested_p
1508                        << " latency=" << lat << endl;
1509 
1510       set_metrics();
1511     }
1512 
1513     // NB: we age the cache at lookup time too
1514     if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
1515       {
1516         inc_metric("fdcache_op_count","op","emerg-flush");
1517         obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
1518         this->limit(0); // emergency flush
1519       }
1520     else // age cache normally
1521       this->limit(max_mbs);
1522   }
1523 
lookup(const string & a,const string & b)1524   int lookup(const string& a, const string& b)
1525   {
1526     int fd = -1;
1527     {
1528       unique_lock<mutex> lock(fdcache_lock);
1529       auto i = entries.find(make_pair(a,b));
1530       if (i != entries.end())
1531         {
1532           if (i->second.request_count == 0) // was a prefetch!
1533             {
1534               inc_metric("fdcache_prefetch_saved_milliseconds_count");
1535               add_metric("fdcache_prefetch_saved_milliseconds_sum", i->second.latency*1000.);
1536             }
1537           i->second.request_count ++;
1538           i->second.freshness = time(NULL);
1539           // brag about our success
1540           inc_metric("fdcache_op_count","op","prefetch_access"); // backward compat
1541           inc_metric("fdcache_saved_milliseconds_count");
1542           add_metric("fdcache_saved_milliseconds_sum", i->second.latency*1000.);
1543           fd = open(i->second.fd.c_str(), O_RDONLY);
1544         }
1545     }
1546 
1547     if (fd >= 0)
1548       inc_metric("fdcache_op_count","op","lookup_hit");
1549     else
1550       inc_metric("fdcache_op_count","op","lookup_miss");
1551 
1552     // NB: no need to age the cache after just a lookup
1553 
1554     return fd;
1555   }
1556 
probe(const string & a,const string & b)1557   int probe(const string& a, const string& b) // just a cache residency check - don't modify state, don't open
1558   {
1559     unique_lock<mutex> lock(fdcache_lock);
1560     auto i = entries.find(make_pair(a,b));
1561     if (i != entries.end()) {
1562       inc_metric("fdcache_op_count","op","probe_hit");
1563       return true;
1564     } else {
1565       inc_metric("fdcache_op_count","op","probe_miss");
1566       return false;
1567    }
1568   }
1569 
clear(const string & a,const string & b)1570   void clear(const string& a, const string& b)
1571   {
1572     unique_lock<mutex> lock(fdcache_lock);
1573     auto i = entries.find(make_pair(a,b));
1574     if (i != entries.end()) {
1575       inc_metric("fdcache_op_count","op",
1576                  i->second.request_count > 0 ? "clear" : "prefetch_clear");
1577       unlink (i->second.fd.c_str());
1578       entries.erase(i);
1579       set_metrics();
1580       return;
1581     }
1582   }
1583 
limit(long maxmbs,bool metrics_p=true)1584   void limit(long maxmbs, bool metrics_p = true)
1585   {
1586     time_t now = time(NULL);
1587 
1588     // avoid overly frequent limit operations
1589     if (maxmbs > 0 && (now - this->last_cleaning) < 10) // probably not worth parametrizing
1590       return;
1591     this->last_cleaning = now;
1592 
1593     if (verbose > 3 && (this->max_mbs != maxmbs))
1594       obatched(clog) << "fdcache limited to maxmbs=" << maxmbs << endl;
1595 
1596     unique_lock<mutex> lock(fdcache_lock);
1597 
1598     this->max_mbs = maxmbs;
1599     double total_mb = 0.0;
1600 
1601     map<double, pair<string,string>> sorted_entries;
1602     for (auto &i: entries)
1603       {
1604         total_mb += i.second.fd_size_mb;
1605 
1606         // need a scalar quantity that combines these inputs in a sensible way:
1607         //
1608         // 1) freshness of this entry (last time it was accessed)
1609         // 2) size of this entry
1610         // 3) number of times it has been accessed (or if just prefetched with 0 accesses)
1611         // 4) latency it required to extract
1612         //
1613         // The lower the "score", the earlier garbage collection will
1614         // nuke it, so to prioritize entries for preservation, the
1615         // score should be higher, and vice versa.
1616         time_t factor_1_freshness = (now - i.second.freshness); // seconds
1617         double factor_2_size = i.second.fd_size_mb; // megabytes
1618         unsigned factor_3_accesscount = i.second.request_count; // units
1619         double factor_4_latency = i.second.latency; // seconds
1620 
1621         #if 0
1622         double score = - factor_1_freshness; // simple LRU
1623         #endif
1624 
1625         double score = 0.
1626           - log1p(factor_1_freshness)                // penalize old file
1627           - log1p(factor_2_size)                     // penalize large file
1628           + factor_4_latency * factor_3_accesscount; // reward slow + repeatedly read files
1629 
1630         if (verbose > 4)
1631           obatched(clog) << "fdcache scored score=" << score
1632                          << " a=" << i.first.first << " b=" << i.first.second
1633                          << " f1=" << factor_1_freshness << " f2=" << factor_2_size
1634                          << " f3=" << factor_3_accesscount << " f4=" << factor_4_latency
1635                          << endl;
1636 
1637         sorted_entries.insert(make_pair(score, i.first));
1638       }
1639 
1640     unsigned cleaned = 0;
1641     unsigned entries_original = entries.size();
1642     double cleaned_score_min = DBL_MAX;
1643     double cleaned_score_max = DBL_MIN;
1644 
1645     // drop as many entries[] as needed to bring total mb down to the threshold
1646     for (auto &i: sorted_entries) // in increasing score order!
1647       {
1648         if (this->max_mbs > 0 // if this is not a "clear entire table"
1649             && total_mb < this->max_mbs) // we've cleared enough to meet threshold
1650           break; // stop clearing
1651 
1652         auto j = entries.find(i.second);
1653         if (j == entries.end())
1654           continue; // should not happen
1655 
1656         if (cleaned == 0)
1657           cleaned_score_min = i.first;
1658         cleaned++;
1659         cleaned_score_max = i.first;
1660 
1661         if (verbose > 3)
1662           obatched(clog) << "fdcache evicted score=" << i.first
1663                          << " a=" << i.second.first << " b=" << i.second.second
1664                          << " fd=" << j->second.fd << " mb=" << j->second.fd_size_mb
1665                          << " rq=" << j->second.request_count << " lat=" << j->second.latency
1666                          << " fr=" << (now - j->second.freshness)
1667                          << endl;
1668         if (metrics_p)
1669           inc_metric("fdcache_op_count","op","evict");
1670 
1671         total_mb -= j->second.fd_size_mb;
1672         unlink (j->second.fd.c_str());
1673         entries.erase(j);
1674       }
1675 
1676     if (metrics_p)
1677       inc_metric("fdcache_op_count","op","evict_cycle");
1678 
1679     if (verbose > 1 && cleaned > 0)
1680       {
1681         obatched(clog) << "fdcache evicted num=" << cleaned << " of=" << entries_original
1682                        << " min=" << cleaned_score_min << " max=" << cleaned_score_max
1683                        << endl;
1684       }
1685 
1686     if (metrics_p) set_metrics();
1687   }
1688 
1689 
~libarchive_fdcache()1690   ~libarchive_fdcache()
1691   {
1692     // unlink any fdcache entries in $TMPDIR
1693     // don't update metrics; those globals may be already destroyed
1694     limit(0, false);
1695   }
1696 };
// The file-scope fdcache instance used by the handlers below.  As a
// static object its destructor, which empties the cache via
// limit(0, false), runs when the program's static objects are torn down.
static libarchive_fdcache fdcache;
1698 
1699 /* Search ELF_FD for an ELF/DWARF section with name SECTION.
1700    If found copy the section to a temporary file and return
1701    its file descriptor, otherwise return -1.
1702 
1703    The temporary file's mtime will be set to PARENT_MTIME.
1704    B_SOURCE should be a description of the parent file suitable
1705    for printing to the log.  */
1706 
1707 static int
extract_section(int elf_fd,int64_t parent_mtime,const string & b_source,const string & section,const timespec & extract_begin)1708 extract_section (int elf_fd, int64_t parent_mtime,
1709 		 const string& b_source, const string& section,
1710                  const timespec& extract_begin)
1711 {
1712   /* Search the fdcache.  */
1713   struct stat fs;
1714   int fd = fdcache.lookup (b_source, section);
1715   if (fd >= 0)
1716     {
1717       if (fstat (fd, &fs) != 0)
1718 	{
1719 	  if (verbose)
1720 	    obatched (clog) << "cannot fstate fdcache "
1721 			    << b_source << " " << section << endl;
1722 	  close (fd);
1723 	  return -1;
1724 	}
1725       if ((int64_t) fs.st_mtime != parent_mtime)
1726 	{
1727 	  if (verbose)
1728 	    obatched(clog) << "mtime mismatch for "
1729 			   << b_source << " " << section << endl;
1730 	  close (fd);
1731 	  return -1;
1732 	}
1733       /* Success.  */
1734       return fd;
1735     }
1736 
1737   Elf *elf = elf_begin (elf_fd, ELF_C_READ_MMAP_PRIVATE, NULL);
1738   if (elf == NULL)
1739     return -1;
1740 
1741   /* Try to find the section and copy the contents into a separate file.  */
1742   try
1743     {
1744       size_t shstrndx;
1745       int rc = elf_getshdrstrndx (elf, &shstrndx);
1746       if (rc < 0)
1747 	throw elfutils_exception (rc, "getshdrstrndx");
1748 
1749       Elf_Scn *scn = NULL;
1750       while (true)
1751 	{
1752 	  scn = elf_nextscn (elf, scn);
1753 	  if (scn == NULL)
1754 	    break;
1755 	  GElf_Shdr shdr_storage;
1756 	  GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
1757 	  if (shdr == NULL)
1758 	    break;
1759 
1760 	  const char *scn_name = elf_strptr (elf, shstrndx, shdr->sh_name);
1761 	  if (scn_name == NULL)
1762 	    break;
1763 	  if (scn_name == section)
1764 	    {
1765 	      Elf_Data *data = NULL;
1766 
1767 	      /* We found the desired section.  */
1768 	      data = elf_rawdata (scn, NULL);
1769 	      if (data == NULL)
1770 		throw elfutils_exception (elf_errno (), "elfraw_data");
1771 	      if (data->d_buf == NULL)
1772 		{
1773 		  obatched(clog) << "section " << section
1774 				 << " is empty" << endl;
1775 		  break;
1776 		}
1777 
1778 	      /* Create temporary file containing the section.  */
1779 	      char *tmppath = NULL;
1780 	      rc = asprintf (&tmppath, "%s/debuginfod-section.XXXXXX", tmpdir.c_str());
1781 	      if (rc < 0)
1782 		throw libc_exception (ENOMEM, "cannot allocate tmppath");
1783 	      defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1784 	      fd = mkstemp (tmppath);
1785 	      if (fd < 0)
1786 		throw libc_exception (errno, "cannot create temporary file");
1787 
1788 	      ssize_t res = write_retry (fd, data->d_buf, data->d_size);
1789 	      if (res < 0 || (size_t) res != data->d_size) {
1790                 close (fd);
1791                 unlink (tmppath);
1792 		throw libc_exception (errno, "cannot write to temporary file");
1793               }
1794 
1795 	      /* Set mtime to be the same as the parent file's mtime.  */
1796 	      struct timespec tvs[2];
1797 	      if (fstat (elf_fd, &fs) != 0) {
1798                 close (fd);
1799                 unlink (tmppath);
1800 		throw libc_exception (errno, "cannot fstat file");
1801               }
1802 
1803 	      tvs[0].tv_sec = 0;
1804 	      tvs[0].tv_nsec = UTIME_OMIT;
1805 	      tvs[1] = fs.st_mtim;
1806 	      (void) futimens (fd, tvs);
1807 
1808               struct timespec extract_end;
1809               clock_gettime (CLOCK_MONOTONIC, &extract_end);
1810               double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
1811                 + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
1812 
1813 	      /* Add to fdcache.  */
1814 	      fdcache.intern (b_source, section, tmppath, data->d_size, true, extract_time);
1815 	      break;
1816 	    }
1817 	}
1818     }
1819   catch (const reportable_exception &e)
1820     {
1821       e.report (clog);
1822       close (fd);
1823       fd = -1;
1824     }
1825 
1826   elf_end (elf);
1827   return fd;
1828 }
1829 
1830 static struct MHD_Response*
handle_buildid_f_match(bool internal_req_t,int64_t b_mtime,const string & b_source0,const string & section,int * result_fd)1831 handle_buildid_f_match (bool internal_req_t,
1832                         int64_t b_mtime,
1833                         const string& b_source0,
1834                         const string& section,
1835                         int *result_fd)
1836 {
1837   (void) internal_req_t; // ignored
1838 
1839   struct timespec extract_begin;
1840   clock_gettime (CLOCK_MONOTONIC, &extract_begin);
1841 
1842   int fd = open(b_source0.c_str(), O_RDONLY);
1843   if (fd < 0)
1844     throw libc_exception (errno, string("open ") + b_source0);
1845 
1846   // NB: use manual close(2) in error case instead of defer_dtor, because
1847   // in the normal case, we want to hand the fd over to libmicrohttpd for
1848   // file transfer.
1849 
1850   struct stat s;
1851   int rc = fstat(fd, &s);
1852   if (rc < 0)
1853     {
1854       close(fd);
1855       throw libc_exception (errno, string("fstat ") + b_source0);
1856     }
1857 
1858   if ((int64_t) s.st_mtime != b_mtime)
1859     {
1860       if (verbose)
1861         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1862       close(fd);
1863       return 0;
1864     }
1865 
1866   if (!section.empty ())
1867     {
1868       int scn_fd = extract_section (fd, s.st_mtime, b_source0, section, extract_begin);
1869       close (fd);
1870 
1871       if (scn_fd >= 0)
1872 	fd = scn_fd;
1873       else
1874 	{
1875 	  if (verbose)
1876 	    obatched (clog) << "cannot find section " << section
1877 			    << " for " << b_source0 << endl;
1878 	  return 0;
1879 	}
1880 
1881       rc = fstat(fd, &s);
1882       if (rc < 0)
1883 	{
1884 	  close (fd);
1885 	  throw libc_exception (errno, string ("fstat ") + b_source0
1886 				       + string (" ") + section);
1887 	}
1888     }
1889 
1890   struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
1891   inc_metric ("http_responses_total","result","file");
1892   if (r == 0)
1893     {
1894       if (verbose)
1895 	obatched(clog) << "cannot create fd-response for " << b_source0
1896 		       << " section=" << section << endl;
1897       close(fd);
1898     }
1899   else
1900     {
1901       add_mhd_response_header (r, "Content-Type", "application/octet-stream");
1902       add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
1903 			       to_string(s.st_size).c_str());
1904       add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source0.c_str());
1905       add_mhd_last_modified (r, s.st_mtime);
1906       if (verbose > 1)
1907 	obatched(clog) << "serving file " << b_source0 << " section=" << section << endl;
1908       /* libmicrohttpd will close it. */
1909       if (result_fd)
1910         *result_fd = fd;
1911     }
1912 
1913   return r;
1914 }
1915 
1916 // For security/portability reasons, many distro-package archives have
1917 // a "./" in front of path names; others have nothing, others have
1918 // "/".  Canonicalize them all to a single leading "/", with the
1919 // assumption that this matches the dwarf-derived file names too.
canonicalized_archive_entry_pathname(struct archive_entry * e)1920 string canonicalized_archive_entry_pathname(struct archive_entry *e)
1921 {
1922   string fn = archive_entry_pathname(e);
1923   if (fn.size() == 0)
1924     return fn;
1925   if (fn[0] == '/')
1926     return fn;
1927   if (fn[0] == '.')
1928     return fn.substr(1);
1929   else
1930     return string("/")+fn;
1931 }
1932 
1933 
1934 
1935 static struct MHD_Response*
handle_buildid_r_match(bool internal_req_p,int64_t b_mtime,const string & b_source0,const string & b_source1,const string & section,int * result_fd)1936 handle_buildid_r_match (bool internal_req_p,
1937                         int64_t b_mtime,
1938                         const string& b_source0,
1939                         const string& b_source1,
1940                         const string& section,
1941                         int *result_fd)
1942 {
1943   struct timespec extract_begin;
1944   clock_gettime (CLOCK_MONOTONIC, &extract_begin);
1945 
1946   struct stat fs;
1947   int rc = stat (b_source0.c_str(), &fs);
1948   if (rc != 0)
1949     throw libc_exception (errno, string("stat ") + b_source0);
1950 
1951   if ((int64_t) fs.st_mtime != b_mtime)
1952     {
1953       if (verbose)
1954         obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1955       return 0;
1956     }
1957 
1958   // check for a match in the fdcache first
1959   int fd = fdcache.lookup(b_source0, b_source1);
1960   while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
1961     {
1962       rc = fstat(fd, &fs);
1963       if (rc < 0) // disappeared?
1964         {
1965           if (verbose)
1966             obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
1967           close(fd);
1968           fdcache.clear(b_source0, b_source1);
1969           break; // branch out of if "loop", to try new libarchive fetch attempt
1970         }
1971 
1972       if (!section.empty ())
1973 	{
1974 	  int scn_fd = extract_section (fd, fs.st_mtime,
1975 					b_source0 + ":" + b_source1,
1976 					section, extract_begin);
1977 	  close (fd);
1978 	  if (scn_fd >= 0)
1979 	    fd = scn_fd;
1980 	  else
1981 	    {
1982 	      if (verbose)
1983 	        obatched (clog) << "cannot find section " << section
1984 				<< " for archive " << b_source0
1985 				<< " file " << b_source1 << endl;
1986 	      return 0;
1987 	    }
1988 
1989 	  rc = fstat(fd, &fs);
1990 	  if (rc < 0)
1991 	    {
1992 	      close (fd);
1993 	      throw libc_exception (errno,
1994 		string ("fstat archive ") + b_source0 + string (" file ") + b_source1
1995 		+ string (" section ") + section);
1996 	    }
1997 	}
1998 
1999       struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
2000       if (r == 0)
2001         {
2002           if (verbose)
2003             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
2004           close(fd);
2005           break; // branch out of if "loop", to try new libarchive fetch attempt
2006         }
2007 
2008       inc_metric ("http_responses_total","result","archive fdcache");
2009 
2010       add_mhd_response_header (r, "Content-Type", "application/octet-stream");
2011       add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
2012 			       to_string(fs.st_size).c_str());
2013       add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
2014       add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
2015       add_mhd_last_modified (r, fs.st_mtime);
2016       if (verbose > 1)
2017 	obatched(clog) << "serving fdcache archive " << b_source0
2018 		       << " file " << b_source1
2019 		       << " section=" << section << endl;
2020       /* libmicrohttpd will close it. */
2021       if (result_fd)
2022         *result_fd = fd;
2023       return r;
2024       // NB: see, we never go around the 'loop' more than once
2025     }
2026 
2027   // no match ... grumble, must process the archive
2028   string archive_decoder = "/dev/null";
2029   string archive_extension = "";
2030   for (auto&& arch : scan_archives)
2031     if (string_endswith(b_source0, arch.first))
2032       {
2033         archive_extension = arch.first;
2034         archive_decoder = arch.second;
2035       }
2036   FILE* fp;
2037 
2038   defer_dtor<FILE*,int>::dtor_fn dfn;
2039   if (archive_decoder != "cat")
2040     {
2041       string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
2042       fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
2043       dfn = pclose;
2044       if (fp == NULL)
2045         throw libc_exception (errno, string("popen ") + popen_cmd);
2046     }
2047   else
2048     {
2049       fp = fopen (b_source0.c_str(), "r");
2050       dfn = fclose;
2051       if (fp == NULL)
2052         throw libc_exception (errno, string("fopen ") + b_source0);
2053     }
2054   defer_dtor<FILE*,int> fp_closer (fp, dfn);
2055 
2056   struct archive *a;
2057   a = archive_read_new();
2058   if (a == NULL)
2059     throw archive_exception("cannot create archive reader");
2060   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
2061 
2062   rc = archive_read_support_format_all(a);
2063   if (rc != ARCHIVE_OK)
2064     throw archive_exception(a, "cannot select all format");
2065   rc = archive_read_support_filter_all(a);
2066   if (rc != ARCHIVE_OK)
2067     throw archive_exception(a, "cannot select all filters");
2068 
2069   rc = archive_read_open_FILE (a, fp);
2070   if (rc != ARCHIVE_OK)
2071     {
2072       obatched(clog) << "cannot open archive from pipe " << b_source0 << endl;
2073       throw archive_exception(a, "cannot open archive from pipe");
2074     }
2075 
2076   // archive traversal is in three stages, no, four stages:
2077   // 1) skip entries whose names do not match the requested one
2078   // 2) extract the matching entry name (set r = result)
2079   // 3) extract some number of prefetched entries (just into fdcache)
2080   // 4) abort any further processing
2081   struct MHD_Response* r = 0;                 // will set in stage 2
2082   unsigned prefetch_count =
2083     internal_req_p ? 0 : fdcache_prefetch;    // will decrement in stage 3
2084 
2085   while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
2086     {
2087       if (interrupted)
2088         break;
2089 
2090       struct archive_entry *e;
2091       rc = archive_read_next_header (a, &e);
2092       if (rc != ARCHIVE_OK)
2093         break;
2094 
2095       if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
2096         continue;
2097 
2098       string fn = canonicalized_archive_entry_pathname (e);
2099       if ((r == 0) && (fn != b_source1)) // stage 1
2100         continue;
2101 
2102       if (fdcache.probe (b_source0, fn) && // skip if already interned
2103           fn != b_source1) // but only if we'd just be prefetching, PR29474
2104         continue;
2105 
2106       // extract this file to a temporary file
2107       char* tmppath = NULL;
2108       rc = asprintf (&tmppath, "%s/debuginfod-fdcache.XXXXXX", tmpdir.c_str());
2109       if (rc < 0)
2110         throw libc_exception (ENOMEM, "cannot allocate tmppath");
2111       defer_dtor<void*,void> tmmpath_freer (tmppath, free);
2112       fd = mkstemp (tmppath);
2113       if (fd < 0)
2114         throw libc_exception (errno, "cannot create temporary file");
2115       // NB: don't unlink (tmppath), as fdcache will take charge of it.
2116 
2117       // NB: this can take many uninterruptible seconds for a huge file
2118       rc = archive_read_data_into_fd (a, fd);
2119       if (rc != ARCHIVE_OK) // e.g. ENOSPC!
2120         {
2121           close (fd);
2122           unlink (tmppath);
2123           throw archive_exception(a, "cannot extract file");
2124         }
2125 
2126       // Set the mtime so the fdcache file mtimes, even prefetched ones,
2127       // propagate to future webapi clients.
2128       struct timespec tvs[2];
2129       tvs[0].tv_sec = 0;
2130       tvs[0].tv_nsec = UTIME_OMIT;
2131       tvs[1].tv_sec = archive_entry_mtime(e);
2132       tvs[1].tv_nsec = archive_entry_mtime_nsec(e);
2133       (void) futimens (fd, tvs);  /* best effort */
2134 
2135       struct timespec extract_end;
2136       clock_gettime (CLOCK_MONOTONIC, &extract_end);
2137       double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
2138         + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
2139 
2140       if (r != 0) // stage 3
2141         {
2142           // NB: now we know we have a complete reusable file; make fdcache
2143           // responsible for unlinking it later.
2144           fdcache.intern(b_source0, fn,
2145                          tmppath, archive_entry_size(e),
2146                          false, extract_time); // prefetched ones go to the prefetch cache
2147           prefetch_count --;
2148           close (fd); // we're not saving this fd to make a mhd-response from!
2149           continue;
2150         }
2151 
2152       // NB: now we know we have a complete reusable file; make fdcache
2153       // responsible for unlinking it later.
2154       fdcache.intern(b_source0, b_source1,
2155                      tmppath, archive_entry_size(e),
2156                      true, extract_time); // requested ones go to the front of the line
2157 
2158       if (!section.empty ())
2159 	{
2160 	  int scn_fd = extract_section (fd, b_mtime,
2161 					b_source0 + ":" + b_source1,
2162 					section, extract_begin);
2163 	  close (fd);
2164 	  if (scn_fd >= 0)
2165 	    fd = scn_fd;
2166 	  else
2167 	    {
2168 	      if (verbose)
2169 	        obatched (clog) << "cannot find section " << section
2170 				<< " for archive " << b_source0
2171 				<< " file " << b_source1 << endl;
2172 	      return 0;
2173 	    }
2174 
2175 	  rc = fstat(fd, &fs);
2176 	  if (rc < 0)
2177 	    {
2178 	      close (fd);
2179 	      throw libc_exception (errno,
2180 		string ("fstat ") + b_source0 + string (" ") + section);
2181 	    }
2182 	  r = MHD_create_response_from_fd (fs.st_size, fd);
2183 	}
2184       else
2185 	r = MHD_create_response_from_fd (archive_entry_size(e), fd);
2186 
2187       inc_metric ("http_responses_total","result",archive_extension + " archive");
2188       if (r == 0)
2189         {
2190           if (verbose)
2191             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
2192           close(fd);
2193           break; // assume no chance of better luck around another iteration; no other copies of same file
2194         }
2195       else
2196         {
2197           add_mhd_response_header (r, "Content-Type",
2198                                    "application/octet-stream");
2199           add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
2200                                    to_string(archive_entry_size(e)).c_str());
2201           add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
2202           add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
2203           add_mhd_last_modified (r, archive_entry_mtime(e));
2204           if (verbose > 1)
2205 	    obatched(clog) << "serving archive " << b_source0
2206 			   << " file " << b_source1
2207 			   << " section=" << section << endl;
2208           /* libmicrohttpd will close it. */
2209           if (result_fd)
2210             *result_fd = fd;
2211           continue;
2212         }
2213     }
2214 
2215   // XXX: rpm/file not found: delete this R entry?
2216   return r;
2217 }
2218 
2219 
2220 static struct MHD_Response*
handle_buildid_match(bool internal_req_p,int64_t b_mtime,const string & b_stype,const string & b_source0,const string & b_source1,const string & section,int * result_fd)2221 handle_buildid_match (bool internal_req_p,
2222                       int64_t b_mtime,
2223                       const string& b_stype,
2224                       const string& b_source0,
2225                       const string& b_source1,
2226                       const string& section,
2227                       int *result_fd)
2228 {
2229   try
2230     {
2231       if (b_stype == "F")
2232         return handle_buildid_f_match(internal_req_p, b_mtime, b_source0,
2233 				      section, result_fd);
2234       else if (b_stype == "R")
2235         return handle_buildid_r_match(internal_req_p, b_mtime, b_source0,
2236 				      b_source1, section, result_fd);
2237     }
2238   catch (const reportable_exception &e)
2239     {
2240       e.report(clog);
2241       // Report but swallow libc etc. errors here; let the caller
2242       // iterate to other matches of the content.
2243     }
2244 
2245   return 0;
2246 }
2247 
2248 
2249 static int
debuginfod_find_progress(debuginfod_client *,long a,long b)2250 debuginfod_find_progress (debuginfod_client *, long a, long b)
2251 {
2252   if (verbose > 4)
2253     obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
2254 
2255   return interrupted;
2256 }
2257 
2258 
// A little pool of idle debuginfod_client*s for reuse between query
// threads.  All access goes through dc_pool_lock.  Clients are both
// returned to and taken from the front of the deque, so the most
// recently returned client is the first one to be reused.

mutex dc_pool_lock;
deque<debuginfod_client*> dc_pool;
2263 
debuginfod_pool_begin()2264 debuginfod_client* debuginfod_pool_begin()
2265 {
2266   unique_lock<mutex> lock(dc_pool_lock);
2267   if (dc_pool.size() > 0)
2268     {
2269       inc_metric("dc_pool_op_count","op","begin-reuse");
2270       debuginfod_client *c = dc_pool.front();
2271       dc_pool.pop_front();
2272       return c;
2273     }
2274   inc_metric("dc_pool_op_count","op","begin-new");
2275   return debuginfod_begin();
2276 }
2277 
2278 
debuginfod_pool_groom()2279 void debuginfod_pool_groom()
2280 {
2281   unique_lock<mutex> lock(dc_pool_lock);
2282   while (dc_pool.size() > 0)
2283     {
2284       inc_metric("dc_pool_op_count","op","end");
2285       debuginfod_end(dc_pool.front());
2286       dc_pool.pop_front();
2287     }
2288 }
2289 
2290 
debuginfod_pool_end(debuginfod_client * c)2291 void debuginfod_pool_end(debuginfod_client* c)
2292 {
2293   unique_lock<mutex> lock(dc_pool_lock);
2294   inc_metric("dc_pool_op_count","op","end-save");
2295   dc_pool.push_front(c); // accelerate reuse, vs. push_back
2296 }
2297 
2298 
2299 static struct MHD_Response*
handle_buildid(MHD_Connection * conn,const string & buildid,string & artifacttype,const string & suffix,int * result_fd)2300 handle_buildid (MHD_Connection* conn,
2301                 const string& buildid /* unsafe */,
2302                 string& artifacttype /* unsafe, cleanse on exception/return */,
2303                 const string& suffix /* unsafe */,
2304                 int *result_fd)
2305 {
2306   // validate artifacttype
2307   string atype_code;
2308   if (artifacttype == "debuginfo") atype_code = "D";
2309   else if (artifacttype == "executable") atype_code = "E";
2310   else if (artifacttype == "source") atype_code = "S";
2311   else if (artifacttype == "section") atype_code = "I";
2312   else {
2313     artifacttype = "invalid"; // PR28242 ensure http_resposes metrics don't propagate unclean user data
2314     throw reportable_exception("invalid artifacttype");
2315   }
2316 
2317   if (conn != 0)
2318     inc_metric("http_requests_total", "type", artifacttype);
2319 
2320   string section;
2321   if (atype_code == "I")
2322     {
2323       if (suffix.size () < 2)
2324 	throw reportable_exception ("invalid section suffix");
2325 
2326       // Remove leading '/'
2327       section = suffix.substr(1);
2328     }
2329 
2330   if (atype_code == "S" && suffix == "")
2331      throw reportable_exception("invalid source suffix");
2332 
2333   // validate buildid
2334   if ((buildid.size() < 2) || // not empty
2335       (buildid.size() % 2) || // even number
2336       (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
2337     throw reportable_exception("invalid buildid");
2338 
2339   if (verbose > 1)
2340     obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
2341          << " suffix=" << suffix << endl;
2342 
2343   // If invoked from the scanner threads, use the scanners' read-write
2344   // connection.  Otherwise use the web query threads' read-only connection.
2345   sqlite3 *thisdb = (conn == 0) ? db : dbq;
2346 
2347   sqlite_ps *pp = 0;
2348 
2349   if (atype_code == "D")
2350     {
2351       pp = new sqlite_ps (thisdb, "mhd-query-d",
2352                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
2353                           "order by mtime desc");
2354       pp->reset();
2355       pp->bind(1, buildid);
2356     }
2357   else if (atype_code == "E")
2358     {
2359       pp = new sqlite_ps (thisdb, "mhd-query-e",
2360                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
2361                           "order by mtime desc");
2362       pp->reset();
2363       pp->bind(1, buildid);
2364     }
2365   else if (atype_code == "S")
2366     {
2367       // PR25548
2368       // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
2369       // We let the query pass with either one.
2370 
2371       pp = new sqlite_ps (thisdb, "mhd-query-s",
2372                           "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
2373                           "order by sharedprefix(source0,source0ref) desc, mtime desc");
2374       pp->reset();
2375       pp->bind(1, buildid);
2376       // NB: we don't store the non-canonicalized path names any more, but old databases
2377       // might have them (and no canon ones), so we keep searching for both.
2378       pp->bind(2, suffix);
2379       pp->bind(3, canon_pathname(suffix));
2380     }
2381   else if (atype_code == "I")
2382     {
2383       pp = new sqlite_ps (thisdb, "mhd-query-i",
2384 	"select mtime, sourcetype, source0, source1, 1 as debug_p from " BUILDIDS "_query_d where buildid = ? "
2385 	"union all "
2386 	"select mtime, sourcetype, source0, source1, 0 as debug_p from " BUILDIDS "_query_e where buildid = ? "
2387 	"order by debug_p desc, mtime desc");
2388       pp->reset();
2389       pp->bind(1, buildid);
2390       pp->bind(2, buildid);
2391     }
2392   unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
2393 
2394   bool do_upstream_section_query = true;
2395 
2396   // consume all the rows
2397   while (1)
2398     {
2399       int rc = pp->step();
2400       if (rc == SQLITE_DONE) break;
2401       if (rc != SQLITE_ROW)
2402         throw sqlite_exception(rc, "step");
2403 
2404       int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
2405       string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
2406       string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
2407       string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
2408 
2409       if (verbose > 1)
2410         obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
2411              << " source0=" << b_source0 << " source1=" << b_source1 << endl;
2412 
2413       // Try accessing the located match.
2414       // XXX: in case of multiple matches, attempt them in parallel?
2415       auto r = handle_buildid_match (conn ? false : true,
2416                                      b_mtime, b_stype, b_source0, b_source1,
2417 				     section, result_fd);
2418       if (r)
2419         return r;
2420 
2421       // If a debuginfo file matching BUILDID was found but didn't contain
2422       // the desired section, then the section should not exist.  Don't
2423       // bother querying upstream servers.
2424       if (!section.empty () && (sqlite3_column_int (*pp, 4) == 1))
2425 	{
2426 	  struct stat st;
2427 
2428 	  // For "F" sourcetype, check if the debuginfo exists. For "R"
2429 	  // sourcetype, check if the debuginfo was interned into the fdcache.
2430 	  if ((b_stype == "F" && (stat (b_source0.c_str (), &st) == 0))
2431 	      || (b_stype == "R" && fdcache.probe (b_source0, b_source1)))
2432 	    do_upstream_section_query = false;
2433 	}
2434     }
2435   pp->reset();
2436 
2437   if (!do_upstream_section_query)
2438     throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
2439 
2440   // We couldn't find it in the database.  Last ditch effort
2441   // is to defer to other debuginfo servers.
2442 
2443   int fd = -1;
2444   debuginfod_client *client = debuginfod_pool_begin ();
2445   if (client == NULL)
2446     throw libc_exception(errno, "debuginfod client pool alloc");
2447   defer_dtor<debuginfod_client*,void> client_closer (client, debuginfod_pool_end);
2448 
2449   debuginfod_set_progressfn (client, & debuginfod_find_progress);
2450 
2451   if (conn)
2452     {
2453       // Transcribe incoming User-Agent:
2454       string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
2455       string ua_complete = string("User-Agent: ") + ua;
2456       debuginfod_add_http_header (client, ua_complete.c_str());
2457 
2458       // Compute larger XFF:, for avoiding info loss during
2459       // federation, and for future cyclicity detection.
2460       string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
2461       if (xff != "")
2462         xff += string(", "); // comma separated list
2463 
2464       unsigned int xff_count = 0;
2465       for (auto&& i : xff){
2466         if (i == ',') xff_count++;
2467       }
2468 
2469       // if X-Forwarded-For: exceeds N hops,
2470       // do not delegate a local lookup miss to upstream debuginfods.
2471       if (xff_count >= forwarded_ttl_limit)
2472         throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
2473 and will not query the upstream servers");
2474 
2475       // Compute the client's numeric IP address only - so can't merge with conninfo()
2476       const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
2477                                                                    MHD_CONNECTION_INFO_CLIENT_ADDRESS);
2478       struct sockaddr *so = u ? u->client_addr : 0;
2479       char hostname[256] = ""; // RFC1035
2480       if (so && so->sa_family == AF_INET) {
2481         (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
2482                             NI_NUMERICHOST);
2483       } else if (so && so->sa_family == AF_INET6) {
2484         struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
2485         if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
2486           struct sockaddr_in addr4;
2487           memset (&addr4, 0, sizeof(addr4));
2488           addr4.sin_family = AF_INET;
2489           addr4.sin_port = addr6->sin6_port;
2490           memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
2491           (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
2492                               hostname, sizeof (hostname), NULL, 0,
2493                               NI_NUMERICHOST);
2494         } else {
2495           (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
2496                               NI_NUMERICHOST);
2497         }
2498       }
2499 
2500       string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
2501       debuginfod_add_http_header (client, xff_complete.c_str());
2502     }
2503 
2504   if (artifacttype == "debuginfo")
2505     fd = debuginfod_find_debuginfo (client,
2506                                     (const unsigned char*) buildid.c_str(),
2507                                     0, NULL);
2508   else if (artifacttype == "executable")
2509     fd = debuginfod_find_executable (client,
2510                                      (const unsigned char*) buildid.c_str(),
2511                                      0, NULL);
2512   else if (artifacttype == "source")
2513     fd = debuginfod_find_source (client,
2514                                  (const unsigned char*) buildid.c_str(),
2515                                  0, suffix.c_str(), NULL);
2516   else if (artifacttype == "section")
2517     fd = debuginfod_find_section (client,
2518                                   (const unsigned char*) buildid.c_str(),
2519                                   0, section.c_str(), NULL);
2520 
2521   if (fd >= 0)
2522     {
2523       if (conn != 0)
2524 	inc_metric ("http_responses_total","result","upstream");
2525       struct stat s;
2526       int rc = fstat (fd, &s);
2527       if (rc == 0)
2528         {
2529           auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
2530           if (r)
2531             {
2532               add_mhd_response_header (r, "Content-Type",
2533 				       "application/octet-stream");
2534               // Copy the incoming headers
2535               const char * hdrs = debuginfod_get_headers(client);
2536               string header_dup;
2537               if (hdrs)
2538                 header_dup = string(hdrs);
2539               // Parse the "header: value\n" lines into (h,v) tuples and pass on
2540               while(1)
2541                 {
2542                   size_t newline = header_dup.find('\n');
2543                   if (newline == string::npos) break;
2544                   size_t colon = header_dup.find(':');
2545                   if (colon == string::npos) break;
2546                   string header = header_dup.substr(0,colon);
2547                   string value = header_dup.substr(colon+1,newline-colon-1);
2548                   // strip leading spaces from value
2549                   size_t nonspace = value.find_first_not_of(" ");
2550                   if (nonspace != string::npos)
2551                     value = value.substr(nonspace);
2552                   add_mhd_response_header(r, header.c_str(), value.c_str());
2553                   header_dup = header_dup.substr(newline+1);
2554                 }
2555 
2556               add_mhd_last_modified (r, s.st_mtime);
2557               if (verbose > 1)
2558                 obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
2559               if (result_fd)
2560                 *result_fd = fd;
2561               return r; // NB: don't close fd; libmicrohttpd will
2562             }
2563         }
2564       close (fd);
2565     }
2566   else
2567     switch(fd)
2568       {
2569       case -ENOSYS:
2570         break;
2571       case -ENOENT:
2572         break;
2573       default: // some more tricky error
2574         throw libc_exception(-fd, "upstream debuginfod query failed");
2575       }
2576 
2577   throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
2578 }
2579 
2580 
2581 ////////////////////////////////////////////////////////////////////////
2582 
static map<string,double> metrics; // arbitrary data for /metrics query
// NB: samples are stored as double, which prometheus accepts; plain
// counters stay integer-valued, while sums such as request durations
// may be fractional.
static mutex metrics_lock; // taken by the metric accessor functions around each use of metrics
// NB: these objects get released during the process exit via global dtors
// do not call them from within other global dtors
2588 
// Build one prometheus-compatible label of the form
//   name="escaped-value"
// Backslash, double-quote and newline characters inside VALUE are
// backslash-escaped; every other character is copied through as-is.
// NAME is emitted verbatim.
// https://prometheus.io/docs/instrumenting/exposition_formats/
static string
metric_label(const string& name, const string& value)
{
  string escaped;
  for (const char ch : value)
    {
      if (ch == '\\')
        escaped += "\\\\";
      else if (ch == '\"')
        escaped += "\\\"";
      else if (ch == '\n')
        escaped += "\\n";
      else
        escaped += ch;
    }
  return name + "=\"" + escaped + "\"";
}
2607 
2608 
2609 // add prometheus-format metric name + label tuple (if any) + value
2610 
2611 static void
set_metric(const string & metric,double value)2612 set_metric(const string& metric, double value)
2613 {
2614   unique_lock<mutex> lock(metrics_lock);
2615   metrics[metric] = value;
2616 }
2617 static void
inc_metric(const string & metric)2618 inc_metric(const string& metric)
2619 {
2620   unique_lock<mutex> lock(metrics_lock);
2621   metrics[metric] ++;
2622 }
2623 static void
set_metric(const string & metric,const string & lname,const string & lvalue,double value)2624 set_metric(const string& metric,
2625            const string& lname, const string& lvalue,
2626            double value)
2627 {
2628   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
2629   unique_lock<mutex> lock(metrics_lock);
2630   metrics[key] = value;
2631 }
2632 
2633 static void
inc_metric(const string & metric,const string & lname,const string & lvalue)2634 inc_metric(const string& metric,
2635            const string& lname, const string& lvalue)
2636 {
2637   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
2638   unique_lock<mutex> lock(metrics_lock);
2639   metrics[key] ++;
2640 }
2641 static void
add_metric(const string & metric,const string & lname,const string & lvalue,double value)2642 add_metric(const string& metric,
2643            const string& lname, const string& lvalue,
2644            double value)
2645 {
2646   string key = (metric + "{" + metric_label(lname, lvalue) + "}");
2647   unique_lock<mutex> lock(metrics_lock);
2648   metrics[key] += value;
2649 }
2650 static void
add_metric(const string & metric,double value)2651 add_metric(const string& metric,
2652            double value)
2653 {
2654   unique_lock<mutex> lock(metrics_lock);
2655   metrics[metric] += value;
2656 }
2657 
2658 
2659 // and more for higher arity labels if needed
2660 
2661 static void
inc_metric(const string & metric,const string & lname,const string & lvalue,const string & rname,const string & rvalue)2662 inc_metric(const string& metric,
2663            const string& lname, const string& lvalue,
2664            const string& rname, const string& rvalue)
2665 {
2666   string key = (metric + "{"
2667                 + metric_label(lname, lvalue) + ","
2668                 + metric_label(rname, rvalue) + "}");
2669   unique_lock<mutex> lock(metrics_lock);
2670   metrics[key] ++;
2671 }
2672 static void
add_metric(const string & metric,const string & lname,const string & lvalue,const string & rname,const string & rvalue,double value)2673 add_metric(const string& metric,
2674            const string& lname, const string& lvalue,
2675            const string& rname, const string& rvalue,
2676            double value)
2677 {
2678   string key = (metric + "{"
2679                 + metric_label(lname, lvalue) + ","
2680                 + metric_label(rname, rvalue) + "}");
2681   unique_lock<mutex> lock(metrics_lock);
2682   metrics[key] += value;
2683 }
2684 
2685 static struct MHD_Response*
handle_metrics(off_t * size)2686 handle_metrics (off_t* size)
2687 {
2688   stringstream o;
2689   {
2690     unique_lock<mutex> lock(metrics_lock);
2691     for (auto&& i : metrics)
2692       o << i.first
2693         << " "
2694         << std::setprecision(std::numeric_limits<double>::digits10 + 1)
2695         << i.second
2696         << endl;
2697   }
2698   const string& os = o.str();
2699   MHD_Response* r = MHD_create_response_from_buffer (os.size(),
2700                                                      (void*) os.c_str(),
2701                                                      MHD_RESPMEM_MUST_COPY);
2702   if (r != NULL)
2703     {
2704       *size = os.size();
2705       add_mhd_response_header (r, "Content-Type", "text/plain");
2706     }
2707   return r;
2708 }
2709 
2710 static struct MHD_Response*
handle_root(off_t * size)2711 handle_root (off_t* size)
2712 {
2713   static string version = "debuginfod (" + string (PACKAGE_NAME) + ") "
2714 			  + string (PACKAGE_VERSION);
2715   MHD_Response* r = MHD_create_response_from_buffer (version.size (),
2716 						     (void *) version.c_str (),
2717 						     MHD_RESPMEM_PERSISTENT);
2718   if (r != NULL)
2719     {
2720       *size = version.size ();
2721       add_mhd_response_header (r, "Content-Type", "text/plain");
2722     }
2723   return r;
2724 }
2725 
2726 
2727 ////////////////////////////////////////////////////////////////////////
2728 
2729 
2730 /* libmicrohttpd callback */
2731 static MHD_RESULT
handler_cb(void *,struct MHD_Connection * connection,const char * url,const char * method,const char *,const char *,size_t *,void ** ptr)2732 handler_cb (void * /*cls*/,
2733             struct MHD_Connection *connection,
2734             const char *url,
2735             const char *method,
2736             const char * /*version*/,
2737             const char * /*upload_data*/,
2738             size_t * /*upload_data_size*/,
2739             void ** ptr)
2740 {
2741   struct MHD_Response *r = NULL;
2742   string url_copy = url;
2743 
2744   /* libmicrohttpd always makes (at least) two callbacks: once just
2745      past the headers, and one after the request body is finished
2746      being received.  If we process things early (first callback) and
2747      queue a response, libmicrohttpd would suppress http keep-alive
2748      (via connection->read_closed = true). */
2749   static int aptr; /* just some random object to use as a flag */
2750   if (&aptr != *ptr)
2751     {
2752       /* do never respond on first call */
2753       *ptr = &aptr;
2754       return MHD_YES;
2755     }
2756   *ptr = NULL;                     /* reset when done */
2757 
2758   const char *maxsize_string = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "X-DEBUGINFOD-MAXSIZE");
2759   long maxsize = 0;
2760   if (maxsize_string != NULL && maxsize_string[0] != '\0')
2761     maxsize = atol(maxsize_string);
2762   else
2763     maxsize = 0;
2764 
2765 #if MHD_VERSION >= 0x00097002
2766   enum MHD_Result rc;
2767 #else
2768   int rc = MHD_NO; // mhd
2769 #endif
2770   int http_code = 500;
2771   off_t http_size = -1;
2772   struct timespec ts_start, ts_end;
2773   clock_gettime (CLOCK_MONOTONIC, &ts_start);
2774   double afteryou = 0.0;
2775   string artifacttype, suffix;
2776 
2777   try
2778     {
2779       if (string(method) != "GET")
2780         throw reportable_exception(400, "we support GET only");
2781 
2782       /* Start decoding the URL. */
2783       size_t slash1 = url_copy.find('/', 1);
2784       string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
2785 
2786       if (slash1 != string::npos && url1 == "/buildid")
2787         {
2788           // PR27863: block this thread awhile if another thread is already busy
2789           // fetching the exact same thing.  This is better for Everyone.
2790           // The latecomer says "... after you!" and waits.
2791           add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
2792 #ifdef HAVE_PTHREAD_SETNAME_NP
2793           (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
2794 #endif
2795           struct timespec tsay_start, tsay_end;
2796           clock_gettime (CLOCK_MONOTONIC, &tsay_start);
2797           static unique_set<string> busy_urls;
2798           unique_set_reserver<string> after_you(busy_urls, url_copy);
2799           clock_gettime (CLOCK_MONOTONIC, &tsay_end);
2800           afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
2801           add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
2802 
2803           tmp_inc_metric m ("thread_busy", "role", "http-buildid");
2804 #ifdef HAVE_PTHREAD_SETNAME_NP
2805           (void) pthread_setname_np (pthread_self(), "mhd-buildid");
2806 #endif
2807           size_t slash2 = url_copy.find('/', slash1+1);
2808           if (slash2 == string::npos)
2809             throw reportable_exception("/buildid/ webapi error, need buildid");
2810 
2811           string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
2812 
2813           size_t slash3 = url_copy.find('/', slash2+1);
2814 
2815           if (slash3 == string::npos)
2816             {
2817               artifacttype = url_copy.substr(slash2+1);
2818               suffix = "";
2819             }
2820           else
2821             {
2822               artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
2823               suffix = url_copy.substr(slash3); // include the slash in the suffix
2824             }
2825 
2826           // get the resulting fd so we can report its size
2827           int fd;
2828           r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
2829           if (r)
2830             {
2831               struct stat fs;
2832               if (fstat(fd, &fs) == 0)
2833                 http_size = fs.st_size;
2834               // libmicrohttpd will close (fd);
2835             }
2836         }
2837       else if (url1 == "/metrics")
2838         {
2839           tmp_inc_metric m ("thread_busy", "role", "http-metrics");
2840           artifacttype = "metrics";
2841           inc_metric("http_requests_total", "type", artifacttype);
2842           r = handle_metrics(& http_size);
2843         }
2844       else if (url1 == "/")
2845         {
2846           artifacttype = "/";
2847           inc_metric("http_requests_total", "type", artifacttype);
2848           r = handle_root(& http_size);
2849         }
2850       else
2851         throw reportable_exception("webapi error, unrecognized '" + url1 + "'");
2852 
2853       if (r == 0)
2854         throw reportable_exception("internal error, missing response");
2855 
2856       if (maxsize > 0 && http_size > maxsize)
2857         {
2858           MHD_destroy_response(r);
2859           throw reportable_exception(406, "File too large, max size=" + std::to_string(maxsize));
2860         }
2861 
2862       rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
2863       http_code = MHD_HTTP_OK;
2864       MHD_destroy_response (r);
2865     }
2866   catch (const reportable_exception& e)
2867     {
2868       inc_metric("http_responses_total","result","error");
2869       e.report(clog);
2870       http_code = e.code;
2871       http_size = e.message.size();
2872       rc = e.mhd_send_response (connection);
2873     }
2874 
2875   clock_gettime (CLOCK_MONOTONIC, &ts_end);
2876   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
2877   // afteryou: delay waiting for other client's identical query to complete
2878   // deltas: total latency, including afteryou waiting
2879   obatched(clog) << conninfo(connection)
2880                  << ' ' << method << ' ' << url
2881                  << ' ' << http_code << ' ' << http_size
2882                  << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
2883                  << endl;
2884 
2885   // related prometheus metrics
2886   string http_code_str = to_string(http_code);
2887   add_metric("http_responses_transfer_bytes_sum",
2888              "code", http_code_str, "type", artifacttype, http_size);
2889   inc_metric("http_responses_transfer_bytes_count",
2890              "code", http_code_str, "type", artifacttype);
2891 
2892   add_metric("http_responses_duration_milliseconds_sum",
2893              "code", http_code_str, "type", artifacttype, deltas*1000); // prometheus prefers _seconds and floating point
2894   inc_metric("http_responses_duration_milliseconds_count",
2895              "code", http_code_str, "type", artifacttype);
2896 
2897   add_metric("http_responses_after_you_milliseconds_sum",
2898              "code", http_code_str, "type", artifacttype, afteryou*1000);
2899   inc_metric("http_responses_after_you_milliseconds_count",
2900              "code", http_code_str, "type", artifacttype);
2901 
2902   return rc;
2903 }
2904 
2905 
2906 ////////////////////////////////////////////////////////////////////////
2907 // borrowed originally from src/nm.c get_local_names()
2908 
2909 static void
dwarf_extract_source_paths(Elf * elf,set<string> & debug_sourcefiles)2910 dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
2911   noexcept // no exceptions - so we can simplify the altdbg resource release at end
2912 {
2913   Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
2914   if (dbg == NULL)
2915     return;
2916 
2917   Dwarf* altdbg = NULL;
2918   int    altdbg_fd = -1;
2919 
2920   // DWZ handling: if we have an unsatisfied debug-alt-link, add an
2921   // empty string into the outgoing sourcefiles set, so the caller
2922   // should know that our data is incomplete.
2923   const char *alt_name_p;
2924   const void *alt_build_id; // elfutils-owned memory
2925   ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
2926   if (sz > 0) // got one!
2927     {
2928       string buildid;
2929       unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
2930       for (ssize_t idx=0; idx<sz; idx++)
2931         {
2932           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
2933           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
2934         }
2935 
2936       if (verbose > 3)
2937         obatched(clog) << "Need altdebug buildid=" << buildid << endl;
2938 
2939       // but is it unsatisfied the normal elfutils ways?
2940       Dwarf* alt = dwarf_getalt (dbg);
2941       if (alt == NULL)
2942         {
2943           // Yup, unsatisfied the normal way.  Maybe we can satisfy it
2944           // from our own debuginfod database.
2945           int alt_fd;
2946           struct MHD_Response *r = 0;
2947           try
2948             {
2949               string artifacttype = "debuginfo";
2950               r = handle_buildid (0, buildid, artifacttype, "", &alt_fd);
2951             }
2952           catch (const reportable_exception& e)
2953             {
2954               // swallow exceptions
2955             }
2956 
2957           // NB: this is not actually recursive!  This invokes the web-query
2958           // path, which cannot get back into the scan code paths.
2959           if (r)
2960             {
2961               // Found it!
2962               altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
2963               alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
2964               // NB: must close this dwarf and this fd at the bottom of the function!
2965               MHD_destroy_response (r); // will close alt_fd
2966               if (alt)
2967                 dwarf_setalt (dbg, alt);
2968             }
2969         }
2970       else
2971         {
2972           // NB: dwarf_setalt(alt) inappropriate - already done!
2973           // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
2974         }
2975 
2976       if (alt)
2977         {
2978           if (verbose > 3)
2979             obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
2980         }
2981       else // (alt == NULL) - signal possible presence of poor debuginfo
2982         {
2983           debug_sourcefiles.insert("");
2984           if (verbose > 3)
2985             obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
2986         }
2987     }
2988 
2989   Dwarf_Off offset = 0;
2990   Dwarf_Off old_offset;
2991   size_t hsize;
2992 
2993   while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
2994     {
2995       Dwarf_Die cudie_mem;
2996       Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
2997 
2998       if (cudie == NULL)
2999         continue;
3000       if (dwarf_tag (cudie) != DW_TAG_compile_unit)
3001         continue;
3002 
3003       const char *cuname = dwarf_diename(cudie) ?: "unknown";
3004 
3005       Dwarf_Files *files;
3006       size_t nfiles;
3007       if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
3008         continue;
3009 
3010       // extract DW_AT_comp_dir to resolve relative file names
3011       const char *comp_dir = "";
3012       const char *const *dirs;
3013       size_t ndirs;
3014       if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
3015           dirs[0] != NULL)
3016         comp_dir = dirs[0];
3017       if (comp_dir == NULL)
3018         comp_dir = "";
3019 
3020       if (verbose > 3)
3021         obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
3022                        << " #files=" << nfiles << " #dirs=" << ndirs << endl;
3023 
3024       if (comp_dir[0] == '\0' && cuname[0] != '/')
3025         {
3026           if (verbose > 3)
3027             obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
3028           continue;
3029         }
3030 
3031       for (size_t f = 1; f < nfiles; f++)
3032         {
3033           const char *hat = dwarf_filesrc (files, f, NULL, NULL);
3034           if (hat == NULL)
3035             continue;
3036 
3037           if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
3038             continue;
3039 
3040           string waldo;
3041           if (hat[0] == '/') // absolute
3042             waldo = (string (hat));
3043           else if (comp_dir[0] != '\0') // comp_dir relative
3044             waldo = (string (comp_dir) + string("/") + string (hat));
3045           else
3046            {
3047              if (verbose > 3)
3048                obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
3049              continue;
3050            }
3051 
3052           // NB: this is the 'waldo' that a dbginfo client will have
3053           // to supply for us to give them the file The comp_dir
3054           // prefixing is a definite complication.  Otherwise we'd
3055           // have to return a setof comp_dirs (one per CU!) with
3056           // corresponding filesrc[] names, instead of one absolute
3057           // resoved set.  Maybe we'll have to do that anyway.  XXX
3058 
3059           if (verbose > 4)
3060             obatched(clog) << waldo
3061                            << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") <<  endl;
3062 
3063           debug_sourcefiles.insert (waldo);
3064         }
3065     }
3066 
3067   dwarf_end(dbg);
3068   if (altdbg)
3069     dwarf_end(altdbg);
3070   if (altdbg_fd >= 0)
3071     close(altdbg_fd);
3072 }
3073 
3074 
3075 
3076 static void
elf_classify(int fd,bool & executable_p,bool & debuginfo_p,string & buildid,set<string> & debug_sourcefiles)3077 elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
3078 {
3079   Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
3080   if (elf == NULL)
3081     return;
3082 
3083   try // catch our types of errors and clean up the Elf* object
3084     {
3085       if (elf_kind (elf) != ELF_K_ELF)
3086         {
3087           elf_end (elf);
3088           return;
3089         }
3090 
3091       GElf_Ehdr ehdr_storage;
3092       GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
3093       if (ehdr == NULL)
3094         {
3095           elf_end (elf);
3096           return;
3097         }
3098       auto elf_type = ehdr->e_type;
3099 
3100       const void *build_id; // elfutils-owned memory
3101       ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
3102       if (sz <= 0)
3103         {
3104           // It's not a diagnostic-worthy error for an elf file to lack build-id.
3105           // It might just be very old.
3106           elf_end (elf);
3107           return;
3108         }
3109 
3110       // build_id is a raw byte array; convert to hexadecimal *lowercase*
3111       unsigned char* build_id_bytes = (unsigned char*) build_id;
3112       for (ssize_t idx=0; idx<sz; idx++)
3113         {
3114           buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
3115           buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
3116         }
3117 
3118       // now decide whether it's an executable - namely, any allocatable section has
3119       // PROGBITS;
3120       if (elf_type == ET_EXEC || elf_type == ET_DYN)
3121         {
3122           size_t shnum;
3123           int rc = elf_getshdrnum (elf, &shnum);
3124           if (rc < 0)
3125             throw elfutils_exception(rc, "getshdrnum");
3126 
3127           executable_p = false;
3128           for (size_t sc = 0; sc < shnum; sc++)
3129             {
3130               Elf_Scn *scn = elf_getscn (elf, sc);
3131               if (scn == NULL)
3132                 continue;
3133 
3134               GElf_Shdr shdr_mem;
3135               GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
3136               if (shdr == NULL)
3137                 continue;
3138 
3139               // allocated (loadable / vm-addr-assigned) section with available content?
3140               if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
3141                 {
3142                   if (verbose > 4)
3143                     obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
3144                   executable_p = true;
3145                   break; // no need to keep looking for others
3146                 }
3147             } // iterate over sections
3148         } // executable_p classification
3149 
3150       // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
3151       // logic mostly stolen from [email protected]'s elfclassify drafts
3152       size_t shstrndx;
3153       int rc = elf_getshdrstrndx (elf, &shstrndx);
3154       if (rc < 0)
3155         throw elfutils_exception(rc, "getshdrstrndx");
3156 
3157       Elf_Scn *scn = NULL;
3158       bool symtab_p = false;
3159       bool bits_alloc_p = false;
3160       while (true)
3161         {
3162           scn = elf_nextscn (elf, scn);
3163           if (scn == NULL)
3164             break;
3165           GElf_Shdr shdr_storage;
3166           GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
3167           if (shdr == NULL)
3168             break;
3169           const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
3170           if (section_name == NULL)
3171             break;
3172           if (startswith (section_name, ".debug_line") ||
3173               startswith (section_name, ".zdebug_line"))
3174             {
3175               debuginfo_p = true;
3176               if (scan_source_info)
3177                 dwarf_extract_source_paths (elf, debug_sourcefiles);
3178               break; // expecting only one .*debug_line, so no need to look for others
3179             }
3180           else if (startswith (section_name, ".debug_") ||
3181                    startswith (section_name, ".zdebug_"))
3182             {
3183               debuginfo_p = true;
3184               // NB: don't break; need to parse .debug_line for sources
3185             }
3186           else if (shdr->sh_type == SHT_SYMTAB)
3187             {
3188               symtab_p = true;
3189             }
3190           else if (shdr->sh_type != SHT_NOBITS
3191                    && shdr->sh_type != SHT_NOTE
3192                    && (shdr->sh_flags & SHF_ALLOC) != 0)
3193             {
3194               bits_alloc_p = true;
3195             }
3196         }
3197 
3198       // For more expansive elf/split-debuginfo classification, we
3199       // want to identify as debuginfo "strip -s"-produced files
3200       // without .debug_info* (like libicudata), but we don't want to
3201       // identify "strip -g" executables (with .symtab left there).
3202       if (symtab_p && !bits_alloc_p)
3203         debuginfo_p = true;
3204     }
3205   catch (const reportable_exception& e)
3206     {
3207       e.report(clog);
3208     }
3209   elf_end (elf);
3210 }
3211 
3212 
3213 // Intern the given file name in two parts (dirname & basename) and
3214 // return the resulting file's id.
3215 static int64_t
register_file_name(sqlite_ps & ps_upsert_fileparts,sqlite_ps & ps_upsert_file,sqlite_ps & ps_lookup_file,const string & name)3216 register_file_name(sqlite_ps& ps_upsert_fileparts,
3217                    sqlite_ps& ps_upsert_file,
3218                    sqlite_ps& ps_lookup_file,
3219                    const string& name)
3220 {
3221   std::size_t slash = name.rfind('/');
3222   string dirname, filename;
3223   if (slash == std::string::npos)
3224     {
3225       dirname = "";
3226       filename = name;
3227     }
3228   else
3229     {
3230       dirname = name.substr(0, slash);
3231       filename = name.substr(slash+1);
3232     }
3233 
3234   // intern the two substrings
3235   ps_upsert_fileparts
3236     .reset()
3237     .bind(1, dirname)
3238     .step_ok_done();
3239   ps_upsert_fileparts
3240     .reset()
3241     .bind(1, filename)
3242     .step_ok_done();
3243 
3244   // intern the tuple
3245   ps_upsert_file
3246     .reset()
3247     .bind(1, dirname)
3248     .bind(2, filename)
3249     .step_ok_done();
3250 
3251   // look up the tuple's id
3252   ps_lookup_file
3253     .reset()
3254     .bind(1, dirname)
3255     .bind(2, filename);
3256   int rc = ps_lookup_file.step();
3257   if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
3258 
3259   int64_t id = sqlite3_column_int64 (ps_lookup_file, 0);
3260   ps_lookup_file.reset();
3261   return id;
3262 }
3263 
3264 
3265 
3266 static void
scan_source_file(const string & rps,const stat_t & st,sqlite_ps & ps_upsert_buildids,sqlite_ps & ps_upsert_fileparts,sqlite_ps & ps_upsert_file,sqlite_ps & ps_lookup_file,sqlite_ps & ps_upsert_de,sqlite_ps & ps_upsert_s,sqlite_ps & ps_query,sqlite_ps & ps_scan_done,unsigned & fts_cached,unsigned & fts_executable,unsigned & fts_debuginfo,unsigned & fts_sourcefiles)3267 scan_source_file (const string& rps, const stat_t& st,
3268                   sqlite_ps& ps_upsert_buildids,
3269                   sqlite_ps& ps_upsert_fileparts,
3270                   sqlite_ps& ps_upsert_file,
3271                   sqlite_ps& ps_lookup_file,
3272                   sqlite_ps& ps_upsert_de,
3273                   sqlite_ps& ps_upsert_s,
3274                   sqlite_ps& ps_query,
3275                   sqlite_ps& ps_scan_done,
3276                   unsigned& fts_cached,
3277                   unsigned& fts_executable,
3278                   unsigned& fts_debuginfo,
3279                   unsigned& fts_sourcefiles)
3280 {
3281   int64_t fileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
3282 
3283   /* See if we know of it already. */
3284   int rc = ps_query
3285     .reset()
3286     .bind(1, fileid)
3287     .bind(2, st.st_mtime)
3288     .step();
3289   ps_query.reset();
3290   if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
3291     // no need to recheck a file/version we already know
3292     // specifically, no need to elf-begin a file we already determined is non-elf
3293     // (so is stored with buildid=NULL)
3294     {
3295       fts_cached++;
3296       return;
3297     }
3298 
3299   bool executable_p = false, debuginfo_p = false; // E and/or D
3300   string buildid;
3301   set<string> sourcefiles;
3302 
3303   int fd = open (rps.c_str(), O_RDONLY);
3304   try
3305     {
3306       if (fd >= 0)
3307         elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
3308       else
3309         throw libc_exception(errno, string("open ") + rps);
3310       add_metric ("scanned_bytes_total","source","file",
3311                   st.st_size);
3312       inc_metric ("scanned_files_total","source","file");
3313     }
3314   // NB: we catch exceptions here too, so that we can
3315   // cache the corrupt-elf case (!executable_p &&
3316   // !debuginfo_p) just below, just as if we had an
3317   // EPERM error from open(2).
3318   catch (const reportable_exception& e)
3319     {
3320       e.report(clog);
3321     }
3322 
3323   if (fd >= 0)
3324     close (fd);
3325 
3326   if (buildid == "")
3327     {
3328       // no point storing an elf file without buildid
3329       executable_p = false;
3330       debuginfo_p = false;
3331     }
3332   else
3333     {
3334       // register this build-id in the interning table
3335       ps_upsert_buildids
3336         .reset()
3337         .bind(1, buildid)
3338         .step_ok_done();
3339     }
3340 
3341   if (executable_p)
3342     fts_executable ++;
3343   if (debuginfo_p)
3344     fts_debuginfo ++;
3345   if (executable_p || debuginfo_p)
3346     {
3347       ps_upsert_de
3348         .reset()
3349         .bind(1, buildid)
3350         .bind(2, debuginfo_p ? 1 : 0)
3351         .bind(3, executable_p ? 1 : 0)
3352         .bind(4, fileid)
3353         .bind(5, st.st_mtime)
3354         .step_ok_done();
3355     }
3356   if (executable_p)
3357     inc_metric("found_executable_total","source","files");
3358   if (debuginfo_p)
3359     inc_metric("found_debuginfo_total","source","files");
3360 
3361   if (sourcefiles.size() && buildid != "")
3362     {
3363       fts_sourcefiles += sourcefiles.size();
3364 
3365       for (auto&& dwarfsrc : sourcefiles)
3366         {
3367           char *srp = realpath(dwarfsrc.c_str(), NULL);
3368           if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
3369             continue; // unresolvable files are not a serious problem
3370           // throw libc_exception(errno, "fts/file realpath " + srcpath);
3371           string srps = string(srp);
3372           free (srp);
3373 
3374           struct stat sfs;
3375           rc = stat(srps.c_str(), &sfs);
3376           if (rc != 0)
3377             continue;
3378 
3379           if (verbose > 2)
3380             obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
3381                            << " mtime=" << sfs.st_mtime
3382                            << " as source " << dwarfsrc << endl;
3383 
3384           // PR25548: store canonicalized dwarfsrc path
3385           string dwarfsrc_canon = canon_pathname (dwarfsrc);
3386           if (dwarfsrc_canon != dwarfsrc)
3387             {
3388               if (verbose > 3)
3389                 obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
3390             }
3391 
3392           int64_t fileid1 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, dwarfsrc_canon);
3393           int64_t fileid2 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, srps);
3394 
3395           ps_upsert_s
3396             .reset()
3397             .bind(1, buildid)
3398             .bind(2, fileid1)
3399             .bind(3, fileid2)
3400             .bind(4, sfs.st_mtime)
3401             .step_ok_done();
3402 
3403           inc_metric("found_sourcerefs_total","source","files");
3404         }
3405     }
3406 
3407   ps_scan_done
3408     .reset()
3409     .bind(1, fileid)
3410     .bind(2, st.st_mtime)
3411     .bind(3, st.st_size)
3412     .step_ok_done();
3413 
3414   if (verbose > 2)
3415     obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
3416                    << " mtime=" << st.st_mtime << " atype="
3417                    << (executable_p ? "E" : "")
3418                    << (debuginfo_p ? "D" : "") << endl;
3419 }
3420 
3421 
3422 
3423 
3424 
3425 // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
3426 // constituent files with given upsert statements.
3427 static void
archive_classify(const string & rps,string & archive_extension,int64_t archiveid,sqlite_ps & ps_upsert_buildids,sqlite_ps & ps_upsert_fileparts,sqlite_ps & ps_upsert_file,sqlite_ps & ps_lookup_file,sqlite_ps & ps_upsert_de,sqlite_ps & ps_upsert_sref,sqlite_ps & ps_upsert_sdef,time_t mtime,unsigned & fts_executable,unsigned & fts_debuginfo,unsigned & fts_sref,unsigned & fts_sdef,bool & fts_sref_complete_p)3428 archive_classify (const string& rps, string& archive_extension, int64_t archiveid,
3429                   sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_fileparts, sqlite_ps& ps_upsert_file,
3430                   sqlite_ps& ps_lookup_file,
3431                   sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
3432                   time_t mtime,
3433                   unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
3434                   bool& fts_sref_complete_p)
3435 {
3436   string archive_decoder = "/dev/null";
3437   for (auto&& arch : scan_archives)
3438     if (string_endswith(rps, arch.first))
3439       {
3440         archive_extension = arch.first;
3441         archive_decoder = arch.second;
3442       }
3443 
3444   FILE* fp;
3445   defer_dtor<FILE*,int>::dtor_fn dfn;
3446   if (archive_decoder != "cat")
3447     {
3448       string popen_cmd = archive_decoder + " " + shell_escape(rps);
3449       fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
3450       dfn = pclose;
3451       if (fp == NULL)
3452         throw libc_exception (errno, string("popen ") + popen_cmd);
3453     }
3454   else
3455     {
3456       fp = fopen (rps.c_str(), "r");
3457       dfn = fclose;
3458       if (fp == NULL)
3459         throw libc_exception (errno, string("fopen ") + rps);
3460     }
3461   defer_dtor<FILE*,int> fp_closer (fp, dfn);
3462 
3463   struct archive *a;
3464   a = archive_read_new();
3465   if (a == NULL)
3466     throw archive_exception("cannot create archive reader");
3467   defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
3468 
3469   int rc = archive_read_support_format_all(a);
3470   if (rc != ARCHIVE_OK)
3471     throw archive_exception(a, "cannot select all formats");
3472   rc = archive_read_support_filter_all(a);
3473   if (rc != ARCHIVE_OK)
3474     throw archive_exception(a, "cannot select all filters");
3475 
3476   rc = archive_read_open_FILE (a, fp);
3477   if (rc != ARCHIVE_OK)
3478     {
3479       obatched(clog) << "cannot open archive from pipe " << rps << endl;
3480       throw archive_exception(a, "cannot open archive from pipe");
3481     }
3482 
3483   if (verbose > 3)
3484     obatched(clog) << "libarchive scanning " << rps << " id " << archiveid << endl;
3485 
3486   bool any_exceptions = false;
3487   while(1) // parse archive entries
3488     {
3489     if (interrupted)
3490       break;
3491 
3492     try
3493         {
3494           struct archive_entry *e;
3495           rc = archive_read_next_header (a, &e);
3496           if (rc != ARCHIVE_OK)
3497             break;
3498 
3499           if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
3500             continue;
3501 
3502           string fn = canonicalized_archive_entry_pathname (e);
3503 
3504           if (verbose > 3)
3505             obatched(clog) << "libarchive checking " << fn << endl;
3506 
3507           // extract this file to a temporary file
3508           char* tmppath = NULL;
3509           rc = asprintf (&tmppath, "%s/debuginfod-classify.XXXXXX", tmpdir.c_str());
3510           if (rc < 0)
3511             throw libc_exception (ENOMEM, "cannot allocate tmppath");
3512           defer_dtor<void*,void> tmmpath_freer (tmppath, free);
3513           int fd = mkstemp (tmppath);
3514           if (fd < 0)
3515             throw libc_exception (errno, "cannot create temporary file");
3516           unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
3517           defer_dtor<int,int> minifd_closer (fd, close);
3518 
3519           rc = archive_read_data_into_fd (a, fd);
3520           if (rc != ARCHIVE_OK) {
3521             close (fd);
3522             throw archive_exception(a, "cannot extract file");
3523           }
3524 
3525           // finally ... time to run elf_classify on this bad boy and update the database
3526           bool executable_p = false, debuginfo_p = false;
3527           string buildid;
3528           set<string> sourcefiles;
3529           elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
3530           // NB: might throw
3531 
3532           if (buildid != "") // intern buildid
3533             {
3534               ps_upsert_buildids
3535                 .reset()
3536                 .bind(1, buildid)
3537                 .step_ok_done();
3538             }
3539 
3540           int64_t fileid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, fn);
3541 
3542           if (sourcefiles.size() > 0) // sref records needed
3543             {
3544               // NB: we intern each source file once.  Once raw, as it
3545               // appears in the DWARF file list coming back from
3546               // elf_classify() - because it'll end up in the
3547               // _norm.artifactsrc column.  We don't also put another
3548               // version with a '.' at the front, even though that's
3549               // how rpm/cpio packs names, because we hide that from
3550               // the database for storage efficiency.
3551 
3552               for (auto&& s : sourcefiles)
3553                 {
3554                   if (s == "")
3555                     {
3556                       fts_sref_complete_p = false;
3557                       continue;
3558                     }
3559 
3560                   // PR25548: store canonicalized source path
3561                   const string& dwarfsrc = s;
3562                   string dwarfsrc_canon = canon_pathname (dwarfsrc);
3563                   if (dwarfsrc_canon != dwarfsrc)
3564                     {
3565                       if (verbose > 3)
3566                         obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
3567                     }
3568 
3569                   int64_t srcfileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
3570                                                          dwarfsrc_canon);
3571 
3572                   ps_upsert_sref
3573                     .reset()
3574                     .bind(1, buildid)
3575                     .bind(2, srcfileid)
3576                     .step_ok_done();
3577 
3578                   fts_sref ++;
3579                 }
3580             }
3581 
3582           if (executable_p)
3583             fts_executable ++;
3584           if (debuginfo_p)
3585             fts_debuginfo ++;
3586 
3587           if (executable_p || debuginfo_p)
3588             {
3589               ps_upsert_de
3590                 .reset()
3591                 .bind(1, buildid)
3592                 .bind(2, debuginfo_p ? 1 : 0)
3593                 .bind(3, executable_p ? 1 : 0)
3594                 .bind(4, archiveid)
3595                 .bind(5, mtime)
3596                 .bind(6, fileid)
3597                 .step_ok_done();
3598             }
3599           else // potential source - sdef record
3600             {
3601               fts_sdef ++;
3602               ps_upsert_sdef
3603                 .reset()
3604                 .bind(1, archiveid)
3605                 .bind(2, mtime)
3606                 .bind(3, fileid)
3607                 .step_ok_done();
3608             }
3609 
3610           if ((verbose > 2) && (executable_p || debuginfo_p))
3611             obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
3612                            << " mtime=" << mtime << " atype="
3613                            << (executable_p ? "E" : "")
3614                            << (debuginfo_p ? "D" : "")
3615                            << " sourcefiles=" << sourcefiles.size() << endl;
3616 
3617         }
3618       catch (const reportable_exception& e)
3619         {
3620           e.report(clog);
3621           any_exceptions = true;
3622           // NB: but we allow the libarchive iteration to continue, in
3623           // case we can still gather some useful information.  That
3624           // would allow some webapi queries to work, until later when
3625           // this archive is rescanned.  (Its vitals won't go into the
3626           // _file_mtime_scanned table until after a successful scan.)
3627         }
3628     }
3629 
3630   if (any_exceptions)
3631     throw reportable_exception("exceptions encountered during archive scan");
3632 }
3633 
3634 
3635 
3636 // scan for archive files such as .rpm
3637 static void
scan_archive_file(const string & rps,const stat_t & st,sqlite_ps & ps_upsert_buildids,sqlite_ps & ps_upsert_fileparts,sqlite_ps & ps_upsert_file,sqlite_ps & ps_lookup_file,sqlite_ps & ps_upsert_de,sqlite_ps & ps_upsert_sref,sqlite_ps & ps_upsert_sdef,sqlite_ps & ps_query,sqlite_ps & ps_scan_done,unsigned & fts_cached,unsigned & fts_executable,unsigned & fts_debuginfo,unsigned & fts_sref,unsigned & fts_sdef)3638 scan_archive_file (const string& rps, const stat_t& st,
3639                    sqlite_ps& ps_upsert_buildids,
3640                    sqlite_ps& ps_upsert_fileparts,
3641                    sqlite_ps& ps_upsert_file,
3642                    sqlite_ps& ps_lookup_file,
3643                    sqlite_ps& ps_upsert_de,
3644                    sqlite_ps& ps_upsert_sref,
3645                    sqlite_ps& ps_upsert_sdef,
3646                    sqlite_ps& ps_query,
3647                    sqlite_ps& ps_scan_done,
3648                    unsigned& fts_cached,
3649                    unsigned& fts_executable,
3650                    unsigned& fts_debuginfo,
3651                    unsigned& fts_sref,
3652                    unsigned& fts_sdef)
3653 {
3654   // intern the archive file name
3655   int64_t archiveid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
3656 
3657   /* See if we know of it already. */
3658   int rc = ps_query
3659     .reset()
3660     .bind(1, archiveid)
3661     .bind(2, st.st_mtime)
3662     .step();
3663   ps_query.reset();
3664   if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
3665     // no need to recheck a file/version we already know
3666     // specifically, no need to parse this archive again, since we already have
3667     // it as a D or E or S record,
3668     // (so is stored with buildid=NULL)
3669     {
3670       fts_cached ++;
3671       return;
3672     }
3673 
3674   // extract the archive contents
3675   unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
3676   bool my_fts_sref_complete_p = true;
3677   bool any_exceptions = false;
3678   try
3679     {
3680       string archive_extension;
3681       archive_classify (rps, archive_extension, archiveid,
3682                         ps_upsert_buildids, ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
3683                         ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
3684                         st.st_mtime,
3685                         my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
3686                         my_fts_sref_complete_p);
3687       add_metric ("scanned_bytes_total","source",archive_extension + " archive",
3688                   st.st_size);
3689       inc_metric ("scanned_files_total","source",archive_extension + " archive");
3690       add_metric("found_debuginfo_total","source",archive_extension + " archive",
3691                  my_fts_debuginfo);
3692       add_metric("found_executable_total","source",archive_extension + " archive",
3693                  my_fts_executable);
3694       add_metric("found_sourcerefs_total","source",archive_extension + " archive",
3695                  my_fts_sref);
3696     }
3697   catch (const reportable_exception& e)
3698     {
3699       e.report(clog);
3700       any_exceptions = true;
3701     }
3702 
3703   if (verbose > 2)
3704     obatched(clog) << "scanned archive=" << rps
3705                    << " mtime=" << st.st_mtime
3706                    << " executables=" << my_fts_executable
3707                    << " debuginfos=" << my_fts_debuginfo
3708                    << " srefs=" << my_fts_sref
3709                    << " sdefs=" << my_fts_sdef
3710                    << " exceptions=" << any_exceptions
3711                    << endl;
3712 
3713   fts_executable += my_fts_executable;
3714   fts_debuginfo += my_fts_debuginfo;
3715   fts_sref += my_fts_sref;
3716   fts_sdef += my_fts_sdef;
3717 
3718   if (any_exceptions)
3719     throw reportable_exception("exceptions encountered during archive scan");
3720 
3721   if (my_fts_sref_complete_p) // leave incomplete?
3722     ps_scan_done
3723       .reset()
3724       .bind(1, archiveid)
3725       .bind(2, st.st_mtime)
3726       .bind(3, st.st_size)
3727       .step_ok_done();
3728 }
3729 
3730 
3731 
3732 ////////////////////////////////////////////////////////////////////////
3733 
3734 
3735 
3736 // The thread that consumes file names off of the scanq.  We hold
3737 // the persistent sqlite_ps's at this level and delegate file/archive
3738 // scanning to other functions.
3739 static void
scan()3740 scan ()
3741 {
3742   // all the prepared statements fit to use, the _f_ set:
3743   sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
3744   sqlite_ps ps_f_upsert_fileparts (db, "file-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
3745   sqlite_ps ps_f_upsert_file (db, "file-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
3746                               "(select id from " BUILDIDS "_fileparts where name = ?),\n"
3747                               "(select id from " BUILDIDS "_fileparts where name = ?));");
3748   sqlite_ps ps_f_lookup_file (db, "file-file-lookup",
3749                               "select f.id\n"
3750                               " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
3751                               " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
3752   sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
3753                           "insert or ignore into " BUILDIDS "_f_de "
3754                           "(buildid, debuginfo_p, executable_p, file, mtime) "
3755                           "values ((select id from " BUILDIDS "_buildids where hex = ?),"
3756                             "        ?,?,?,?);");
3757   sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
3758                          "insert or ignore into " BUILDIDS "_f_s "
3759                          "(buildid, artifactsrc, file, mtime) "
3760                          "values ((select id from " BUILDIDS "_buildids where hex = ?),"
3761                          "      ?,?,?);");
3762   sqlite_ps ps_f_query (db, "file-negativehit-find",
3763                         "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
3764                         "and file = ? and mtime = ?;");
3765   sqlite_ps ps_f_scan_done (db, "file-scanned",
3766                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
3767                           "values ('F', ?,?,?);");
3768 
3769   // and now for the _r_ set
3770   sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
3771   sqlite_ps ps_r_upsert_fileparts (db, "rpm-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
3772   sqlite_ps ps_r_upsert_file (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
3773                               "(select id from " BUILDIDS "_fileparts where name = ?),\n"
3774                               "(select id from " BUILDIDS "_fileparts where name = ?));");
3775   sqlite_ps ps_r_lookup_file (db, "rpm-file-lookup",
3776                               "select f.id\n"
3777                               " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
3778                               " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
3779   sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
3780                           "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
3781                           "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, ?, ?, ?);");
3782   sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
3783                             "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
3784                             "(select id from " BUILDIDS "_buildids where hex = ?), "
3785                             "?);");
3786   sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
3787                             "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
3788                             "?, ?, ?);");
3789   sqlite_ps ps_r_query (db, "rpm-negativehit-query",
3790                       "select 1 from " BUILDIDS "_file_mtime_scanned where "
3791                       "sourcetype = 'R' and file = ? and mtime = ?;");
3792   sqlite_ps ps_r_scan_done (db, "rpm-scanned",
3793                           "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
3794                           "values ('R', ?, ?, ?);");
3795 
3796 
3797   unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
3798   unsigned fts_sref = 0, fts_sdef = 0;
3799 
3800   add_metric("thread_count", "role", "scan", 1);
3801   add_metric("thread_busy", "role", "scan", 1);
3802   while (! interrupted)
3803     {
3804       scan_payload p;
3805 
3806       add_metric("thread_busy", "role", "scan", -1);
3807       // NB: threads may be blocked within either of these two waiting
3808       // states, if the work queue happens to run dry.  That's OK.
3809       if (scan_barrier) scan_barrier->count();
3810       bool gotone = scanq.wait_front(p);
3811       add_metric("thread_busy", "role", "scan", 1);
3812 
3813       if (! gotone) continue; // go back to waiting
3814 
3815       try
3816         {
3817           bool scan_archive = false;
3818           for (auto&& arch : scan_archives)
3819             if (string_endswith(p.first, arch.first))
3820               scan_archive = true;
3821 
3822           if (scan_archive)
3823             scan_archive_file (p.first, p.second,
3824                                ps_r_upsert_buildids,
3825                                ps_r_upsert_fileparts,
3826                                ps_r_upsert_file,
3827                                ps_r_lookup_file,
3828                                ps_r_upsert_de,
3829                                ps_r_upsert_sref,
3830                                ps_r_upsert_sdef,
3831                                ps_r_query,
3832                                ps_r_scan_done,
3833                                fts_cached,
3834                                fts_executable,
3835                                fts_debuginfo,
3836                                fts_sref,
3837                                fts_sdef);
3838 
3839           if (scan_files) // NB: maybe "else if" ?
3840             scan_source_file (p.first, p.second,
3841                               ps_f_upsert_buildids,
3842                               ps_f_upsert_fileparts,
3843                               ps_f_upsert_file,
3844                               ps_f_lookup_file,
3845                               ps_f_upsert_de,
3846                               ps_f_upsert_s,
3847                               ps_f_query,
3848                               ps_f_scan_done,
3849                               fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
3850         }
3851       catch (const reportable_exception& e)
3852         {
3853           e.report(cerr);
3854         }
3855 
3856       scanq.done_front(); // let idlers run
3857 
3858       if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
3859         {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
3860       (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
3861       (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
3862 
3863       // finished a scanning step -- not a "loop", because we just
3864       // consume the traversal loop's work, whenever
3865       inc_metric("thread_work_total","role","scan");
3866     }
3867 
3868   add_metric("thread_busy", "role", "scan", -1);
3869 }
3870 
3871 
3872 // Use this function as the thread entry point, so it can catch our
3873 // fleet of exceptions (incl. the sqlite_ps ctors) and report.
3874 static void*
thread_main_scanner(void * arg)3875 thread_main_scanner (void* arg)
3876 {
3877   (void) arg;
3878   while (! interrupted)
3879     try
3880       {
3881         scan();
3882       }
3883     catch (const reportable_exception& e)
3884       {
3885         e.report(cerr);
3886       }
3887   return 0;
3888 }
3889 
3890 
3891 
3892 // The thread that traverses all the source_paths and enqueues all the
3893 // matching files into the file/archive scan queue.
3894 static void
scan_source_paths()3895 scan_source_paths()
3896 {
3897   // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
3898   // path list.
3899   if (source_paths.empty())
3900     return;
3901 
3902   // Turn the source_paths into an fts(3)-compatible char**.  Since
3903   // source_paths[] does not change after argv processing, the
3904   // c_str()'s are safe to keep around awile.
3905   vector<const char *> sps;
3906   for (auto&& sp: source_paths)
3907     sps.push_back(sp.c_str());
3908   sps.push_back(NULL);
3909 
3910   FTS *fts = fts_open ((char * const *)sps.data(),
3911                       (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
3912                       | FTS_NOCHDIR /* multithreaded */,
3913                       NULL);
3914   if (fts == NULL)
3915     throw libc_exception(errno, "cannot fts_open");
3916   defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
3917 
3918   struct timespec ts_start, ts_end;
3919   clock_gettime (CLOCK_MONOTONIC, &ts_start);
3920   unsigned fts_scanned = 0, fts_regex = 0;
3921 
3922   FTSENT *f;
3923   while ((f = fts_read (fts)) != NULL)
3924   {
3925     if (interrupted) break;
3926 
3927     if (sigusr2 != forced_groom_count) // stop early if groom triggered
3928       {
3929         scanq.clear(); // clear previously issued work for scanner threads
3930         break;
3931       }
3932 
3933     fts_scanned ++;
3934 
3935     if (verbose > 2)
3936       obatched(clog) << "fts traversing " << f->fts_path << endl;
3937 
3938     switch (f->fts_info)
3939       {
3940       case FTS_F:
3941         {
3942           /* Found a file.  Convert it to an absolute path, so
3943              the buildid database does not have relative path
3944              names that are unresolvable from a subsequent run
3945              in a different cwd. */
3946           char *rp = realpath(f->fts_path, NULL);
3947           if (rp == NULL)
3948             continue; // ignore dangling symlink or such
3949           string rps = string(rp);
3950           free (rp);
3951 
3952           bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
3953           bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
3954           if (!ri || rx)
3955             {
3956               if (verbose > 3)
3957                 obatched(clog) << "fts skipped by regex "
3958                                << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
3959               fts_regex ++;
3960               if (!ri)
3961                 inc_metric("traversed_total","type","file-skipped-I");
3962               if (rx)
3963                 inc_metric("traversed_total","type","file-skipped-X");
3964             }
3965           else
3966             {
3967               scanq.push_back (make_pair(rps, *f->fts_statp));
3968               inc_metric("traversed_total","type","file");
3969             }
3970         }
3971         break;
3972 
3973       case FTS_ERR:
3974       case FTS_NS:
3975         // report on some types of errors because they may reflect fixable misconfiguration
3976         {
3977           auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
3978           x.report(cerr);
3979         }
3980         inc_metric("traversed_total","type","error");
3981         break;
3982 
3983       case FTS_SL: // ignore, but count because debuginfod -L would traverse these
3984         inc_metric("traversed_total","type","symlink");
3985         break;
3986 
3987       case FTS_D: // ignore
3988         inc_metric("traversed_total","type","directory");
3989         break;
3990 
3991       default: // ignore
3992         inc_metric("traversed_total","type","other");
3993         break;
3994       }
3995   }
3996   clock_gettime (CLOCK_MONOTONIC, &ts_end);
3997   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
3998 
3999   obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
4000                  << ", regex-skipped=" << fts_regex << endl;
4001 }
4002 
4003 
4004 static void*
thread_main_fts_source_paths(void * arg)4005 thread_main_fts_source_paths (void* arg)
4006 {
4007   (void) arg; // ignore; we operate on global data
4008 
4009   set_metric("thread_tid", "role","traverse", tid());
4010   add_metric("thread_count", "role", "traverse", 1);
4011 
4012   time_t last_rescan = 0;
4013 
4014   while (! interrupted)
4015     {
4016       sleep (1);
4017       scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
4018       scanq.done_idle(); // release the hounds
4019       if (interrupted) break;
4020 
4021       time_t now = time(NULL);
4022       bool rescan_now = false;
4023       if (last_rescan == 0) // at least one initial rescan is documented even for -t0
4024         rescan_now = true;
4025       if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
4026         rescan_now = true;
4027       if (sigusr1 != forced_rescan_count)
4028         {
4029           forced_rescan_count = sigusr1;
4030           rescan_now = true;
4031         }
4032       if (rescan_now)
4033         {
4034           set_metric("thread_busy", "role","traverse", 1);
4035           try
4036             {
4037               scan_source_paths();
4038             }
4039           catch (const reportable_exception& e)
4040             {
4041               e.report(cerr);
4042             }
4043           last_rescan = time(NULL); // NB: now was before scanning
4044           // finished a traversal loop
4045           inc_metric("thread_work_total", "role","traverse");
4046           set_metric("thread_busy", "role","traverse", 0);
4047         }
4048     }
4049 
4050   return 0;
4051 }
4052 
4053 
4054 
4055 ////////////////////////////////////////////////////////////////////////
4056 
4057 static void
database_stats_report()4058 database_stats_report()
4059 {
4060   sqlite_ps ps_query (db, "database-overview",
4061                       "select label,quantity from " BUILDIDS "_stats");
4062 
4063   obatched(clog) << "database record counts:" << endl;
4064   while (1)
4065     {
4066       if (interrupted) break;
4067       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
4068         break;
4069 
4070       int rc = ps_query.step();
4071       if (rc == SQLITE_DONE) break;
4072       if (rc != SQLITE_ROW)
4073         throw sqlite_exception(rc, "step");
4074 
4075       obatched(clog)
4076         << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
4077         << " "
4078         << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
4079         << endl;
4080 
4081       set_metric("groom", "statistic",
4082                  ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
4083                  (sqlite3_column_double(ps_query, 1)));
4084     }
4085 }
4086 
4087 
4088 // Do a round of database grooming that might take many minutes to run.
groom()4089 void groom()
4090 {
4091   obatched(clog) << "grooming database" << endl;
4092 
4093   struct timespec ts_start, ts_end;
4094   clock_gettime (CLOCK_MONOTONIC, &ts_start);
4095 
4096   // scan for files that have disappeared
4097   sqlite_ps files (db, "check old files",
4098                    "select distinct s.mtime, s.file, f.name from "
4099                    BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files_v f "
4100                    "where f.id = s.file");
4101   // NB: Because _ftime_mtime_scanned can contain both F and
4102   // R records for the same file, this query would return duplicates if the
4103   // DISTINCT qualifier were not there.
4104   files.reset();
4105 
4106   // DECISION TIME - we enumerate stale fileids/mtimes
4107   deque<pair<int64_t,int64_t> > stale_fileid_mtime;
4108 
4109   time_t time_start = time(NULL);
4110   while(1)
4111     {
4112       // PR28514: limit grooming iteration to O(rescan time), to avoid
4113       // slow filesystem tests over many files locking out rescans for
4114       // too long.
4115       if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
4116         {
4117           inc_metric("groomed_total", "decision", "aborted");
4118           break;
4119         }
4120 
4121       if (interrupted) break;
4122 
4123       int rc = files.step();
4124       if (rc != SQLITE_ROW)
4125         break;
4126 
4127       int64_t mtime = sqlite3_column_int64 (files, 0);
4128       int64_t fileid = sqlite3_column_int64 (files, 1);
4129       const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
4130       struct stat s;
4131       bool regex_file_drop = 0;
4132 
4133       if (regex_groom)
4134         {
4135           bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
4136           bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
4137           regex_file_drop = !reg_include || reg_exclude; // match logic of scan_source_paths
4138         }
4139 
4140       rc = stat(filename, &s);
4141       if ( regex_file_drop ||  rc < 0 || (mtime != (int64_t) s.st_mtime) )
4142         {
4143           if (verbose > 2)
4144             obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
4145           stale_fileid_mtime.push_back(make_pair(fileid,mtime));
4146           inc_metric("groomed_total", "decision", "stale");
4147           set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
4148         }
4149       else
4150         inc_metric("groomed_total", "decision", "fresh");
4151 
4152       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
4153         break;
4154     }
4155   files.reset();
4156 
4157   // ACTION TIME
4158 
4159   // Now that we know which file/mtime tuples are stale, actually do
4160   // the deletion from the database.  Doing this during the SELECT
4161   // iteration above results in undefined behaviour in sqlite, as per
4162   // https://www.sqlite.org/isolation.html
4163 
4164   // We could shuffle stale_fileid_mtime[] here.  It'd let aborted
4165   // sequences of nuke operations resume at random locations, instead
4166   // of just starting over.  But it doesn't matter much either way,
4167   // as long as we make progress.
4168 
4169   sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
4170   sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
4171   sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
4172                             "where file = ? and mtime = ?");
4173 
4174   while (! stale_fileid_mtime.empty())
4175     {
4176       auto stale = stale_fileid_mtime.front();
4177       stale_fileid_mtime.pop_front();
4178       set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
4179 
4180       // PR28514: limit grooming iteration to O(rescan time), to avoid
4181       // slow nuke_* queries over many files locking out rescans for too
4182       // long.  We iterate over the files in random() sequence to avoid
4183       // partial checks going over the same set.
4184       if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
4185         {
4186           inc_metric("groomed_total", "action", "aborted");
4187           break;
4188         }
4189 
4190       if (interrupted) break;
4191 
4192       int64_t fileid = stale.first;
4193       int64_t mtime = stale.second;
4194       files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
4195       files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
4196       files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
4197       inc_metric("groomed_total", "action", "cleaned");
4198 
4199        if (sigusr1 != forced_rescan_count) // stop early if scan triggered
4200         break;
4201     }
4202   stale_fileid_mtime.clear(); // no need for this any longer
4203   set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
4204 
4205   // delete buildids with no references in _r_de or _f_de tables;
4206   // cascades to _r_sref & _f_s records
4207   sqlite_ps buildids_del (db, "nuke orphan buildids",
4208                           "delete from " BUILDIDS "_buildids "
4209                           "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
4210                           "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
4211   buildids_del.reset().step_ok_done();
4212 
4213   if (interrupted) return;
4214 
4215   // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
4216   sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
4217   g1.reset().step_ok_done();
4218   sqlite_ps g2 (db, "optimize", "pragma optimize");
4219   g2.reset().step_ok_done();
4220   sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
4221   g3.reset().step_ok_done();
4222 
4223   database_stats_report();
4224 
4225   (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
4226 
4227   sqlite3_db_release_memory(db); // shrink the process if possible
4228   sqlite3_db_release_memory(dbq); // ... for both connections
4229   debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
4230 
4231 #if 0 /* PR31265: don't jettison cache unnecessarily */
4232 
4233   fdcache.limit(0); // release the fdcache contents
4234   fdcache.limit(fdcache_mbs); // restore status quo parameters
4235 #endif
4236 
4237   clock_gettime (CLOCK_MONOTONIC, &ts_end);
4238   double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
4239 
4240   obatched(clog) << "groomed database in " << deltas << "s" << endl;
4241 }
4242 
4243 
4244 static void*
thread_main_groom(void *)4245 thread_main_groom (void* /*arg*/)
4246 {
4247   set_metric("thread_tid", "role", "groom", tid());
4248   add_metric("thread_count", "role", "groom", 1);
4249 
4250   time_t last_groom = 0;
4251 
4252   while (1)
4253     {
4254       sleep (1);
4255       scanq.wait_idle(); // PR25394: block scanners during grooming!
4256       if (interrupted) break;
4257 
4258       time_t now = time(NULL);
4259       bool groom_now = false;
4260       if (last_groom == 0) // at least one initial groom is documented even for -g0
4261         groom_now = true;
4262       if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
4263         groom_now = true;
4264       if (sigusr2 != forced_groom_count)
4265         {
4266           forced_groom_count = sigusr2;
4267           groom_now = true;
4268         }
4269       if (groom_now)
4270         {
4271           set_metric("thread_busy", "role", "groom", 1);
4272           try
4273             {
4274               groom ();
4275             }
4276           catch (const sqlite_exception& e)
4277             {
4278               obatched(cerr) << e.message << endl;
4279             }
4280           last_groom = time(NULL); // NB: now was before grooming
4281           // finished a grooming loop
4282           inc_metric("thread_work_total", "role", "groom");
4283           set_metric("thread_busy", "role", "groom", 0);
4284         }
4285 
4286       scanq.done_idle();
4287     }
4288 
4289   return 0;
4290 }
4291 
4292 
4293 ////////////////////////////////////////////////////////////////////////
4294 
4295 
4296 static void
signal_handler(int)4297 signal_handler (int /* sig */)
4298 {
4299   interrupted ++;
4300 
4301   if (db)
4302     sqlite3_interrupt (db);
4303   if (dbq)
4304     sqlite3_interrupt (dbq);
4305 
4306   // NB: don't do anything else in here
4307 }
4308 
4309 static void
sigusr1_handler(int)4310 sigusr1_handler (int /* sig */)
4311 {
4312    sigusr1 ++;
4313   // NB: don't do anything else in here
4314 }
4315 
4316 static void
sigusr2_handler(int)4317 sigusr2_handler (int /* sig */)
4318 {
4319    sigusr2 ++;
4320   // NB: don't do anything else in here
4321 }
4322 
4323 
4324 static void // error logging callback from libmicrohttpd internals
error_cb(void * arg,const char * fmt,va_list ap)4325 error_cb (void *arg, const char *fmt, va_list ap)
4326 {
4327   (void) arg;
4328   inc_metric("error_count","libmicrohttpd",fmt);
4329   char errmsg[512];
4330   (void) vsnprintf (errmsg, sizeof(errmsg), fmt, ap); // ok if slightly truncated
4331   obatched(cerr) << "libmicrohttpd error: " << errmsg; // MHD_DLOG calls already include \n
4332 }
4333 
4334 
4335 // A user-defined sqlite function, to score the sharedness of the
4336 // prefix of two strings.  This is used to compare candidate debuginfo
4337 // / source-rpm names, so that the closest match
4338 // (directory-topology-wise closest) is found.  This is important in
4339 // case the same sref (source file name) is in many -debuginfo or
4340 // -debugsource RPMs, such as when multiple versions/releases of the
4341 // same package are in the database.
4342 
sqlite3_sharedprefix_fn(sqlite3_context * c,int argc,sqlite3_value ** argv)4343 static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
4344 {
4345   if (argc != 2)
4346     sqlite3_result_error(c, "expect 2 string arguments", -1);
4347   else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
4348            (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
4349     sqlite3_result_null(c);
4350   else
4351     {
4352       const unsigned char* a = sqlite3_value_text (argv[0]);
4353       const unsigned char* b = sqlite3_value_text (argv[1]);
4354       int i = 0;
4355       while (*a != '\0' && *b != '\0' && *a++ == *b++)
4356         i++;
4357       sqlite3_result_int (c, i);
4358     }
4359 }
4360 
4361 
// Compute a default thread count: the smallest of the hardware
// concurrency, the CPU affinity mask size, and a budget derived from
// the soft RLIMIT_NOFILE.  Each input is floored at 1 before taking
// the minimum, so an unavailable measurement (left at 0) counts as 1.
static unsigned
default_concurrency() // guaranteed >= 1
{
  // Prior to PR29975 & PR29976, we'd just use this:
  unsigned sth = std::thread::hardware_concurrency();
  // ... but on many-CPU boxes, admins or distros may throttle
  // resources in such a way that debuginfod would mysteriously fail.
  // So we reduce the defaults:

  unsigned aff = 0;
#ifdef HAVE_SCHED_GETAFFINITY
  {
    int ret;
    cpu_set_t mask;
    CPU_ZERO(&mask);
    ret = sched_getaffinity(0, sizeof(mask), &mask);
    if (ret == 0)
      aff = CPU_COUNT(&mask);
  }
#endif

  unsigned fn = 0;
#ifdef HAVE_GETRLIMIT
  {
    struct rlimit rlim;
    int rc = getrlimit(RLIMIT_NOFILE, &rlim);
    if (rc == 0)
      {
        // at least 2 fds are used by each listener thread etc.
        // plus a bunch to account for shared libraries and such
        const rlim_t reserved_fds = 100;
        const rlim_t fds_per_thread = 4;

        // rlim_t is unsigned: subtracting from a soft limit at or below
        // the reserve would wrap around to a huge value, so guard it.
        rlim_t budget = 1;
        if (rlim.rlim_cur > reserved_fds)
          budget = max((rlim_t)1, (rlim.rlim_cur - reserved_fds) / fds_per_thread);

        // Clamp before narrowing so a very large (or unlimited) soft
        // limit cannot truncate to 0 or some arbitrary small number.
        const rlim_t widest = (rlim_t) ~0U;
        fn = (budget > widest) ? ~0U : (unsigned) budget;
      }
  }
#endif

  unsigned d = min(max(sth, 1U),
                   min(max(aff, 1U),
                       max(fn, 1U)));
  return d;
}
4400 
4401 
// 30879: Something to help out in case of an uncaught exception.
// Installed via std::set_terminate in main().  Where the platform
// macros allow, dumps a backtrace to stderr and invokes the GNU
// verbose terminate handler; always ends in abort().
void my_terminate_handler()
{
#if defined(__GLIBC__)
  enum { max_frames = 40 };
  void *frames[max_frames];
  const int depth = backtrace (frames, max_frames);
  backtrace_symbols_fd (frames, depth, STDERR_FILENO);
#endif
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
  __gnu_cxx::__verbose_terminate_handler();
#endif
  abort();
}
4415 
4416 
4417 int
main(int argc,char * argv[])4418 main (int argc, char *argv[])
4419 {
4420   (void) setlocale (LC_ALL, "");
4421   (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
4422   (void) textdomain (PACKAGE_TARNAME);
4423 
4424   std::set_terminate(& my_terminate_handler);
4425 
4426   /* Tell the library which version we are expecting.  */
4427   elf_version (EV_CURRENT);
4428 
4429   tmpdir = string(getenv("TMPDIR") ?: "/tmp");
4430 
4431   /* Set computed default values. */
4432   db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
4433   int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
4434   if (rc != 0)
4435     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
4436   rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
4437   if (rc != 0)
4438     error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
4439 
4440   // default parameters for fdcache are computed from system stats
4441   struct statfs sfs;
4442   rc = statfs(tmpdir.c_str(), &sfs);
4443   if (rc < 0)
4444     fdcache_mbs = 1024; // 1 gigabyte
4445   else
4446     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
4447   fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
4448   fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
4449 
4450   /* Parse and process arguments.  */
4451   int remaining;
4452   (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
4453   if (remaining != argc)
4454       error (EXIT_FAILURE, 0,
4455              "unexpected argument: %s", argv[remaining]);
4456 
4457   if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
4458     obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
4459 
4460   fdcache.limit(fdcache_mbs);
4461 
4462   (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
4463   (void) signal (SIGINT, signal_handler); // ^C
4464   (void) signal (SIGHUP, signal_handler); // EOF
4465   (void) signal (SIGTERM, signal_handler); // systemd
4466   (void) signal (SIGUSR1, sigusr1_handler); // end-user
4467   (void) signal (SIGUSR2, sigusr2_handler); // end-user
4468 
4469   /* Get database ready. */
4470   if (! passive_p)
4471     {
4472       rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
4473                                                    |SQLITE_OPEN_URI
4474                                                    |SQLITE_OPEN_PRIVATECACHE
4475                                                    |SQLITE_OPEN_CREATE
4476                                                    |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
4477                             NULL);
4478       if (rc == SQLITE_CORRUPT)
4479         {
4480           (void) unlink (db_path.c_str());
4481           error (EXIT_FAILURE, 0,
4482                  "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
4483         }
4484       else if (rc)
4485         {
4486           error (EXIT_FAILURE, 0,
4487                  "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
4488         }
4489     }
4490 
4491   // open the readonly query variant
4492   // NB: PRIVATECACHE allows web queries to operate in parallel with
4493   // much other grooming/scanning operation.
4494   rc = sqlite3_open_v2 (db_path.c_str(), &dbq, (SQLITE_OPEN_READONLY
4495                                                 |SQLITE_OPEN_URI
4496                                                 |SQLITE_OPEN_PRIVATECACHE
4497                                                 |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
4498                         NULL);
4499   if (rc)
4500     {
4501       error (EXIT_FAILURE, 0,
4502              "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(dbq));
4503     }
4504 
4505 
4506   obatched(clog) << "opened database " << db_path
4507                  << (db?" rw":"") << (dbq?" ro":"") << endl;
4508   obatched(clog) << "sqlite version " << sqlite3_version << endl;
4509   obatched(clog) << "service mode " << (passive_p ? "passive":"active") << endl;
4510 
4511   // add special string-prefix-similarity function used in rpm sref/sdef resolution
4512   rc = sqlite3_create_function(dbq, "sharedprefix", 2, SQLITE_UTF8, NULL,
4513                                & sqlite3_sharedprefix_fn, NULL, NULL);
4514   if (rc != SQLITE_OK)
4515     error (EXIT_FAILURE, 0,
4516            "cannot create sharedprefix function: %s", sqlite3_errmsg(dbq));
4517 
4518   if (! passive_p)
4519     {
4520       if (verbose > 3)
4521         obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
4522       rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
4523       if (rc != SQLITE_OK)
4524         {
4525           error (EXIT_FAILURE, 0,
4526                  "cannot run database schema ddl: %s", sqlite3_errmsg(db));
4527         }
4528     }
4529 
4530   obatched(clog) << "libmicrohttpd version " << MHD_get_version() << endl;
4531 
4532   /* If '-C' wasn't given or was given with no arg, pick a reasonable default
4533      for the number of worker threads.  */
4534   if (connection_pool == 0)
4535     connection_pool = default_concurrency();
4536 
4537   /* Note that MHD_USE_EPOLL and MHD_USE_THREAD_PER_CONNECTION don't
4538      work together.  */
4539   unsigned int use_epoll = 0;
4540 #if MHD_VERSION >= 0x00095100
4541   use_epoll = MHD_USE_EPOLL;
4542 #endif
4543 
4544   unsigned int mhd_flags = (
4545 #if MHD_VERSION >= 0x00095300
4546 			    MHD_USE_INTERNAL_POLLING_THREAD
4547 #else
4548 			    MHD_USE_SELECT_INTERNALLY
4549 #endif
4550 			    | MHD_USE_DUAL_STACK
4551 			    | use_epoll
4552 #if MHD_VERSION >= 0x00095200
4553 			    | MHD_USE_ITC
4554 #endif
4555 			    | MHD_USE_DEBUG); /* report errors to stderr */
4556 
4557   // Start httpd server threads.  Use a single dual-homed pool.
4558   MHD_Daemon *d46 = MHD_start_daemon (mhd_flags, http_port,
4559 				      NULL, NULL, /* default accept policy */
4560 				      handler_cb, NULL, /* handler callback */
4561 				      MHD_OPTION_EXTERNAL_LOGGER,
4562 				      error_cb, NULL,
4563 				      MHD_OPTION_THREAD_POOL_SIZE,
4564 				      (int)connection_pool,
4565 				      MHD_OPTION_END);
4566 
4567   MHD_Daemon *d4 = NULL;
4568   if (d46 == NULL)
4569     {
4570       // Cannot use dual_stack, use ipv4 only
4571       mhd_flags &= ~(MHD_USE_DUAL_STACK);
4572       d4 = MHD_start_daemon (mhd_flags, http_port,
4573 			     NULL, NULL, /* default accept policy */
4574 			     handler_cb, NULL, /* handler callback */
4575 			     MHD_OPTION_EXTERNAL_LOGGER,
4576 			     error_cb, NULL,
4577 			     (connection_pool
4578 			      ? MHD_OPTION_THREAD_POOL_SIZE
4579 			      : MHD_OPTION_END),
4580 			     (connection_pool
4581 			      ? (int)connection_pool
4582 			      : MHD_OPTION_END),
4583 			     MHD_OPTION_END);
4584       if (d4 == NULL)
4585 	{
4586 	  sqlite3 *database = db;
4587 	  sqlite3 *databaseq = dbq;
4588 	  db = dbq = 0; // for signal_handler not to freak
4589 	  sqlite3_close (databaseq);
4590 	  sqlite3_close (database);
4591 	  error (EXIT_FAILURE, 0, "cannot start http server at port %d",
4592 		 http_port);
4593 	}
4594 
4595     }
4596   obatched(clog) << "started http server on"
4597                  << (d4 != NULL ? " IPv4 " : " IPv4 IPv6 ")
4598                  << "port=" << http_port << endl;
4599 
4600   // add maxigroom sql if -G given
4601   if (maxigroom)
4602     {
4603       obatched(clog) << "maxigrooming database, please wait." << endl;
4604       extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
4605       extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
4606       extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
4607 
4608       // NB: we don't maxigroom the _files interning table.  It'd require a temp index on all the
4609       // tables that have file foreign-keys, which is a lot.
4610 
4611       // NB: with =delete, may take up 3x disk space total during vacuum process
4612       //     vs.  =off (only 2x but may corrupt database if program dies mid-vacuum)
4613       //     vs.  =wal (>3x observed, but safe)
4614       extra_ddl.push_back("pragma journal_mode=delete;");
4615       extra_ddl.push_back("vacuum;");
4616       extra_ddl.push_back("pragma journal_mode=wal;");
4617     }
4618 
4619   // run extra -D sql if given
4620   if (! passive_p)
4621     for (auto&& i: extra_ddl)
4622       {
4623         if (verbose > 1)
4624           obatched(clog) << "extra ddl:\n" << i << endl;
4625         rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
4626         if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
4627           error (0, 0,
4628                  "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
4629 
4630         if (maxigroom)
4631           obatched(clog) << "maxigroomed database" << endl;
4632       }
4633 
4634   if (! passive_p)
4635     obatched(clog) << "search concurrency " << concurrency << endl;
4636   obatched(clog) << "webapi connection pool " << connection_pool
4637                  << (connection_pool ? "" : " (unlimited)") << endl;
4638   if (! passive_p) {
4639     obatched(clog) << "rescan time " << rescan_s << endl;
4640     obatched(clog) << "scan checkpoint " << scan_checkpoint << endl;
4641   }
4642   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
4643   obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
4644   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
4645   obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
4646   if (! passive_p)
4647     obatched(clog) << "groom time " << groom_s << endl;
4648   obatched(clog) << "forwarded ttl limit " << forwarded_ttl_limit << endl;
4649 
4650   if (scan_archives.size()>0)
4651     {
4652       obatched ob(clog);
4653       auto& o = ob << "accepting archive types ";
4654       for (auto&& arch : scan_archives)
4655 	o << arch.first << "(" << arch.second << ") ";
4656       o << endl;
4657     }
4658   const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
4659   if (du && du[0] != '\0') // set to non-empty string?
4660     obatched(clog) << "upstream debuginfod servers: " << du << endl;
4661 
4662   vector<pthread_t> all_threads;
4663 
4664   if (! passive_p)
4665     {
4666       pthread_t pt;
4667       rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
4668       if (rc)
4669         error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
4670       else
4671         {
4672 #ifdef HAVE_PTHREAD_SETNAME_NP
4673           (void) pthread_setname_np (pt, "groom");
4674 #endif
4675           all_threads.push_back(pt);
4676         }
4677 
4678       if (scan_files || scan_archives.size() > 0)
4679         {
4680           if (scan_checkpoint > 0)
4681             scan_barrier = new sqlite_checkpoint_pb(concurrency, (unsigned) scan_checkpoint);
4682 
4683           rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
4684           if (rc)
4685             error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
4686 #ifdef HAVE_PTHREAD_SETNAME_NP
4687           (void) pthread_setname_np (pt, "traverse");
4688 #endif
4689           all_threads.push_back(pt);
4690 
4691           for (unsigned i=0; i<concurrency; i++)
4692             {
4693               rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
4694               if (rc)
4695                 error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
4696 #ifdef HAVE_PTHREAD_SETNAME_NP
4697               (void) pthread_setname_np (pt, "scan");
4698 #endif
4699               all_threads.push_back(pt);
4700             }
4701         }
4702     }
4703 
4704   /* Trivial main loop! */
4705   set_metric("ready", 1);
4706   while (! interrupted)
4707     pause ();
4708   scanq.nuke(); // wake up any remaining scanq-related threads, let them die
4709   if (scan_barrier) scan_barrier->nuke(); // ... in case they're stuck in a barrier
4710   set_metric("ready", 0);
4711 
4712   if (verbose)
4713     obatched(clog) << "stopping" << endl;
4714 
4715   /* Join all our threads. */
4716   for (auto&& it : all_threads)
4717     pthread_join (it, NULL);
4718 
4719   /* Stop all the web service threads. */
4720   if (d46) MHD_stop_daemon (d46);
4721   if (d4) MHD_stop_daemon (d4);
4722 
4723   if (! passive_p)
4724     {
4725       /* With all threads known dead, we can clean up the global resources. */
4726       rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
4727       if (rc != SQLITE_OK)
4728         {
4729           error (0, 0,
4730                  "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
4731         }
4732     }
4733 
4734   debuginfod_pool_groom ();
4735   delete scan_barrier;
4736 
4737   // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
4738   (void) regfree (& file_include_regex);
4739   (void) regfree (& file_exclude_regex);
4740 
4741   sqlite3 *database = db;
4742   sqlite3 *databaseq = dbq;
4743   db = dbq = 0; // for signal_handler not to freak
4744   (void) sqlite3_close (databaseq);
4745   if (! passive_p)
4746     (void) sqlite3_close (database);
4747 
4748   return 0;
4749 }
4750