xref: /aosp_15_r20/external/AFLplusplus/afl-cmin (revision 08b48e0b10e97b33e7b60c5b6e2243bd915777f2)
#!/usr/bin/env sh
# Thin POSIX sh launcher: sets up the environment and then hands the awk
# program that follows (a quoted here-document) to awk on standard input.

SYS=$(uname -s)
if test "$SYS" = "Darwin"; then
  # diagnostics belong on stderr so they are not mistaken for tool output
  echo "Error: afl-cmin does not work on Apple currently. please use afl-cmin.bash instead." >&2
  exit 1
fi

export AFL_QUIET=1
export ASAN_OPTIONS=detect_leaks=0

# Put the directory of this script first in PATH so that the helper tools
# shipped next to it are preferred. "$0" is quoted so that an install path
# containing whitespace is handled correctly.
THISPATH=$(dirname "$0")
export PATH="${THISPATH}:$PATH"

# ${@+"$@"} expands to the quoted argument list, or to nothing when there are
# no arguments (portable even for old shells running with "set -u").
awk -f - -- ${@+"$@"} <<'EOF'
12#!/usr/bin/awk -f
13# awk script to minimize a test corpus of input files
14#
15# based on afl-cmin bash script written by Michal Zalewski
16# rewritten by Heiko Eißfeldt (hexcoder-)
17# tested with:
18#   gnu awk (x86 Linux)
19#   bsd awk (x86 *BSD)
20#   mawk (arm32 raspbian)
21#
22# uses getopt.awk package from Arnold Robbins
23#
24# external tools used by this script:
25# test
26# grep
27# rm
28# mkdir
29# ln
30# cp
31# pwd
32# type
33# cd
34# find
35# stat
36# sort
37# cut
38# and afl-showmap from this project :-)
39
40# getopt.awk --- Do C library getopt(3) function in awk
41
42# External variables:
43#    Optind -- index in ARGV of first nonoption argument
44#    Optarg -- string value of argument to current option
45#    Opterr -- if nonzero, print our own diagnostic
46#    Optopt -- current option letter
47
48# Returns:
49#    -1     at end of options
50#    "?"    for unrecognized option
51#    <c>    a character representing the current option
52
53# Private Data:
54#    _opti  -- index in multiflag option, e.g., -abc
55
function getopt(argc, argv, options,    optchar, pos)
{
    # an empty option specification means there is nothing to recognise
    if (length(options) < 1)
        return -1

    # "--" terminates option processing and is itself consumed
    if (argv[Optind] == "--") {
        _opti = 0
        Optind++
        return -1
    }

    # the first word that does not look like an option stops the scan
    if (argv[Optind] !~ /^-[^:\t ]/) {
        _opti = 0
        return -1
    }

    # a fresh argv word is read starting right behind its leading dash
    if (!_opti)
        _opti = 2

    optchar = substr(argv[Optind], _opti, 1)
    Optopt = optchar
    pos = index(options, optchar)

    if (!pos) {
        # letter is not part of the option specification
        if (Opterr)
            printf("%c -- invalid option\n", optchar) > "/dev/stderr"
    } else if (substr(options, pos + 1, 1) != ":") {
        # plain flag without an argument
        Optarg = ""
    } else {
        # the argument is either glued to the option letter or the next word
        Optarg = substr(argv[Optind], _opti + 1)
        if (length(Optarg) == 0)
            Optarg = argv[++Optind]
        _opti = 0
    }

    # step to the next letter of a bundled word (-abc) or to the next word
    if (_opti != 0 && _opti < length(argv[Optind]))
        _opti++
    else {
        Optind++
        _opti = 0
    }

    return pos ? optchar : "?"
}
100
# usage() --- print the command line help on stdout and terminate with status 1
#
# The whole help text is a single print statement built from adjacent string
# literals. Every literal except the last one must end in a backslash, otherwise
# the statement ends early and the remaining literals are silently ignored.
function usage() {
   print \
"afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
"\n" \
"Required parameters:\n" \
"  -i dir        - input directory with starting corpus\n" \
"  -o dir        - output directory for minimized files\n" \
"\n" \
"Execution control settings:\n" \
"  -T tasks      - how many parallel tasks to run (default: 1, all=nproc)\n" \
"  -f file       - location read by the fuzzed program (stdin)\n" \
"  -m megs       - memory limit for child process (default: none)\n" \
"  -t msec       - run time limit for child process (default: 5000)\n" \
"  -O            - use binary-only instrumentation (FRIDA mode)\n" \
"  -Q            - use binary-only instrumentation (QEMU mode)\n" \
"  -U            - use unicorn-based instrumentation (unicorn mode)\n" \
"  -X            - use Nyx mode\n" \
"\n" \
"Minimization settings:\n" \
"  -A            - allow crashes and timeouts (not recommended)\n" \
"  -C            - keep crashing inputs, reject everything else\n" \
"  -e            - solve for edge coverage only, ignore hit counts\n" \
"\n" \
"For additional tips, please consult README.md\n" \
"\n" \
"Environment variables used:\n" \
"AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n" \
"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up\n" \
"AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory\n" \
"AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)\n" \
"AFL_FORK_SERVER_KILL_SIGNAL: Signal delivered to fork server processes on\n" \
"   termination (default: SIGTERM). If this is not set and AFL_KILL_SIGNAL is\n" \
"   set, this will be set to the same value as AFL_KILL_SIGNAL.\n" \
"AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n" \
"AFL_CMIN_ALLOW_ANY: write tuples for crashing inputs also\n" \
"AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH\n" \
"AFL_PRINT_FILENAMES: If set, the filename currently processed will be " \
      "printed to stdout\n" \
"AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary\n" \
"AFL_CUSTOM_MUTATOR_LIBRARY: custom mutator library (post_process and send)\n" \
"AFL_PYTHON_MODULE: custom mutator library (post_process and send)\n"
   exit 1
}
144
# exists_and_is_executable(binarypath) --- check a path with the test utility
#
# Returns 1 when binarypath names a regular file that is executable for the
# current user, 0 otherwise. The path is wrapped in single quotes (embedded
# single quotes are escaped) so that whitespace and shell metacharacters in it
# are passed to "test" literally instead of being interpreted by the shell.
function exists_and_is_executable(binarypath,    quoted) {
  quoted = binarypath
  gsub(/'/, "'\\''", quoted)
  quoted = "'" quoted "'"
  # two separate tests: the "-a" operator of test is marked obsolescent by POSIX
  return 0 == system("test -f " quoted " && test -x " quoted)
}
148
# Main program. Parses the command line, validates the setup, lets
# afl-showmap write one trace file per input into <out_dir>/.traces and then
# copies (or hard-links) for every tuple the smallest input that produced it
# into out_dir.
BEGIN {
  # progress lines are rewritten in place with "\r" only if stdout is a terminal
  if (0 != system( "test -t 1")) {
    redirected = 1
  } else {
    redirected = 0
  }

  print "corpus minimization tool for AFL++ (awk version)\n"

  # defaults
  extra_par = ""
  AFL_CMIN_CRASHES_ONLY = ""
  AFL_CMIN_ALLOW_ANY = ""

  # process options
  Opterr = 1    # default is to diagnose
  Optind = 1    # skip ARGV[0]
  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eACOQUXYT:?")) != -1) {
    if (_go_c == "i") {
      if (!Optarg) usage()
      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      in_dir = Optarg
      continue
    } else
    if (_go_c == "T") {
      if (!Optarg) usage()
      if (threads) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      threads = Optarg
      continue
    } else
    if (_go_c == "o") {
      if (!Optarg) usage()
      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      out_dir = Optarg
      continue
    } else
    if (_go_c == "f") {
      if (!Optarg) usage()
      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      stdin_file = Optarg
      continue
    } else
    if (_go_c == "m") {
      if (!Optarg) usage()
      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      mem_limit = Optarg
      mem_limit_given = 1
      continue
    } else
    if (_go_c == "t") {
      if (!Optarg) usage()
      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      timeout = Optarg
      continue
    } else
    if (_go_c == "C") {
      AFL_CMIN_CRASHES_ONLY = "AFL_CMIN_CRASHES_ONLY=1 "
      continue
    } else
    if (_go_c == "A") {
      AFL_CMIN_ALLOW_ANY = "AFL_CMIN_ALLOW_ANY=1 "
      continue
    } else
    if (_go_c == "e") {
      extra_par = extra_par " -e"
      continue
    } else
    if (_go_c == "O") {
      if (frida_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -O"
      frida_mode = 1
      continue
    } else
    if (_go_c == "Q") {
      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -Q"
      qemu_mode = 1
      continue
    } else
    if (_go_c == "U") {
      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -U"
      unicorn_mode = 1
      continue
    } else
    if (_go_c == "X" || _go_c == "Y") {
      if (nyx_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -X"
      nyx_mode = 1
      continue
    } else
    if (_go_c == "?") {
      exit 1
    } else
      usage()
  } # while options

  if (!mem_limit) mem_limit = "none"
  if (!timeout) timeout = "5000"

  # get program args: prog_args[0] is the target, the remaining words are
  # collected (single-quoted) into prog_args_string for the shell commands
  i = 0
  prog_args_string = ""
  for (; Optind < ARGC; Optind++) {
    prog_args[i++] = ARGV[Optind]
    if (i > 1)
      prog_args_string = prog_args_string" '"ARGV[Optind]"'"
  }

  # sanity checks
  if (!prog_args[0] || !in_dir || !out_dir) usage()

  target_bin = prog_args[0]

  # Do a sanity check to discourage the use of /tmp, since we can't really
  # handle this safely from an awk script.

  if (!ENVIRON["AFL_ALLOW_TMP"]) {
    dirlist[0] = in_dir
    dirlist[1] = target_bin
    dirlist[2] = out_dir
    dirlist[3] = stdin_file
    "pwd" | getline dirlist[4] # current directory
    close("pwd")
    for (dirind in dirlist) {
      dir = dirlist[dirind]
      if (dir ~ /^(\/var)?\/tmp/) {
        print "[-] Warning: do not use this script in /tmp or /var/tmp for security reasons." > "/dev/stderr"
      }
    }
    delete dirlist
  }

  if (threads && stdin_file) {
    print "[-] Error: -T and -f cannot be used together." > "/dev/stderr"
    exit 1
  }

  if (!threads && !stdin_file && !nyx_mode) {
    print "[*] Are you aware of the '-T all' parallelize option that improves the speed for large/slow corpuses?"
  }

  # If @@ is specified, but there's no -f, let's come up with a temporary input
  # file name.

  trace_dir = out_dir "/.traces"

  if (!stdin_file) {
    found_atat = 0
    for (prog_args_ind in prog_args) {
      if (match(prog_args[prog_args_ind], "@@") != 0) {
        found_atat = 1
        break
      }
    }
    if (found_atat) {
      stdin_file = trace_dir "/.cur_input"
    }
  }

  # Check for obvious errors.

  if (mem_limit && mem_limit != "none" && mem_limit < 5) {
    print "[-] Error: dangerously low memory limit." > "/dev/stderr"
    exit 1
  }

  if (timeout && timeout != "none" && timeout < 10) {
    print "[-] Error: dangerously low timeout." > "/dev/stderr"
    exit 1
  }

  # Resolve the target through PATH when it is not directly an executable file.
  # Paths are double-quoted in every shell command below so that names with
  # whitespace keep working.
  if (!nyx_mode && target_bin && !exists_and_is_executable(target_bin)) {

    cmd = "command -v \""target_bin"\" 2>/dev/null"
    cmd | getline tnew
    close(cmd)
    if (!tnew || !exists_and_is_executable(tnew)) {
      print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
      exit 1
    }
    target_bin = tnew
  }

  # If the target contains the AFL_DUMP_MAP_SIZE marker string, run it once
  # with that variable set and use the value it prints as AFL_MAP_SIZE.
  if (0 == system ( "grep -aq AFL_DUMP_MAP_SIZE \"" target_bin "\"" )) {
    print "[!] Trying to obtain the map size of the target ..."
    get_map_size = "AFL_DUMP_MAP_SIZE=1 \"" target_bin "\""
    get_map_size | getline mapsize
    close(get_map_size)
    if (mapsize && mapsize > 65535 && mapsize < 100000000) {
      AFL_MAP_SIZE = "AFL_MAP_SIZE="mapsize" "
      print "[+] Setting "AFL_MAP_SIZE
    }
  }

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !frida_mode && !unicorn_mode && !nyx_mode) {
    if (0 != system( "grep -q __AFL_SHM_ID \""target_bin"\"" )) {
      print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
      exit 1
    }
  }

  if (0 != system( "test -d \""in_dir"\"" )) {
    print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
    exit 1
  }

  #if (0 == system( "test -d "in_dir"/default" )) {
  #  in_dir = in_dir "/default"
  #}
  #
  #if (0 == system( "test -d "in_dir"/queue" )) {
  #  in_dir = in_dir "/queue"
  #}

  # remove leftovers of earlier runs; the glob has to stay outside the quotes
  system("rm -rf \""trace_dir"\" 2>/dev/null");
  system("rm \""out_dir"\"/id[:_]* 2>/dev/null")

  cmd = "ls \""out_dir"\"/* 2>/dev/null | wc -l"
  cmd | getline noofentries
  close(cmd)
  if (0 == system( "test -d \""out_dir"\" -a "noofentries" -gt 0" )) {
    print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
    exit 1
  }

  if (threads) {
    cmd = "nproc"
    cmd | getline nproc
    close(cmd)
    if (threads == "all") {
      threads = nproc
    } else {
      if (!(threads > 1 && threads <= nproc)) {
        print "[-] Error: -T option must be between 1 and "nproc" or \"all\"." > "/dev/stderr"
        exit 1
      }
    }
  }

  # creates out_dir as well, because trace_dir lives inside of it
  if (0 != system("mkdir -p -m 0700 \""trace_dir"\"")) {
    print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
    exit 1
  }

  if (stdin_file) {
    # truncate input file
    printf "" > stdin_file
    close(stdin_file)
  }

  # First we look in PATH
  if (0 == system("command -v afl-showmap >/dev/null 2>&1")) {
    cmd = "command -v afl-showmap 2>/dev/null"
    cmd | getline showmap
    close(cmd)
  } else {
    # then we look in the current directory
    if (0 == system("test -x ./afl-showmap")) {
      showmap = "./afl-showmap"
    } else {
      if (ENVIRON["AFL_PATH"]) {
        showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
      }
    }
  }

  if (!showmap || 0 != system("test -x \""showmap"\"" )) {
    print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
    exit 1
  }

  # get list of input filenames sorted by size
  i = 0
  # yuck, gnu stat is option incompatible to bsd stat
  # we use a heuristic to differentiate between
  # GNU stat and other stats
  cmd = "stat --version 2>/dev/null"
  cmd | getline statversion
  close(cmd)
  if (statversion ~ /GNU coreutils/ || statversion ~ /BusyBox/) {
    stat_format = "-c '%s %n'" # GNU
  } else {
    stat_format = "-f '%z %N'" # *BSD, MacOS
  }
  # lists "<size> <path>" for all regular files, smallest first; the trailing
  # grep drops lines starting with 0, i.e. empty files
  cmdline = "(cd \""in_dir"\" && find . \\( ! -name \".*\" -a -type d \\) -o -type f -exec stat "stat_format" \\{\\} + | sort -k1n -k2r) | grep -Ev '^0'"
  while (cmdline | getline) {
    # strip the size column and a leading "./" -> path relative to in_dir
    sub(/^[0-9]+ (\.\/)?/,"",$0)
    infilesSmallToBigFull[i] = $0
    # strip the directory part -> name of the trace file / output file
    sub(/.*\//, "", $0)
    infilesSmallToBig[i] = $0
    infilesSmallToBigMap[infilesSmallToBig[i]] = infilesSmallToBigFull[i]
    infilesSmallToBigFullMap[infilesSmallToBigFull[i]] = infilesSmallToBig[i]
    i++
  }
  close(cmdline)
  in_count = i

  # without any usable input the dry run below would read from a directory
  if (in_count == 0) {
    print "[-] Error: no non-empty input files found in '"in_dir"' - nothing to be done." > "/dev/stderr"
    if (!ENVIRON["AFL_KEEP_TRACES"]) {
      system("rm -rf \""trace_dir"\" 2>/dev/null")
    }
    exit 1
  }

  first_file = infilesSmallToBigFull[0]

  # Check for the more efficient way to copy files: hard links work only if
  # in_dir and out_dir are on the same file system, otherwise fall back to cp.
  system(">\""in_dir"/.afl-cmin.test\"")
  if (0 == system("ln \""in_dir"/.afl-cmin.test\" \""trace_dir"/.link_test\"")) {
    cp_tool = "ln"
  } else {
    cp_tool = "cp"
  }
  system("rm -f \""in_dir"/.afl-cmin.test\"")

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
    # Make sure that we can actually get anything out of afl-showmap before we
    # waste too much time.

    print "[*] Testing the target binary..."

    if (!stdin_file) {
      system(AFL_MAP_SIZE "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
    } else {
      system("cp \""in_dir"/"first_file"\" \""stdin_file"\"")
      system(AFL_MAP_SIZE "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
    }

    # every line of the trace is one recorded tuple
    first_count = 0

    runtest = trace_dir"/.run_test"
    while ((getline < runtest) > 0) {
      ++first_count
    }
    close(runtest)

    if (first_count) {
      print "[+] OK, "first_count" tuples recorded."
    } else {
      print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
      if (!ENVIRON["AFL_KEEP_TRACES"]) {
        system("rm -rf \""trace_dir"\" 2>/dev/null")
      }
      exit 1
    }
  }

  if (in_count < threads) {
    threads = in_count
    print "[!] WARNING: less inputs than threads, reducing threads to "threads" and likely the overhead of threading makes things slower..."
  }

  # Let's roll!

  #############################
  # STEP 1: Collecting traces #
  #############################

  # File lists are only needed (and only cleaned up) in the parallel case.
  if (threads > 1) {

    inputsperfile = int(in_count / threads)
    if (in_count % threads) {
      inputsperfile++;
    }

    # Because inputsperfile is rounded up, fewer tasks than requested can be
    # enough (e.g. 5 inputs and 4 tasks -> 2 inputs each -> 3 tasks). Without
    # this adjustment the early lists would run past the end of the input
    # array and the list of the last task would never be created.
    tasks_needed = int((in_count + inputsperfile - 1) / inputsperfile)
    if (tasks_needed < threads) {
      threads = tasks_needed
    }

    cnt = 0;
    tmpfile=out_dir "/.filelist"
    for (instance = 1; instance < threads; instance++) {
      listfile = tmpfile "." instance
      for (i = 0; i < inputsperfile; i++) {
        print in_dir"/"infilesSmallToBigFull[cnt] > listfile
        cnt++
      }
      # flush the list to disk before afl-showmap is asked to read it
      close(listfile)
    }
    # the last task takes whatever is left
    listfile = tmpfile "." threads
    for (; cnt < in_count; cnt++) {
      print in_dir"/"infilesSmallToBigFull[cnt] > listfile
    }
    close(listfile)

  }

  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."

  cur = 0;

  if (threads > 1) {

    print "[*] Creating " threads " parallel tasks with about " inputsperfile " items each."
    # every task runs in the background and drops a ".done" marker at the end
    for (i = 1; i <= threads; i++) {

      if (!stdin_file) {
        retval = system(" { "AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -I \""tmpfile"."i"\" -- \""target_bin"\" "prog_args_string"; > \""tmpfile"."i".done\" ; } &")
      } else {
        # each task needs its own input file for the target to read
        stdin_file=tmpfile"."i".stdin"
        retval = system(" { "AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -I \""tmpfile"."i"\" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null; > \""tmpfile"."i".done\" ; } &")
      }
    }
    print "[*] Waiting for parallel tasks to complete ..."
    # wait for all processes to finish
    ok=0
    while (ok < threads) {
      ok=0
      for (i = 1; i <= threads; i++) {
        if (system("test -f \""tmpfile"."i".done\"") == 0) {
          ok++
        }
      }
      # do not burn a CPU core with polling while the tasks are busy
      if (ok < threads) {
        system("sleep 1")
      }
    }
    print "[*] Done!"
    system("rm -f \""tmpfile"\"*")
  } else {
    if (!stdin_file) {
      print "    Processing "in_count" files (forkserver mode)..."
      retval = system(AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string)
    } else {
      print "    Processing "in_count" files (forkserver mode)..."
      retval = system(AFL_MAP_SIZE AFL_CMIN_ALLOW_ANY AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -H \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
    }

    if (retval && (!AFL_CMIN_CRASHES_ONLY && !AFL_CMIN_ALLOW_ANY)) {
      print "[!] Exit code "retval" != 0 received from afl-showmap (this means a crashing or timeout input is likely present), terminating..."

      if (!ENVIRON["AFL_KEEP_TRACES"]) {
        system("rm -rf \""trace_dir"\" 2>/dev/null")
        system("rmdir \""out_dir"\"")
      }
      exit retval
    }

  }

  #######################################################
  # STEP 2: register smallest input file for each tuple #
  # STEP 3: copy that file (at most once)               #
  #######################################################

  print "[*] Processing traces for input files in '"in_dir"'."

  cur = 0
  out_count = 0
  tuple_count = 0

  # Files are visited from small to big, so the first file seen for a tuple
  # is the smallest one that produces it.
  while (cur < in_count) {
    fn = infilesSmallToBig[cur]
    ++cur
    if (redirected == 0) { printf "\r    Processing file "cur"/"in_count }
    else { print "    Processing file "cur"/"in_count }
    # create path for the trace file from afl-showmap
    tracefile_path = trace_dir"/"fn
    # ensure the file size is not zero ("test -s" is portable, unlike "du -b")
    if (0 != system("test -s \""tracefile_path"\"")) {
      print "[!] WARNING: file "fn" is crashing the target, ignoring..."
    }
    # gather all keys, and count them
    while ((getline line < tracefile_path) > 0) {
        key = line
        if (!(key in key_count)) {
          ++tuple_count
        }
        ++key_count[key]
        if (! (key in best_file)) {
            # this is the best file for this key
            best_file[key] = fn
        }
    }
    close(tracefile_path)
  }
  print ""

  # sort keys by the number of files that hit them
  sortedKeys = trace_dir"/.all_uniq"
  sortKeysCmd = "sort -k1n > \""sortedKeys"\""
  for (key in key_count) {
     printf "%7d %s\n",key_count[key],key | sortKeysCmd
  }
  close(sortKeysCmd)

  # iterate over keys from rare to frequent and
  # copy best file
  while ((getline < sortedKeys) > 0) {

    # the key is the last field; the count in front of it is space padded
    nrFields = split($0, field, / +/)
    key = field[nrFields]

    ++tcnt;
    if (redirected == 0) { printf "\r    Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." }
    else { print "    Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..." }

    # already covered by a file that was picked for a rarer tuple
    if (key in keyAlreadyKnown) {
      continue
    }

    fn = best_file[key]
    # gather all tuples from the best file for this key
    tracedfn = trace_dir"/"fn
    while ((getline < tracedfn) > 0) {
      keyAlreadyKnown[$0] = ""
    }
    close(tracedfn)

    # copy file unless already done
    if (! (fn in file_already_copied)) {
      realfile = infilesSmallToBigMap[fn]
      system(cp_tool" \""in_dir"/"realfile"\" \""out_dir"/"fn"\"")
      file_already_copied[fn] = ""
      ++out_count
    }
  }
  close(sortedKeys)
  print ""
  print "[+] Found "tuple_count" unique tuples across "in_count" files."

  if (out_count == 1) {
    print "[!] WARNING: All test cases had the same traces, check syntax!"
  }
  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."

  if (!ENVIRON["AFL_KEEP_TRACES"]) {
    system("rm -rf \""trace_dir"\" 2>/dev/null")
  }

  exit 0
}
689EOF
690