xref: /aosp_15_r20/external/zstd/tests/DEPRECATED-test-zstd-speed.py (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1#! /usr/bin/env python3
# THIS BENCHMARK IS BEING REPLACED BY automated_benchmarking.py
3
4# ################################################################
5# Copyright (c) Meta Platforms, Inc. and affiliates.
6# All rights reserved.
7#
8# This source code is licensed under both the BSD-style license (found in the
9# LICENSE file in the root directory of this source tree) and the GPLv2 (found
10# in the COPYING file in the root directory of this source tree).
11# You may select, at your option, one of the above-listed licenses.
12# ##########################################################################
13
14# Limitations:
15# - doesn't support filenames with spaces
16# - dir1/zstd and dir2/zstd will be merged in a single results file
17
18import argparse
19import os           # getloadavg
20import string
21import subprocess
22import time         # strftime
23import traceback
24import hashlib
25import platform     # system
26
# Version string reported in the start/stop notification e-mails.
script_version = 'v1.1.2 (2017-03-26)'
# Repository cloned when --repoURL is not given.
default_repo_url = 'https://github.com/facebook/zstd.git'
working_dir_name = 'speedTest'
# All state (clone, results, logs) lives below the directory the script is started from.
working_path = '/'.join((os.getcwd(), working_dir_name))    # /path/to/zstd/tests/speedTest
clone_path = '/'.join((working_path, 'zstd'))               # /path/to/zstd/tests/speedTest/zstd
# Prefix of every e-mail subject, followed by the pid of this process.
email_header = 'ZSTD_speedTest'
pid = '%d' % os.getpid()
# The values below are placeholders that the __main__ section overwrites.
verbose = False
clang_version = "unknown"
gcc_version = "unknown"
args = None
38
39
def hashfile(hasher, fname, blocksize=65536):
    """Feed the content of file *fname* into *hasher* and return the hex digest.

    The file is opened in binary mode and read *blocksize* bytes at a time.
    """
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(blocksize)
            if not block:
                # an empty read marks the end of the file
                break
            hasher.update(block)
    return hasher.hexdigest()
45
46
def log(text):
    """Print *text* prefixed with the current local date and time."""
    stamp = time.strftime("%Y/%m/%d %H:%M:%S")
    print(stamp + ' - ' + text)
49
50
def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True):
    """Run *command* and return its output as a list of lines.

    The command is started in the directory stored in ``execute.cwd`` (the
    current directory when that is None) and is given ``args.timeout``
    seconds to finish.

    Args:
        command: command line to run.
        print_command: log the command line before running it.
        print_output: print the captured stdout and stderr.
        print_error: print stderr when the command fails (unless it has
            already been printed because of *print_output*).
        param_shell: passed to ``subprocess.Popen`` as ``shell``.

    Returns:
        The lines of stdout followed by the lines of stderr.

    Raises:
        RuntimeError: the command exited with a non-zero status; the message
            is the captured stdout + stderr.
        subprocess.TimeoutExpired: the command did not finish in time. The
            child process is killed before the exception propagates.
    """
    if print_command:
        log("> " + command)
    # `args` stays None until the command line has been parsed; run without a
    # time limit in that case instead of failing on the attribute lookup.
    timeout = getattr(args, "timeout", None)
    popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             shell=param_shell, cwd=execute.cwd)
    try:
        stdout_bytes, stderr_bytes = popen.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # communicate() does not stop the child when the timeout expires, so
        # kill and reap it here. The pipes are closed explicitly rather than
        # drained because processes spawned by the shell may still hold them
        # open, which would make a second communicate() block.
        popen.kill()
        popen.wait()
        for pipe in (popen.stdout, popen.stderr):
            if pipe is not None:
                pipe.close()
        raise
    # Tool output is only displayed and parsed for numbers; a stray non-UTF-8
    # byte must not abort the run.
    stderr_lines = stderr_bytes.decode("utf-8", errors="replace")
    stdout_lines = stdout_bytes.decode("utf-8", errors="replace")
    if print_output:
        if stdout_lines:
            print(stdout_lines)
        if stderr_lines:
            print(stderr_lines)
    if popen.returncode is not None and popen.returncode != 0:
        if stderr_lines and not print_output and print_error:
            print(stderr_lines)
        raise RuntimeError(stdout_lines + stderr_lines)
    return (stdout_lines + stderr_lines).splitlines()
# Working directory for every command run through execute(); None means "inherit".
execute.cwd = None
69
70
def does_command_exist(command):
    """Return True when *command* can be run successfully through execute().

    Any exception raised while running it is reported as False. Nothing is
    printed apart from the command line itself in verbose mode.
    """
    try:
        execute(command, verbose, False, False)
        return True
    except Exception:
        return False
77
78
def send_email(emails, topic, text, have_mutt, have_mail):
    """Send *text* to *emails* with subject *topic* using mutt or mail.

    The message body is first written to ``tmpEmailContent`` inside the
    working directory and then fed to the mailer on stdin. mutt is preferred
    when both programs are available; if neither is, a message is logged and
    nothing is sent.

    Args:
        emails: whitespace-separated recipient addresses.
        topic: subject line.
        text: message body (a string or an iterable of strings).
        have_mutt: whether ``mutt`` is usable.
        have_mail: whether ``mail`` is usable.

    Raises:
        RuntimeError: the mailer exited with a non-zero status (from execute()).
    """
    import shlex  # needed only here; the file's import block is left untouched

    logFileName = working_path + '/' + 'tmpEmailContent'
    # The file must be closed (flushed) before the mailer reads it, which the
    # end of the with-block guarantees.
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
    if have_mutt:
        mailer = 'mutt'
    elif have_mail:
        mailer = 'mail'
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
        return
    # Subjects embed branch names coming from the remote repository; quote
    # every argument so the shell cannot interpret any part of them.
    recipients = ' '.join(shlex.quote(address) for address in emails.split())
    execute('%s -s %s %s < %s' % (mailer, shlex.quote(topic), recipients, shlex.quote(logFileName)),
            verbose)
90
91
def send_email_with_attachments(branch, commit, last_commit, args, text, results_files,
                                logFileName, have_mutt, have_mail):
    """E-mail a regression warning for *branch*:*commit*.

    *text* is written to *logFileName* and used as the message body. With
    mutt the result files are attached; plain ``mail`` sends the body only.
    If neither program is available a message is logged and nothing is sent.

    Args:
        branch, commit, last_commit: identify the tested and the previous
            commit; they only appear in the subject line.
        args: parsed command line; ``emails``, ``lowerLimit`` and
            ``ratioLimit`` are read.
        text: message body (a string or an iterable of strings).
        results_files: whitespace-separated paths of the files to attach.
        logFileName: file that receives the message body.
        have_mutt: whether ``mutt`` is usable.
        have_mail: whether ``mail`` is usable.

    Raises:
        RuntimeError: the mailer exited with a non-zero status (from execute()).
    """
    import shlex  # needed only here; the file's import block is left untouched

    # The file must be closed (flushed) before the mailer reads it, which the
    # end of the with-block guarantees.
    with open(logFileName, "w") as myfile:
        myfile.writelines(text)
    email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \
                  % (email_header, pid, branch, commit, last_commit,
                     args.lowerLimit, args.ratioLimit)
    # Subject and file names embed branch names coming from the remote
    # repository; quote every argument so the shell cannot interpret them.
    subject = shlex.quote(email_topic)
    recipients = ' '.join(shlex.quote(address) for address in args.emails.split())
    body_file = shlex.quote(logFileName)
    if have_mutt:
        # results_files is a space-separated list, so paths containing spaces
        # are still not supported here.
        attachments = ' '.join(shlex.quote(path) for path in results_files.split())
        execute('mutt -s %s %s -a %s < %s' % (subject, recipients, attachments, body_file))
    elif have_mail:
        execute('mail -s %s %s < %s' % (subject, recipients, body_file))
    else:
        log("e-mail cannot be sent (mail or mutt not found)")
107
108
def git_get_branches():
    """Fetch (with pruning) and return the names of the remote branches to test.

    Lines of ``git branch -rl`` that mention HEAD, coverity_scan or gh-pages
    are left out; the remaining ones are returned stripped of whitespace.
    """
    execute('git fetch -p', verbose)
    skipped = ("HEAD", "coverity_scan", "gh-pages")
    return [entry.strip()
            for entry in execute('git branch -rl', verbose)
            if not any(marker in entry for marker in skipped)]
117
118
def git_get_changes(branch, commit, last_commit):
    """Return a printable summary of the commits leading to *commit*.

    When *last_commit* is known the summary covers ``last_commit..commit``;
    otherwise it lists the 10 most recent commits reachable from *commit*.
    """
    fmt = '--format="%h: (%an) %s, %ar"'
    if last_commit is not None:
        command = 'git --no-pager log %s %s..%s' % (fmt, last_commit, commit)
    else:
        command = 'git log -n 10 %s %s' % (fmt, commit)
    history = execute(command)
    header = 'Changes in %s since %s:\n' % (branch, last_commit)
    return header + '\n'.join(history)
126
127
def get_last_results(resultsFileName):
    """Parse the most recent benchmark entry stored in *resultsFileName*.

    The file is a sequence of entries: a header line
    ("branch commit compilerVersion md5=...", at most 4 words) followed by
    result lines of 8 or 9 words. Each header starts a new entry, so only the
    values of the last entry are returned. Lines that match neither shape
    (including blank lines) are ignored.

    Returns:
        ``(commit, csize, cspeed, dspeed)``: the commit named by the last
        header and, per result line, the compressed size (int), compression
        speed and decompression speed (floats). All four are None when the
        file does not exist.

    Raises:
        ValueError: a result line holds a non-numeric size or speed.
    """
    if not os.path.isfile(resultsFileName):
        return None, None, None, None
    commit = None
    csize, cspeed, dspeed = [], [], []
    with open(resultsFileName, 'r') as f:
        for line in f:
            words = line.split()
            if 2 <= len(words) <= 4:   # branch + commit + compilerVer + md5
                # A header needs at least the commit in second position;
                # shorter lines (e.g. blank ones) cannot be headers.
                commit = words[1]
                csize, cspeed, dspeed = [], [], []
            elif len(words) in (8, 9):  # results: "filename" or "XX files"
                csize.append(int(words[1]))
                cspeed.append(float(words[3]))
                dspeed.append(float(words[5]))
    return commit, csize, cspeed, dspeed
148
149
def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName,
                          testFilePath, fileName, last_csize, last_cspeed, last_dspeed):
    """Benchmark one executable on one test file and compare with the previous run.

    Waits until the 1-minute load average drops to ``args.maxLoadAvg``, runs
    ``programs/<executableName>`` in benchmark mode (pinned to CPU 0 with
    ``taskset`` on Linux), appends a header line plus the raw output to
    *resultsFileName*, then compares the new figures level by level with
    *last_csize* / *last_cspeed* / *last_dspeed*.

    Args:
        branch, commit: what is being tested; written to the results file.
        last_commit: previous commit, only mentioned in the warning text.
        args: parsed command line; ``maxLoadAvg``, ``dictionary``,
            ``lastCLevel``, ``lowerLimit``, ``ratioLimit`` and ``message``
            are read.
        executableName: name of the binary inside ``programs/``.
        md5sum, compilerVersion: recorded next to the results.
        resultsFileName: file the results are appended to.
        testFilePath, fileName: path given to the benchmark and the short
            name used in messages.
        last_csize, last_cspeed, last_dspeed: figures of the previous run, or
            None when there is no previous run.

    Returns:
        "" when there is nothing to compare with or nothing regressed;
        otherwise the warning text, one line per figure that fell below
        ``args.lowerLimit`` (speeds) or ``args.ratioLimit`` (size).

    Raises:
        RuntimeError: the benchmark failed or did not print exactly
            ``args.lastCLevel + 1`` lines.
    """
    sleepTime = 30
    # Measurements are only meaningful on an otherwise idle machine.
    while os.getloadavg()[0] > args.maxLoadAvg:
        log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds"
            % (os.getloadavg()[0], args.maxLoadAvg, sleepTime))
        time.sleep(sleepTime)
    start_load = str(os.getloadavg())
    cpuSelector = "taskset --cpu-list 0" if platform.system() == 'Linux' else ""
    if args.dictionary:
        command = '%s programs/%s -rqi5b1e%s -D %s %s' % (cpuSelector, executableName, args.lastCLevel, args.dictionary, testFilePath)
    else:
        command = '%s programs/%s -rqi5b1e%s %s' % (cpuSelector, executableName, args.lastCLevel, testFilePath)
    result = execute(command, print_output=True)
    end_load = str(os.getloadavg())
    linesExpected = args.lastCLevel + 1
    if len(result) != linesExpected:
        raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result)))
    # Leaving the with-block closes (and flushes) the file before it is read
    # back by get_last_results() below.
    with open(resultsFileName, "a") as myfile:
        myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum))
        myfile.write('\n'.join(result) + '\n')
    if last_cspeed is None:
        log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName))
        return ""
    # Re-read the entry that was just appended so that the new figures are
    # parsed exactly like the previous ones were.
    commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    text = ""
    for i in range(min(len(cspeed), len(last_cspeed))):
        level = i + 1
        cDiff = cspeed[i] / last_cspeed[i]
        dDiff = dspeed[i] / last_dspeed[i]
        ratioDiff = float(last_csize[i]) / csize[i]
        print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, level, cspeed[i], last_cspeed[i], cDiff, dspeed[i], last_dspeed[i], dDiff, ratioDiff, fileName))
        if cDiff < args.lowerLimit:
            text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, level, cspeed[i], last_cspeed[i], cDiff, fileName)
        if dDiff < args.lowerLimit:
            text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, level, dspeed[i], last_dspeed[i], dDiff, fileName)
        if ratioDiff < args.ratioLimit:
            text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, level, csize[i], last_csize[i], ratioDiff, fileName)
    if text:
        text = args.message + ("\nmaxLoadAvg=%s  load average at start=%s end=%s\n%s  last_commit=%s  md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text
    return text
191
192
def update_config_file(branch, commit):
    """Record *commit* as the last one seen on *branch*.

    The value is kept in ``commit_<branch>.txt`` inside the working directory
    ('/' in the branch name is replaced by '_').

    Returns:
        The previously recorded commit, or None when there was no record.
    """
    state_file = "%s/commit_%s.txt" % (working_path, branch.replace("/", "_"))
    previous = None
    if os.path.isfile(state_file):
        with open(state_file, 'r') as source:
            previous = source.read()
    with open(state_file, 'w') as target:
        target.write(commit)
    return previous
202
203
def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName):
    """Benchmark once and, if a regression shows up, benchmark again to confirm it.

    Both runs are compared with the figures that were stored in
    *resultsFileName* before the first run.

    Returns:
        The warning text of the confirming run, or "" when the first run
        showed no regression, the second run did not confirm it, or
        ``args.dry_run`` is set (nothing is measured in that case).
    """
    last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName)
    # Must be bound even on a dry run, where no benchmark is executed.
    text = ""
    if not args.dry_run:
        text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
        if text:
            log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit))
            text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed)
    return text
212
213
def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail):
    """Build *commit* of *branch*, benchmark every test file and mail any regression.

    Unless ``args.dry_run`` is set, three binaries are built in the clone:
    ``zstd_clang`` (clang, warnings as errors), ``zstd`` and ``zstd32``.
    Each binary is then checked against every path in *testFilePaths* through
    double_check(); all warning texts are sent in a single e-mail with the
    corresponding result files listed as attachments.
    """
    local_branch = branch.split('/')[1]
    version = local_branch.rpartition('-')[2] + '_' + commit
    if not args.dry_run:
        build_steps = (
            'make -C programs clean zstd CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DZSTD_GIT_COMMIT=%s"' % version,
            'mv programs/zstd programs/zstd_clang',
            'make -C programs clean zstd zstd32 MOREFLAGS="-DZSTD_GIT_COMMIT=%s"' % version,
        )
        execute(' && '.join(build_steps))
    # The checksums are recorded next to the results to identify the binaries.
    md5_zstd, md5_zstd32, md5_zstd_clang = [
        hashfile(hashlib.md5(), clone_path + '/programs/' + binary)
        for binary in ('zstd', 'zstd32', 'zstd_clang')
    ]
    print("md5(zstd)=%s\nmd5(zstd32)=%s\nmd5(zstd_clang)=%s" % (md5_zstd, md5_zstd32, md5_zstd_clang))
    print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version))

    branch_tag = branch.replace("/", "_")
    logFileName = working_path + "/log_" + branch_tag + ".txt"
    dictName = args.dictionary.rpartition('/')[2] if args.dictionary else None
    text_to_send = []
    results_files = ""

    for filePath in testFilePaths:
        fileName = filePath.rpartition('/')[2]
        file_tag = fileName.replace(".", "_")
        # Only the gcc 64-bit results are stored under the dictionary name
        # when a dictionary is in use.
        gcc_prefix = (dictName.replace(".", "_") + "_") if dictName else "results_"
        variants = (
            (gcc_prefix, 'zstd', md5_zstd, 'gcc_version=' + gcc_version),
            ("results32_", 'zstd32', md5_zstd32, 'gcc_version=' + gcc_version),
            ("resultsClang_", 'zstd_clang', md5_zstd_clang, 'clang_version=' + clang_version),
        )
        for prefix, executableName, md5sum, compilerVersion in variants:
            resultsFileName = working_path + "/" + prefix + branch_tag + "_" + file_tag + ".txt"
            text = double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName)
            if text:
                text_to_send.append(text)
                results_files += resultsFileName + " "
    if text_to_send:
        send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail)
257
258
if __name__ == '__main__':
    # Entry point: parse the command line, prepare the working directory and
    # then poll the repository forever, benchmarking every branch whose head
    # changed since the previous pass.
    import sys  # sys.exit() is always available; the bare exit() helper is only injected by the site module

    parser = argparse.ArgumentParser()
    parser.add_argument('testFileNames', help='file or directory names list for speed benchmark')
    parser.add_argument('emails', help='list of e-mail addresses to send warnings')
    parser.add_argument('--dictionary', '-D', help='path to the dictionary')
    parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="")
    parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url)
    parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98)
    parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999)
    parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75)
    parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5)
    parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300)
    parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800)
    parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False)
    parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False)
    args = parser.parse_args()
    verbose = args.verbose

    # check if test files are accessible
    testFileNames = args.testFileNames.split()
    testFilePaths = []
    for fileName in testFileNames:
        fileName = os.path.expanduser(fileName)
        if os.path.isfile(fileName) or os.path.isdir(fileName):
            testFilePaths.append(os.path.abspath(fileName))
        else:
            log("ERROR: File/directory not found: " + fileName)
            sys.exit(1)

    # check if dictionary is accessible
    if args.dictionary:
        args.dictionary = os.path.abspath(os.path.expanduser(args.dictionary))
        if not os.path.isfile(args.dictionary):
            log("ERROR: Dictionary not found: " + args.dictionary)
            sys.exit(1)

    # check availability of e-mail senders
    have_mutt = does_command_exist("mutt -h")
    have_mail = does_command_exist("mail -V")
    if not have_mutt and not have_mail:
        log("ERROR: e-mail senders 'mail' or 'mutt' not found")
        sys.exit(1)

    # compiler versions are recorded next to every result
    clang_version = execute("clang -v 2>&1 | grep ' version ' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", verbose)[0]
    gcc_version = execute("gcc -dumpversion", verbose)[0]

    if verbose:
        print("PARAMETERS:\nrepoURL=%s" % args.repoURL)
        print("working_path=%s" % working_path)
        print("clone_path=%s" % clone_path)
        print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths))
        print("message=%s" % args.message)
        print("emails=%s" % args.emails)
        print("dictionary=%s" % args.dictionary)
        print("maxLoadAvg=%s" % args.maxLoadAvg)
        print("lowerLimit=%s" % args.lowerLimit)
        print("ratioLimit=%s" % args.ratioLimit)
        print("lastCLevel=%s" % args.lastCLevel)
        print("sleepTime=%s" % args.sleepTime)
        print("timeout=%s" % args.timeout)
        print("dry_run=%s" % args.dry_run)
        print("verbose=%s" % args.verbose)
        print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail))

    # clone ZSTD repo if needed
    if not os.path.isdir(working_path):
        os.mkdir(working_path)
    if not os.path.isdir(clone_path):
        execute.cwd = working_path
        execute('git clone ' + args.repoURL)
    if not os.path.isdir(clone_path):
        log("ERROR: ZSTD clone not found: " + clone_path)
        sys.exit(1)
    # every command from here on runs inside the clone
    execute.cwd = clone_path

    # check if speedTest.pid already exists
    pidfile = "./speedTest.pid"
    if os.path.isfile(pidfile):
        log("ERROR: %s already exists, exiting" % pidfile)
        sys.exit(1)

    send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
    with open(pidfile, 'w') as the_file:
        the_file.write(pid)

    branch = ""
    commit = ""
    first_time = True
    try:
        while True:
            try:
                if first_time:
                    first_time = False
                else:
                    time.sleep(args.sleepTime)
                loadavg = os.getloadavg()[0]
                if loadavg <= args.maxLoadAvg:
                    branches = git_get_branches()
                    for branch in branches:
                        commit = execute('git show -s --format=%h ' + branch, verbose)[0]
                        last_commit = update_config_file(branch, commit)
                        if commit == last_commit:
                            log("skipping branch %s: head %s already processed" % (branch, commit))
                        else:
                            log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit))
                            execute('git checkout -- . && git checkout ' + branch)
                            print(git_get_changes(branch, commit, last_commit))
                            test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail)
                else:
                    log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg))
                if verbose:
                    log("sleep for %s seconds" % args.sleepTime)
            except Exception:
                # Report the failure by e-mail and keep polling.
                stack = traceback.format_exc()
                email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit)
                send_email(args.emails, email_topic, stack, have_mutt, have_mail)
                print(stack)
    except KeyboardInterrupt:
        # Caught around the whole loop so that Ctrl-C is also honoured while
        # an error report is being sent.
        os.unlink(pidfile)
        send_email(args.emails, '[%s:%s] test-zstd-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail)
        sys.exit(0)
    finally:
        # A pid file left behind makes every later start refuse to run, so
        # remove it however the loop ends (e.g. the error report itself failed).
        if os.path.isfile(pidfile):
            os.unlink(pidfile)
379