xref: /aosp_15_r20/external/zstd/tests/test-zstd-versions.py (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1#!/usr/bin/env python3
2"""Test zstd interoperability between versions"""
3
4# ################################################################
5# Copyright (c) Meta Platforms, Inc. and affiliates.
6# All rights reserved.
7#
8# This source code is licensed under both the BSD-style license (found in the
9# LICENSE file in the root directory of this source tree) and the GPLv2 (found
10# in the COPYING file in the root directory of this source tree).
11# You may select, at your option, one of the above-listed licenses.
12# ################################################################
13
14import filecmp
15import glob
16import hashlib
17import os
18import shutil
19import sys
20import subprocess
21from subprocess import Popen, PIPE
22
# Repository that is cloned in full to obtain every released tag.
repo_url = 'https://github.com/facebook/zstd.git'
# Scratch directory (relative to the zstd source root) that receives the
# clone, the per-version binaries, the dictionaries and all test artefacts.
tmp_dir_name = 'tests/versionsTest'
make_cmd = 'make'
# Arguments passed to every make invocation: parallel jobs, CFLAGS=-O0.
make_args = ['-j','CFLAGS=-O0']
git_cmd = 'git'
# File in the source root that is copied into the scratch directory under the
# name `test_dat` and used as the sample for every compression round-trip.
test_dat_src = 'README.md'
test_dat = 'test_dat'
# Pseudo-tag naming the binary built from the current working tree instead of
# a checked-out release tag.
head = 'vdevel'
# Sub-directory of the scratch directory holding the dictionary training files.
dict_source = 'dict_source'
# Glob patterns (relative to the source root) of the files copied into the
# `dict_source` directory to serve as dictionary training samples.
dict_globs = [
    'programs/*.c',
    'lib/common/*.c',
    'lib/compress/*.c',
    'lib/decompress/*.c',
    'lib/dictBuilder/*.c',
    'lib/legacy/*.c',
    'programs/*.h',
    'lib/common/*.h',
    'lib/compress/*.h',
    'lib/dictBuilder/*.h',
    'lib/legacy/*.h'
]
45
46
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run *command*, wait for it to finish and return its exit status.

    Args:
        command: Argument list, or a single string when *param_shell* is true.
        print_output: When true, print the captured stdout and stderr.
        print_error: When true (and *print_output* is false), print the
            captured stderr if the command exits with a non-zero status.
        param_shell: Forwarded to ``Popen`` as ``shell``.

    Returns:
        The exit status of the child process.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    raw_stdout, raw_stderr = popen.communicate()
    # Child output is not guaranteed to be valid UTF-8; substitute undecodable
    # bytes instead of raising, so a stray byte cannot abort the whole run.
    stderr_text = raw_stderr.decode("utf-8", errors="replace")
    stdout_text = raw_stdout.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        # communicate() has returned, so returncode is always set here.
        print(stderr_text)
    return popen.returncode
59
60
def proc(cmd_args, pipe=True, dummy=False):
    """Run *cmd_args* to completion and return ``(stdout, stderr)``.

    Args:
        cmd_args: Argument list handed to ``Popen``.
        pipe: When true, both output streams are captured and returned as
            bytes; otherwise the child inherits the parent's streams and
            both tuple members are ``None``.
        dummy: When true, nothing is executed and ``None`` is returned.
    """
    if dummy:
        return None
    stream_options = {'stdout': PIPE, 'stderr': PIPE} if pipe else {}
    child = Popen(cmd_args, **stream_options)
    return child.communicate()
69
70
def make(targets, pipe=True):
    """Invoke make on *targets* with the module-wide make arguments.

    The full command is printed before it runs. Returns whatever ``proc``
    returns for that command.
    """
    command = [make_cmd] + make_args + targets
    print('compilation command : {}'.format(command))
    return proc(command, pipe)
76
77
def git(args, pipe=True):
    """Run git with the argument list *args* and return the result of ``proc``."""
    git_command = [git_cmd] + args
    return proc(git_command, pipe)
80
81
def get_git_tags():
    """Return the tags of the current repository matching ``v[0-9].[0-9].[0-9]``.

    The pattern admits exactly one character per version component. The tags
    are returned as a list of strings, split on whitespace from the output of
    ``git tag -l``.
    """
    tag_listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return tag_listing.decode('utf-8').split()
86
87
def dict_ok(tag, dict_name, sample):
    """Check that ``./zstd.<tag>`` can compress *sample* using *dict_name*.

    Args:
        tag: Version tag selecting the ``./zstd.<tag>`` binary.
        dict_name: Path of the dictionary to validate.
        sample: Path of the file fed to the binary on stdin.

    Returns:
        True when the dictionary file exists and the compression command exits
        with status zero; False when the dictionary or sample is missing, the
        binary cannot be started, or the command fails.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as sample_file:
            subprocess.check_call(cmd, stdin=sample_file, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only failures of the probe itself mean "dictionary not usable";
        # KeyboardInterrupt and programming errors must still propagate.
        return False
    return True
98
99
def create_dict(tag, dict_source_path, fallback_tag=None):
    """Create ``dict.<tag>`` in the current directory unless it already exists.

    The dictionary is trained on every ``*.c`` and ``*.h`` file directly inside
    *dict_source_path*, using ``./dictBuilder.<tag>`` for v0.5.0 and
    ``./zstd.<tag> --train`` for every other tag.

    Args:
        tag: Version tag selecting the binary and naming the dictionary.
        dict_source_path: Directory containing the training files.
        fallback_tag: When training fails and this is not None,
            ``dict.<fallback_tag>`` is copied to ``dict.<tag>`` instead.

    Raises:
        RuntimeError: Training failed and no fallback tag was given.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    c_files = glob.glob(dict_source_path + "/*.c")
    h_files = glob.glob(dict_source_path + "/*.h")
    # Ensure the dictionary builder is deterministic
    files = sorted(c_files + h_files)

    # Build an argument list rather than a shell string so that paths with
    # spaces or shell metacharacters reach the binary unchanged.
    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    try:
        result = execute(cmd, print_output=False)
    except OSError as err:
        # Without a shell, a missing or non-executable binary raises instead
        # of yielding a non-zero status; treat it as a failed training run so
        # the fallback path below still applies.
        print(err)
        result = 1

    # `files` is checked first so an empty source directory cannot raise
    # IndexError on files[0].
    if result == 0 and files and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
121
122
def zstd(tag, args, input_file, output_file):
    """
    Run ``./zstd.<tag>`` with *args*, reading input_file on stdin and writing
    stdout to output_file.
    Need this helper because 0.5.0 is broken when stdout is not a TTY.
    The command line and the captured stderr are printed.
    Throws subprocess.CalledProcessError if the command returns non-zero.
    """
    cmd = ['./zstd.' + tag] + args
    with open(input_file, "rb") as i, open(output_file, "wb") as o:
        print("Running: '{}', input={}, output={}" .format(
            ' '.join(cmd), input_file, output_file
        ))
        result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
    # stderr may carry non-ASCII bytes; replacing them keeps a decoding error
    # from masking the real exit status checked just below.
    print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
    result.check_returncode()
138
139
def dict_compress_sample(tag, sample):
    """Compress *sample* with ``dict.<tag>`` at levels 1, 3, 5, 9, 15 and 18.

    Each output is named ``<sample>_<LL>_64_<tag>_dictio.zst`` where ``LL`` is
    the zero-padded level. Only the level-1 run is given the verbose flags.
    """
    dictionary_args = ['-D', 'dict.' + tag]
    # (compression level, extra command-line flags)
    runs = (
        (1, ['-v', '-v', '-v']),
        (3, []),
        (5, []),
        (9, []),
        (15, []),
        (18, []),
    )
    for level, extra_flags in runs:
        destination = '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag)
        zstd(tag, dictionary_args + ['-{}'.format(level)] + extra_flags, sample, destination)
    print(tag + " : dict compression completed")
152
153
def compress_sample(tag, sample):
    """Compress *sample* without a dictionary at levels 1, 3, 5, 9, 15 and 18.

    Each output is named ``<sample>_<LL>_64_<tag>_nodict.zst`` where ``LL`` is
    the zero-padded level.
    """
    for level in (1, 3, 5, 9, 15, 18):
        destination = '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag)
        zstd(tag, ['-{}'.format(level)], sample, destination)
    print(tag + " : compression completed")
164
165
# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the contents of *filepath*."""
    digest = hashlib.sha1()
    with open(filepath, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
170
171
def remove_duplicates():
    """Delete every ``*.zst`` file in the current directory that duplicates an earlier one.

    Files are visited in sorted name order; of any group with identical
    contents, the first file is kept and the others are removed. Each removal
    is reported on stdout.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # Already removed as a duplicate of an earlier file.
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a content comparison. The default only
            # compares size and mtime when those match, which would delete
            # distinct files written in quick succession with the same size.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
184
185
def decompress_zst(tag):
    """Decompress every ``*_nodict.zst`` file with ``./zstd.<tag>`` and verify it.

    Each file is decompressed to ``<file>_d64_<tag>.dec`` and compared with
    the reference sample ``test_dat``.

    Raises:
        RuntimeError: A decompressed file differs from the reference sample.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # shallow=False: always compare contents, never just size and mtime.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK     ')
197
198
def decompress_dict(tag):
    """Decompress every ``*_dictio.zst`` file with ``./zstd.<tag>`` and verify it.

    The dictionary for each file is chosen from the version embedded in its
    name: ``dict.<head>`` when the name contains the head pseudo-tag, else
    ``dict.<vX.Y.Z>`` taken from the last ``v`` in the name. When *tag* is
    v0.6.0, files produced by versions that sort before v0.6.0 are skipped.
    Output goes to ``<file>_d64_<tag>.dec`` and is compared with ``test_dat``.

    Raises:
        RuntimeError: A decompressed file differs from the reference sample.
    """
    suffix = '_dictio.zst'
    for file_zst in sorted(glob.glob('*' + suffix)):
        stem = file_zst[:-len(suffix)]
        if head in stem:  # find vdevel
            dict_tag = head
        else:
            dict_tag = stem[stem.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # shallow=False: always compare contents, never just size and mtime.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK     ')
218
219
if __name__ == '__main__':
    # All paths derive from the parent of the current working directory, so
    # the script must be started from a direct sub-directory of the zstd
    # source root.
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # Release binaries are reused across runs; head is rebuilt every time.
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                # Check the tag's tree out into r_dir, using the clone as the repository.
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)   # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            # The current directory is the programs directory of whichever tree was just built.
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            files = glob.glob(dict_glob, root_dir=base_dir)
            for file in files:
                file = os.path.join(base_dir, file)
                print("copying " + file + " to " + dict_source_path)
                shutil.copy(file, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    # The head dictionary is created first because it is the fallback for
    # every tag whose own dictionary cannot be built.
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path, head)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # The loop variable must not be called `zstd`: at module level that would
    # overwrite the zstd() helper function.
    for zst_file in sorted(glob.glob('*.zst')):
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))
309