#!/usr/bin/env python3
"""Test zstd interoperability between versions"""

# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################

import filecmp
import glob
import hashlib
import os
import shlex
import shutil
import sys
import subprocess
from subprocess import Popen, PIPE

repo_url = 'https://github.com/facebook/zstd.git'
tmp_dir_name = 'tests/versionsTest'
make_cmd = 'make'
make_args = ['-j', 'CFLAGS=-O0']
git_cmd = 'git'
test_dat_src = 'README.md'
test_dat = 'test_dat'
head = 'vdevel'
dict_source = 'dict_source'
dict_globs = [
    'programs/*.c',
    'lib/common/*.c',
    'lib/compress/*.c',
    'lib/decompress/*.c',
    'lib/dictBuilder/*.c',
    'lib/legacy/*.c',
    'programs/*.h',
    'lib/common/*.h',
    'lib/compress/*.h',
    'lib/dictBuilder/*.h',
    'lib/legacy/*.h',
]
# Compression levels exercised for every tag, with and without a dictionary.
compression_levels = [1, 3, 5, 9, 15, 18]


def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command`, wait for it to finish and return its exit code.

    Both output streams are captured. With `print_output` they are always
    printed; otherwise stderr is printed only when the command fails and
    `print_error` is set. `param_shell` is forwarded to Popen as `shell`.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    raw_out, raw_err = popen.communicate()
    # errors="replace": diagnostics must never abort the test on odd bytes.
    err_text = raw_err.decode("utf-8", errors="replace")
    out_text = raw_out.decode("utf-8", errors="replace")
    if print_output:
        print(out_text)
        print(err_text)
    if popen.returncode is not None and popen.returncode != 0:
        if not print_output and print_error:
            print(err_text)
    return popen.returncode


def proc(cmd_args, pipe=True, dummy=False):
    """Run `cmd_args` to completion and return the `communicate()` result.

    With `pipe` the output is captured and returned as `(stdout, stderr)`
    bytes; without it the child inherits this process's streams and the
    tuple holds `None` values. With `dummy` nothing is run and `None` is
    returned.
    """
    if dummy:
        return None
    if pipe:
        subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE)
    else:
        subproc = Popen(cmd_args)
    return subproc.communicate()


def make(targets, pipe=True):
    """Invoke make with the module-wide `make_args` followed by `targets`."""
    cmd = [make_cmd] + make_args + targets
    print('compilation command : ' + str(cmd))
    return proc(cmd, pipe)


def git(args, pipe=True):
    """Invoke git with `args` in the current working directory."""
    return proc([git_cmd] + args, pipe)


def get_git_tags():
    """Return the tags matching `v[0-9].[0-9].[0-9]` in the current repo."""
    stdout, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return stdout.decode('utf-8').split()


def dict_ok(tag, dict_name, sample):
    """Return True when `./zstd.<tag>` can compress `sample` with `dict_name`.

    Returns False when the dictionary file is missing, when `sample` or the
    binary cannot be opened/started, or when the command exits non-zero.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as i:
            subprocess.check_call(cmd, stdin=i, stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only expected failures count as "dictionary unusable"; a bare
        # except would also have swallowed KeyboardInterrupt.
        return False
    return True


def create_dict(tag, dict_source_path, fallback_tag=None):
    """Train `dict.<tag>` from the .c/.h files found in `dict_source_path`.

    Does nothing when the dictionary already exists. When training fails
    (or the result does not pass `dict_ok`) the dictionary of
    `fallback_tag` is copied instead; without a fallback a RuntimeError is
    raised.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    cFiles = glob.glob(dict_source_path + "/*.c")
    hFiles = glob.glob(dict_source_path + "/*.h")
    # Ensure the dictionary builder is deterministic
    files = sorted(cFiles + hFiles)
    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    # Still run through the shell (a missing binary must yield a non-zero
    # status, not an exception), but quote every word so that paths
    # containing spaces or shell metacharacters survive.
    result = execute(shlex.join(cmd), print_output=False, param_shell=True)

    if result == 0 and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')


def zstd(tag, args, input_file, output_file):
    """
    Zstd compress input_file to output_file.
    Need this helper because 0.5.0 is broken when stdout is not a TTY.
    Throws an exception if the command returns non-zero.
    """
    with open(input_file, "rb") as i:
        with open(output_file, "wb") as o:
            cmd = ['./zstd.' + tag] + args
            print("Running: '{}', input={}, output={}" .format(
                ' '.join(cmd), input_file, output_file
            ))
            result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
            # Tolerant decoding: non-ASCII diagnostics must not mask the
            # real outcome with a UnicodeDecodeError.
            print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
            result.check_returncode()


def dict_compress_sample(tag, sample):
    """Compress `sample` with `dict.<tag>` at every level in `compression_levels`.

    Outputs are named `<sample>_<LL>_64_<tag>_dictio.zst`. Level 1 is run
    with triple verbosity.
    """
    dict_name = 'dict.' + tag
    verbose = ['-v', '-v', '-v']
    for level in compression_levels:
        args = ['-D', dict_name, '-' + str(level)]
        if level == 1:
            args += verbose
        zstd(tag, args, sample, '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag))
    print(tag + " : dict compression completed")


def compress_sample(tag, sample):
    """Compress `sample` without dictionary at every level in `compression_levels`.

    Outputs are named `<sample>_<LL>_64_<tag>_nodict.zst`.
    """
    for level in compression_levels:
        zstd(tag, ['-' + str(level)], sample,
             '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag))
    print(tag + " : compression completed")


# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the content of `filepath`."""
    with open(filepath, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest()


def remove_duplicates():
    """Delete every `*.zst` in the current directory that duplicates an earlier one.

    Files are visited in sorted order and the first of each group of
    identical files is kept.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            continue  # already removed as a duplicate of an earlier file
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False: compare content. The default only compares
            # size and mtime, which can flag distinct files as identical.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))


def decompress_zst(tag):
    """Decompress every `*_nodict.zst` with `./zstd.<tag>` and check the result.

    Raises RuntimeError when a decompressed file differs from `test_dat`.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # Byte-for-byte comparison; a stat-only match is not proof.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')


def decompress_dict(tag):
    """Decompress every `*_dictio.zst` with `./zstd.<tag>` and check the result.

    The dictionary is chosen from the tag embedded in the file name. For
    tag v0.6.0, files produced with a dictionary tag older than v0.6.0 are
    skipped. Raises RuntimeError when a decompressed file differs from
    `test_dat`.
    """
    for file_zst in sorted(glob.glob('*_dictio.zst')):
        dict_tag = file_zst[0:len(file_zst)-11]  # remove "_dictio.zst"
        if head in dict_tag:  # find vdevel
            dict_tag = head
        else:
            dict_tag = dict_tag[dict_tag.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')


def main():
    """Build every release tag plus the working tree, then cross-check them.

    The base directory is taken to be the parent of the current working
    directory. Each version compresses the sample and must decompress
    every file produced so far.
    """
    base_dir = os.getcwd() + '/..'                    # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name           # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'                # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source    # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'             # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)   # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            for rel_path in glob.glob(dict_glob, root_dir=base_dir):
                src_path = os.path.join(base_dir, rel_path)
                print("copying " + src_path + " to " + dict_source_path)
                shutil.copy(src_path, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        # `tags` only holds tags >= 'v0.5.0', all of which support dictionaries.
        create_dict(tag, dict_source_path, head)
        dict_compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    for zst_name in sorted(glob.glob('*.zst')):
        print(zst_name + ' : ' + repr(os.path.getsize(zst_name)) + ', ' + sha1_of_file(zst_name))


if __name__ == '__main__':
    main()