#!/usr/bin/env python3
"""Test zstd interoperability between versions"""

# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################

import filecmp
import glob
import hashlib
import os
import shutil
import sys
import subprocess
from subprocess import Popen, PIPE

repo_url = 'https://github.com/facebook/zstd.git'
tmp_dir_name = 'tests/versionsTest'
make_cmd = 'make'
make_args = ['-j', 'CFLAGS=-O0']
git_cmd = 'git'
test_dat_src = 'README.md'
test_dat = 'test_dat'
head = 'vdevel'
dict_source = 'dict_source'
dict_globs = [
    'programs/*.c',
    'lib/common/*.c',
    'lib/compress/*.c',
    'lib/decompress/*.c',
    'lib/dictBuilder/*.c',
    'lib/legacy/*.c',
    'programs/*.h',
    'lib/common/*.h',
    'lib/compress/*.h',
    'lib/dictBuilder/*.h',
    'lib/legacy/*.h'
]
# Compression levels exercised for every tested version.
compression_levels = (1, 3, 5, 9, 15, 18)


def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command`, wait for it to finish and return its exit code.

    command      -- argument list, or a single string when param_shell is True
    print_output -- print the captured stdout and stderr unconditionally
    print_error  -- print the captured stderr when the command fails
                    (only when print_output has not already printed it)
    param_shell  -- forwarded to Popen as `shell`
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    stdout_bytes, stderr_bytes = popen.communicate()
    # errors="replace": odd bytes in a tool's output must not raise here and
    # hide the exit status the caller is waiting for.
    stderr_text = stderr_bytes.decode("utf-8", errors="replace")
    stdout_text = stdout_bytes.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        print(stderr_text)
    return popen.returncode


def proc(cmd_args, pipe=True, dummy=False):
    """Run `cmd_args` and return the `(stdout, stderr)` pair from communicate().

    With pipe=True both streams are captured as bytes; otherwise the child
    inherits this process's streams and both items are None.
    With dummy=True nothing is executed and None is returned.
    The exit status of the command is not inspected.
    """
    if dummy:
        return None
    if pipe:
        subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE)
    else:
        subproc = Popen(cmd_args)
    return subproc.communicate()


def make(targets, pipe=True):
    """Invoke make with the module-level `make_args` followed by `targets`."""
    cmd = [make_cmd] + make_args + targets
    print('compilation command : ' + str(cmd))
    return proc(cmd, pipe)


def git(args, pipe=True):
    """Invoke git with `args`; see proc() for the return value."""
    return proc([git_cmd] + args, pipe)


def get_git_tags():
    """Return the tags of the repository in the current directory that match
    the single-digit pattern 'v[0-9].[0-9].[0-9]'."""
    stdout, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return stdout.decode('utf-8').split()


def dict_ok(tag, dict_name, sample):
    """Return True when ./zstd.<tag> can compress `sample` with `dict_name`.

    Returns False when the dictionary file is missing, the sample cannot be
    opened, the binary cannot be started, or it exits with a non-zero status.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as i:
            subprocess.check_call(cmd, stdin=i, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Deliberately narrower than a bare `except:` so that Ctrl-C and
        # programming errors are not reported as "dictionary is unusable".
        return False
    return True


def create_dict(tag, dict_source_path, fallback_tag=None):
    """Create 'dict.<tag>' in the current directory unless it already exists.

    The dictionary is trained on the *.c and *.h files found directly in
    `dict_source_path`. If training fails, or the result does not pass
    dict_ok(), 'dict.<fallback_tag>' is copied instead when `fallback_tag`
    is given; otherwise RuntimeError is raised.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    cFiles = glob.glob(dict_source_path + "/*.c")
    hFiles = glob.glob(dict_source_path + "/*.h")
    # Ensure the dictionary builder is deterministic
    files = sorted(cFiles + hFiles)
    if tag == 'v0.5.0':
        # this version uses a separate dictBuilder executable
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    result = execute(cmd, print_output=False)

    # `files and` guards files[0] when no training sample was found.
    if files and result == 0 and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')


def zstd(tag, args, input_file, output_file):
    """
    Zstd compress input_file to output_file.
    Need this helper because 0.5.0 is broken when stdout is not a TTY.
    Throws an exception if the command returns non-zero.

    The data is fed through stdin and collected from stdout of ./zstd.<tag>,
    so `args` decides whether this compresses or decompresses.
    """
    with open(input_file, "rb") as i, open(output_file, "wb") as o:
        cmd = ['./zstd.' + tag] + args
        print("Running: '{}', input={}, output={}" .format(
            ' '.join(cmd), input_file, output_file
        ))
        result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
        # Lenient decoding: a non-ASCII byte in the diagnostics must not raise
        # before the return code has been checked.
        print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
        result.check_returncode()


def dict_compress_sample(tag, sample):
    """Compress `sample` with ./zstd.<tag> and 'dict.<tag>' at every level in
    `compression_levels`, writing '<sample>_<LL>_64_<tag>_dictio.zst'."""
    dict_name = 'dict.' + tag
    verbose = ['-v', '-v', '-v']
    for level in compression_levels:
        args = ['-D', dict_name, '-{}'.format(level)]
        if level == 1:
            # only the level-1 run is made verbose
            args += verbose
        zstd(tag, args, sample, '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag))
    print(tag + " : dict compression completed")


def compress_sample(tag, sample):
    """Compress `sample` with ./zstd.<tag> (no dictionary) at every level in
    `compression_levels`, writing '<sample>_<LL>_64_<tag>_nodict.zst'."""
    for level in compression_levels:
        zstd(tag, ['-{}'.format(level)], sample, '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag))
    print(tag + " : compression completed")


# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the file's full content."""
    with open(filepath, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest()
def remove_duplicates():
    """Delete every *.zst file in the current directory whose content is
    identical to that of a file sorting earlier by name."""
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # already deleted as a duplicate of an earlier file
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False: compare the bytes. The default only compares
            # size and mtime, which can match for different files that were
            # written back to back.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))


def decompress_zst(tag):
    """Decompress every '*_nodict.zst' in the current directory with
    ./zstd.<tag> and check that the result equals `test_dat`.

    Raises RuntimeError on the first mismatch.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # byte-for-byte comparison, not just size/mtime
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')


def decompress_dict(tag):
    """Decompress every '*_dictio.zst' in the current directory with
    ./zstd.<tag>, using the dictionary of the version that produced the file,
    and check that the result equals `test_dat`.

    Raises RuntimeError on the first mismatch.
    """
    suffix = '_dictio.zst'
    for file_zst in sorted(glob.glob('*' + suffix)):
        stem = file_zst[:-len(suffix)]
        # The producing version is the trailing part of the name: either the
        # development head or the text from the last 'v' onwards.
        if head in stem:
            dict_tag = head
        else:
            dict_tag = stem[stem.rfind('v'):]
        # NOTE(review): presumably v0.6.0 cannot read dictionary frames made
        # by older versions -- the reason is not visible here.
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # byte-for-byte comparison, not just size/mtime
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK ')


def main():
    """Build every release (>= v0.5.0) plus the working tree, then check that
    each build can decompress what the builds processed before it produced,
    with and without dictionaries."""
    base_dir = os.getcwd() + '/..'                      # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name             # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'                  # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source      # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'               # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)    # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # released binaries are reused when present; head is always rebuilt
        if os.path.isfile(dst_zstd) and tag != head:
            continue
        if tag != head:
            print('-----------------------------------------------')
            print('compiling ' + tag)
            print('-----------------------------------------------')
            r_dir = '{}/{}'.format(tmp_dir, tag)        # /path/to/zstd/tests/versionsTest/<TAG>
            os.makedirs(r_dir, exist_ok=True)
            os.chdir(clone_dir)
            git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
            if tag == 'v0.5.0':
                os.chdir(r_dir + '/dictBuilder')        # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                make(['clean'], False)                  # separate 'clean' target to allow parallel build
                make(['dictBuilder'], False)
                shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
            os.chdir(r_dir + '/programs')               # /path/to/zstd/tests/versionsTest/<TAG>/programs
            make(['clean'], False)                      # separate 'clean' target to allow parallel build
            make(['zstd'], False)
        else:
            os.chdir(programs_dir)
            print('-----------------------------------------------')
            print('compiling head')
            print('-----------------------------------------------')
            make(['zstd'], False)
        shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for leftover in glob.glob("*.zst") + glob.glob("*.dec"):
        os.remove(leftover)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            for rel_path in glob.glob(dict_glob, root_dir=base_dir):
                src_path = os.path.join(base_dir, rel_path)
                print("copying " + src_path + " to " + dict_source_path)
                shutil.copy(src_path, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    # head's dictionary is created first: it is the fallback for every tag
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        # every entry of `tags` is already >= 'v0.5.0', so the dictionary
        # steps apply to all of them
        create_dict(tag, dict_source_path, head)
        dict_compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # loop variable deliberately not named `zstd`, which is the helper function
    for zst_file in sorted(glob.glob('*.zst')):
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))


if __name__ == '__main__':
    main()