#!/usr/bin/python
"""Print a test spec on stdout.

Each line has parameters for a test case.  The regtest.sh shell script reads
these lines and runs parallel processes.

We use Python data structures so the test cases are easier to read and edit.

Every output line holds 13 space-separated fields:

    case_name distr num_unique_values num_clients values_per_client
    num_bits num_hashes num_cohorts
    p q f
    num_additional regexp_to_remove
"""

import optparse
import sys

#
# TEST CONFIGURATION
#

# Hand-written demo cases.  Each entry is a tuple of pre-formatted,
# space-separated string fragments that are joined into one output line.
DEMO = (
    # (case_name distr num_unique_values num_clients values_per_client)
    # (num_bits num_hashes num_cohorts)
    # (p q f) (num_additional regexp_to_remove)
    ('demo1 unif    100 100000 10', '32 1 64', '0.25 0.75 0.5',
     '100 v[0-9]*9$'),
    ('demo2 gauss   100 100000 10', '32 1 64', '0.25 0.75 0.5',
     '100 v[0-9]*9$'),
    ('demo3 exp     100 100000 10', '32 1 64', '0.25 0.75 0.5',
     '100 v[0-9]*9$'),
    ('demo4 zipf1   100 100000 10', '32 1 64', '0.25 0.75 0.5',
     '100 v[0-9]*9$'),
    ('demo5 zipf1.5 100 100000 10', '32 1 64', '0.25 0.75 0.5',
     '100 v[0-9]*9$'),
)

# Names of the input distributions; emitted as the 'distr' field.
DISTRIBUTIONS = (
    'unif',
    'exp',
    'gauss',
    'zipf1',
    'zipf1.5',
)

DISTRIBUTION_PARAMS = (
    # name, num unique values, num clients, values per client
    ('tiny', 100, 1000, 1),  # test for insufficient data
    ('small', 100, 1000000, 1),
    ('medium', 1000, 10000000, 1),
    ('large', 10000, 100000000, 1),
)

# 'k, h, m' as in params file.
BLOOMFILTER_PARAMS = {
    '8x16': (8, 2, 16),  # 16 cohorts, 8 bits each, 2 bits set in each
    '8x32': (8, 2, 32),  # 32 cohorts, 8 bits each, 2 bits set in each
    '8x128': (8, 2, 128),  # 128 cohorts, 8 bits each, 2 bits set in each
    '128x128': (128, 2, 128),  # 128 cohorts, 128 bits each, 2 bits set in each
}

# 'p, q, f' as in params file.
PRIVACY_PARAMS = {
    'eps_1_1': (0.39, 0.61, 0.45),  # eps_1 = 1, eps_inf = 5
    'eps_1_5': (0.225, 0.775, 0.0),  # eps_1 = 5, no eps_inf
}

# For deriving candidates from true inputs.
MAP_REGEX_MISSING = {
    'sharp': 'NONE',  # Categorical data
    '10%': 'v[0-9]*9$',  # missing every 10th string
}

# test configuration ->
#   (name modifier, Bloom filter, privacy params, fraction of extra,
#    regex missing)
TEST_CONFIGS = [
    ('typical', '8x128', 'eps_1_1', .2, '10%'),
    ('sharp', '8x128', 'eps_1_1', .0, 'sharp'),  # no extra candidates
    ('loose', '8x128', 'eps_1_5', .2, '10%'),  # loose privacy
    ('over_x2', '8x128', 'eps_1_1', 2.0, '10%'),  # overshoot by x2
    ('over_x10', '8x128', 'eps_1_1', 10.0, '10%'),  # overshoot by x10
]

#
# END TEST CONFIGURATION
#


def _generated_rows():
    """Build the rows for the generated ('r-...') test cases.

    One row is produced for every combination of DISTRIBUTION_PARAMS,
    DISTRIBUTIONS and TEST_CONFIGS, iterated in that nesting order.

    Returns:
      A list of rows; each row is a list of 13 strings in the field order
      described in the module docstring.
    """
    rows = []
    for (distr_params, num_values, num_clients,
         num_reports_per_client) in DISTRIBUTION_PARAMS:
        for distribution in DISTRIBUTIONS:
            for (config_name, bloom_name, privacy_params, fr_extra,
                 regex_missing) in TEST_CONFIGS:
                test_name = 'r-{}-{}-{}'.format(
                    distribution, distr_params, config_name)

                # The number of additional candidates is a fraction of the
                # number of unique values, truncated to an integer.
                params = (BLOOMFILTER_PARAMS[bloom_name]
                          + PRIVACY_PARAMS[privacy_params]
                          + (int(num_values * fr_extra),
                             MAP_REGEX_MISSING[regex_missing]))

                test_case = (test_name, distribution, num_values,
                             num_clients, num_reports_per_client) + params
                rows.append([str(element) for element in test_case])
    return rows


def main(argv):
    """Print every test case, one per line, on stdout.

    The generated cases come first, followed by the DEMO cases.

    Args:
      argv: Command line arguments.  Currently unused; accepted so the entry
        point keeps the conventional main(argv) signature.
    """
    rows = _generated_rows()
    rows.extend(DEMO)

    for row in rows:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' '.join(row))


if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        sys.stderr.write('FATAL: %s\n' % e)
        sys.exit(1)