#!/usr/bin/python
"""Print a test spec on stdout.

Each line has parameters for a test case.  The regtest.sh shell script reads
these lines and runs parallel processes.

We use Python data structures so the test cases are easier to read and edit.
"""

import optparse
import sys

#
# TEST CONFIGURATION
#

DEMO = (
    # (case_name distr num_unique_values num_clients values_per_client)
    # (num_bits num_hashes num_cohorts)
    # (p q f) (num_additional regexp_to_remove)
    ('demo1 unif    100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo2 gauss   100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo3 exp     100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo4 zipf1   100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo5 zipf1.5 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
)

DISTRIBUTIONS = (
    'unif',
    'exp',
    'gauss',
    'zipf1',
    'zipf1.5',
)

DISTRIBUTION_PARAMS = (
    # name, num unique values, num clients, values per client
    ('tiny', 100, 1000, 1),  # test for insufficient data
    ('small', 100, 1000000, 1),
    ('medium', 1000, 10000000, 1),
    ('large', 10000, 100000000, 1),
)

# 'k, h, m' as in params file.
BLOOMFILTER_PARAMS = {
    '8x16': (8, 2, 16),  # 16 cohorts, 8 bits each, 2 bits set in each
    '8x32': (8, 2, 32),  # 32 cohorts, 8 bits each, 2 bits set in each
    '8x128': (8, 2, 128),  # 128 cohorts, 8 bits each, 2 bits set in each
    '128x128': (128, 2, 128),  # 128 cohorts, 128 bits each, 2 bits set in each
}

# 'p, q, f' as in params file.
PRIVACY_PARAMS = {
    'eps_1_1': (0.39, 0.61, 0.45),  # eps_1 = 1, eps_inf = 5
    'eps_1_5': (0.225, 0.775, 0.0),  # eps_1 = 5, no eps_inf
}

# For deriving candidates from true inputs.
MAP_REGEX_MISSING = {
    'sharp': 'NONE',  # Categorical data
    '10%': 'v[0-9]*9$',  # missing every 10th string
}

# test configuration ->
#   (name modifier, Bloom filter, privacy params, fraction of extra,
#    regex missing)
TEST_CONFIGS = [
    ('typical', '8x128', 'eps_1_1', .2, '10%'),
    ('sharp', '8x128', 'eps_1_1', .0, 'sharp'),  # no extra candidates
    ('loose', '8x128', 'eps_1_5', .2, '10%'),  # loose privacy
    ('over_x2', '8x128', 'eps_1_1', 2.0, '10%'),  # overshoot by x2
    ('over_x10', '8x128', 'eps_1_1', 10.0, '10%'),  # overshoot by x10
]

#
# END TEST CONFIGURATION
#


def main(argv):
    """Print one space-separated line per test case on stdout.

    The generated cases come first: one for every combination of
    DISTRIBUTION_PARAMS x DISTRIBUTIONS x TEST_CONFIGS (in that nesting
    order), named 'r-<distribution>-<size>-<config>'.  The hand-written DEMO
    cases are appended after them.

    Args:
      argv: Command-line arguments.  Currently unused; kept so the entry
        point's signature stays stable.
    """
    rows = []

    for (distr_params, num_values, num_clients,
         num_reports_per_client) in DISTRIBUTION_PARAMS:
        for distribution in DISTRIBUTIONS:
            for (config_name, bloom_name, privacy_params, fr_extra,
                 regex_missing) in TEST_CONFIGS:
                test_name = 'r-{}-{}-{}'.format(distribution, distr_params,
                                                config_name)

                # The number of extra candidates is a fraction of the number
                # of unique values, truncated to an integer.
                params = (BLOOMFILTER_PARAMS[bloom_name]
                          + PRIVACY_PARAMS[privacy_params]
                          + (int(num_values * fr_extra),)
                          + (MAP_REGEX_MISSING[regex_missing],))

                test_case = (test_name, distribution, num_values, num_clients,
                             num_reports_per_client) + params
                rows.append([str(element) for element in test_case])

    # DEMO entries are already tuples of pre-formatted strings.
    for params in DEMO:
        rows.append(params)

    for row in rows:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' '.join(row))


if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        sys.stderr.write('FATAL: %s\n' % e)
        sys.exit(1)