xref: /aosp_15_r20/external/rappor/tests/regtest_spec.py (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1*2abb3134SXin Li#!/usr/bin/python
2*2abb3134SXin Li"""Print a test spec on stdout.
3*2abb3134SXin Li
4*2abb3134SXin LiEach line has parameters for a test case.  The regtest.sh shell script reads
5*2abb3134SXin Lithese lines and runs parallel processes.
6*2abb3134SXin Li
7*2abb3134SXin LiWe use Python data structures so the test cases are easier to read and edit.
8*2abb3134SXin Li"""
9*2abb3134SXin Li
10*2abb3134SXin Liimport optparse
11*2abb3134SXin Liimport sys
12*2abb3134SXin Li
13*2abb3134SXin Li#
14*2abb3134SXin Li# TEST CONFIGURATION
15*2abb3134SXin Li#
16*2abb3134SXin Li
# Column groups shared by every demo case.
_DEMO_BLOOM = '32 1 64'  # num_bits num_hashes num_cohorts
_DEMO_PRIVACY = '0.25 0.75 0.5'  # p q f
_DEMO_CANDIDATES = '100 v[0-9]*9$'  # num_additional regexp_to_remove

# Hand-written demo cases.  Each row is four space-separated column groups;
# the first group is
# 'case_name distr num_unique_values num_clients values_per_client'.
DEMO = tuple(
    (case, _DEMO_BLOOM, _DEMO_PRIVACY, _DEMO_CANDIDATES)
    for case in (
        'demo1 unif    100 100000 10',
        'demo2 gauss   100 100000 10',
        'demo3 exp     100 100000 10',
        'demo4 zipf1   100 100000 10',
        'demo5 zipf1.5 100 100000 10',
    )
)
27*2abb3134SXin Li
# Names of the input distributions; a test case is generated for each one.
DISTRIBUTIONS = ('unif', 'exp', 'gauss', 'zipf1', 'zipf1.5')
35*2abb3134SXin Li
# Input sizes.  Each row is:
#   (name, num unique values, num clients, values per client)
DISTRIBUTION_PARAMS = (
    ('tiny', 10 ** 2, 10 ** 3, 1),  # test for insufficient data
    ('small', 10 ** 2, 10 ** 6, 1),
    ('medium', 10 ** 3, 10 ** 7, 1),
    ('large', 10 ** 4, 10 ** 8, 1),
)
43*2abb3134SXin Li
# Bloom filter shapes.  Values are 'k, h, m' as in the params file:
# k bits per cohort, h bits set in each, m cohorts.
BLOOMFILTER_PARAMS = {
    # 16 cohorts, 8 bits each, 2 bits set in each
    '8x16': (8, 2, 16),
    # 32 cohorts, 8 bits each, 2 bits set in each
    '8x32': (8, 2, 32),
    # 128 cohorts, 8 bits each, 2 bits set in each
    '8x128': (8, 2, 128),
    # 128 cohorts, 128 bits each, 2 bits set in each
    '128x128': (128, 2, 128),
}
51*2abb3134SXin Li
# Privacy settings.  Values are 'p, q, f' as in the params file.
PRIVACY_PARAMS = {
    # eps_1 = 1, eps_inf = 5
    'eps_1_1': (0.39, 0.61, 0.45),
    # eps_1 = 5, no eps_inf
    'eps_1_5': (0.225, 0.775, 0.0),
}
57*2abb3134SXin Li
# Regexes used when deriving candidates from true inputs; values matching
# the regex are the ones left out.
MAP_REGEX_MISSING = {
    # Categorical data
    'sharp': 'NONE',
    # missing every 10th string
    '10%': 'v[0-9]*9$',
}
63*2abb3134SXin Li
# Test configurations.  Each row is:
#   (name modifier, key into BLOOMFILTER_PARAMS, key into PRIVACY_PARAMS,
#    fraction of extra candidates, key into MAP_REGEX_MISSING)
TEST_CONFIGS = [
    ('typical', '8x128', 'eps_1_1', 0.2, '10%'),
    # no extra candidates
    ('sharp', '8x128', 'eps_1_1', 0.0, 'sharp'),
    # loose privacy
    ('loose', '8x128', 'eps_1_5', 0.2, '10%'),
    # overshoot by x2
    ('over_x2', '8x128', 'eps_1_1', 2.0, '10%'),
    # overshoot by x10
    ('over_x10', '8x128', 'eps_1_1', 10.0, '10%'),
]
74*2abb3134SXin Li
75*2abb3134SXin Li#
76*2abb3134SXin Li# END TEST CONFIGURATION
77*2abb3134SXin Li#
78*2abb3134SXin Li
79*2abb3134SXin Li
def main(argv):
  """Print one space-separated test case per line on stdout.

  Generated cases come first: one for every combination of
  DISTRIBUTION_PARAMS, DISTRIBUTIONS and TEST_CONFIGS, named
  'r-<distribution>-<size name>-<config name>'.  The hand-written DEMO
  cases follow.

  Args:
    argv: Command line arguments.  Currently unused.
  """
  rows = []

  for (distr_params, num_values, num_clients,
       num_reports_per_client) in DISTRIBUTION_PARAMS:
    for distribution in DISTRIBUTIONS:
      for (config_name, bloom_name, privacy_params, fr_extra,
           regex_missing) in TEST_CONFIGS:
        test_name = 'r-{}-{}-{}'.format(distribution, distr_params,
                                        config_name)

        # Same column order as the DEMO rows: case description, Bloom filter
        # params, privacy params, then the number of additional candidates
        # (a fraction of the unique values) and the regex of values to remove.
        test_case = (
            (test_name, distribution, num_values, num_clients,
             num_reports_per_client)
            + BLOOMFILTER_PARAMS[bloom_name]
            + PRIVACY_PARAMS[privacy_params]
            + (int(num_values * fr_extra), MAP_REGEX_MISSING[regex_missing]))
        rows.append([str(element) for element in test_case])

  # DEMO rows are already tuples of strings.
  rows.extend(DEMO)

  for row in rows:
    # A parenthesized single argument prints identically whether this is
    # parsed as a Python 2 print statement or a Python 3 print() call.
    print(' '.join(row))
107*2abb3134SXin Li
# Script entry point: report a RuntimeError from main() as a one-line fatal
# message on stderr and exit with status 1.
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # 'except ... as' and an explicit write() are valid on both Python 2.6+
    # and Python 3; the output matches the old 'print >>sys.stderr' form.
    sys.stderr.write('FATAL: %s\n' % e)
    sys.exit(1)
114