xref: /aosp_15_r20/external/rappor/tests/regtest_spec.py (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1#!/usr/bin/python
2"""Print a test spec on stdout.
3
4Each line has parameters for a test case.  The regtest.sh shell script reads
5these lines and runs parallel processes.
6
7We use Python data structures so the test cases are easier to read and edit.
8"""
9
10import optparse
11import sys
12
13#
14# TEST CONFIGURATION
15#
16
# Hand-written demo cases.  main() appends these rows as-is after the
# generated ones; the strings of a row are joined with single spaces.
DEMO = (
    # Each row holds four space-separated field groups:
    #   (case_name distr num_unique_values num_clients values_per_client)
    #   (num_bits num_hashes num_cohorts)
    #   (p q f)
    #   (num_additional regexp_to_remove)
    ('demo1 unif    100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo2 gauss   100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo3 exp     100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo4 zipf1   100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
    ('demo5 zipf1.5 100 100000 10', '32 1 64', '0.25 0.75 0.5', '100 v[0-9]*9$'),
)

# Distribution names; every one is combined with every entry of
# DISTRIBUTION_PARAMS and TEST_CONFIGS.
DISTRIBUTIONS = (
    'unif',
    'exp',
    'gauss',
    'zipf1',
    'zipf1.5',
)

DISTRIBUTION_PARAMS = (
    # name, num unique values, num clients, values per client
    ('tiny', 100, 1000, 1),  # test for insufficient data
    ('small', 100, 1000000, 1),
    ('medium', 1000, 10000000, 1),
    ('large', 10000, 100000000, 1),
)

# 'k, h, m' as in params file.  Keys are referenced by TEST_CONFIGS.
BLOOMFILTER_PARAMS = {
    '8x16': (8, 2, 16),  # 16 cohorts, 8 bits each, 2 bits set in each
    '8x32': (8, 2, 32),  # 32 cohorts, 8 bits each, 2 bits set in each
    '8x128': (8, 2, 128),  # 128 cohorts, 8 bits each, 2 bits set in each
    '128x128': (128, 2, 128),  # 128 cohorts, 128 bits each, 2 bits set in each
}

# 'p, q, f' as in params file.  Keys are referenced by TEST_CONFIGS.
PRIVACY_PARAMS = {
    'eps_1_1': (0.39, 0.61, 0.45),  # eps_1 = 1, eps_inf = 5
    'eps_1_5': (0.225, 0.775, 0.0),  # eps_1 = 5, no eps_inf
}

# For deriving candidates from true inputs.  Keys are referenced by
# TEST_CONFIGS; the value is the regex emitted as the last field of a row.
MAP_REGEX_MISSING = {
    'sharp': 'NONE',  # Categorical data
    '10%': 'v[0-9]*9$',  # missing every 10th string
}

# test configuration ->
#   (name modifier, Bloom filter, privacy params, fraction of extra,
#    regex missing)
# 'Bloom filter', 'privacy params' and 'regex missing' are keys into
# BLOOMFILTER_PARAMS, PRIVACY_PARAMS and MAP_REGEX_MISSING respectively.
# 'fraction of extra' is multiplied by the number of unique values (and
# truncated to an int) to get the number of additional candidates.
TEST_CONFIGS = [
    ('typical', '8x128', 'eps_1_1', 0.2, '10%'),
    ('sharp', '8x128', 'eps_1_1', 0.0, 'sharp'),  # no extra candidates
    ('loose', '8x128', 'eps_1_5', 0.2, '10%'),  # loose privacy
    ('over_x2', '8x128', 'eps_1_1', 2.0, '10%'),  # overshoot by x2
    ('over_x10', '8x128', 'eps_1_1', 10.0, '10%'),  # overshoot by x10
]
74
75#
76# END TEST CONFIGURATION
77#
78
79
def main(argv):
  """Print one test-case spec per line on stdout.

  Generated cases are the cross product of DISTRIBUTION_PARAMS, DISTRIBUTIONS
  and TEST_CONFIGS (nested in that order), followed by the hand-written DEMO
  rows.  Each generated line is a space-separated list of fields:

    test_name distribution num_unique_values num_clients values_per_client
    k h m  p q f  num_additional regexp_to_remove

  Args:
    argv: Command-line arguments.  Accepted for the conventional main()
      signature; not used.
  """
  rows = []

  for (size_name, num_values, num_clients,
       num_reports_per_client) in DISTRIBUTION_PARAMS:
    for distribution in DISTRIBUTIONS:
      for (config_name, bloom_name, privacy_name, fr_extra,
           regex_name) in TEST_CONFIGS:
        test_name = 'r-{}-{}-{}'.format(distribution, size_name, config_name)

        # The number of extra candidates scales with the number of unique
        # values; int() truncates the float product.
        num_additional = int(num_values * fr_extra)

        test_case = (
            (test_name, distribution, num_values, num_clients,
             num_reports_per_client)
            + BLOOMFILTER_PARAMS[bloom_name]
            + PRIVACY_PARAMS[privacy_name]
            + (num_additional, MAP_REGEX_MISSING[regex_name]))
        rows.append([str(field) for field in test_case])

  # DEMO rows are already tuples of strings, so they are emitted unchanged.
  rows.extend(DEMO)

  for row in rows:
    # sys.stdout.write() rather than the print statement, so the script
    # parses and prints identically under both Python 2 and Python 3.
    sys.stdout.write(' '.join(row) + '\n')
107
if __name__ == '__main__':
  # Script entry point: report a RuntimeError from main() as a one-line
  # 'FATAL' message on stderr and exit with status 1.
  try:
    main(sys.argv)
  except RuntimeError as e:
    # 'except ... as' and sys.stderr.write() are valid on Python 2.6+ and
    # Python 3; the old 'except X, e' / 'print >>' forms are Python 2 only.
    sys.stderr.write('FATAL: %s\n' % e)
    sys.exit(1)
113    sys.exit(1)
114