#!/usr/bin/python
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Read the RAPPOR'd values on stdin, and sum the bits to produce a Counting Bloom
filter by cohort.  This can then be analyzed by R.
"""

import csv
import sys

import rappor


def SumBits(params, stdin, stdout):
  """Sum the IRR bits of every report, per cohort, and write them as CSV.

  The first input row is treated as a header and skipped (it must still have
  five columns).  Every row is unpacked as
  (user id, cohort, bloom, prr, irr); only the cohort and IRR are used.

  One output row is written per cohort: the number of reports in that cohort,
  followed by num_bloombits counters.  Counter k is the number of reports
  whose bit k was set, where bit 0 is the LAST character of the IRR string.

  Args:
    params: object with integer attributes num_cohorts and num_bloombits.
    stdin: iterable of CSV lines to read.
    stdout: file-like object the CSV output is written to.

  Raises:
    RuntimeError: if a row does not have exactly five columns, the cohort is
      not an integer in [0, num_cohorts), or the IRR is not a string of
      exactly num_bloombits '0'/'1' characters.
  """
  csv_in = csv.reader(stdin)
  csv_out = csv.writer(stdout)

  num_cohorts = params.num_cohorts
  num_bloombits = params.num_bloombits

  sums = [[0] * num_bloombits for _ in range(num_cohorts)]
  num_reports = [0] * num_cohorts

  for row_num, row in enumerate(csv_in):
    try:
      (unused_user_id, cohort, unused_bloom, unused_prr, irr) = row
    except ValueError:
      raise RuntimeError('Error parsing row %r' % row)

    if row_num == 0:
      continue  # skip header

    # Report bad cohorts as RuntimeError like every other input problem, so
    # the command line entry point prints a message instead of a traceback.
    try:
      cohort = int(cohort)
    except ValueError:
      raise RuntimeError('Invalid cohort %r in row %r' % (cohort, row))
    # A negative value would otherwise silently index from the end of the
    # lists and be counted in the wrong cohort.
    if not 0 <= cohort < num_cohorts:
      raise RuntimeError(
          'Cohort %d out of range (expected 0 <= cohort < %d)'
          % (cohort, num_cohorts))
    num_reports[cohort] += 1

    if len(irr) != num_bloombits:
      raise RuntimeError(
          "Expected %d bits, got %r" % (num_bloombits, len(irr)))

    cohort_sums = sums[cohort]
    for char_num, c in enumerate(irr):
      # e.g. char 0 = bit 15, char 15 = bit 0
      bit_num = num_bloombits - char_num - 1
      if c == '1':
        cohort_sums[bit_num] += 1
      elif c != '0':
        raise RuntimeError('Invalid IRR -- digits should be 0 or 1')

  for cohort in range(num_cohorts):
    # First column is the total number of reports in the cohort.
    row = [num_reports[cohort]] + sums[cohort]
    csv_out.writerow(row)


def main(argv):
  """Load the params file named by argv[1], then sum stdin to stdout.

  Args:
    argv: command line arguments; argv[1] is the path of the params file.

  Raises:
    RuntimeError: if the params file argument is missing, if
      rappor.Params.from_csv raises rappor.Error, or if SumBits rejects the
      input.
  """
  try:
    filename = argv[1]
  except IndexError:
    raise RuntimeError('Usage: sum_bits.py <params file>')
  with open(filename) as f:
    try:
      params = rappor.Params.from_csv(f)
    except rappor.Error as e:
      raise RuntimeError(e)

  SumBits(params, sys.stdin, sys.stdout)


if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # Print only the message (no traceback) and signal failure to the shell.
    sys.stderr.write('%s\n' % (e.args[0],))
    sys.exit(1)