1 // Copyright 2014 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
#include <stdio.h>
#include <time.h>  // time

#include <cassert>  // assert
#include <cerrno>   // errno, ERANGE
#include <climits>  // INT_MIN, INT_MAX
#include <cmath>    // HUGE_VALF
#include <cstdlib>  // strtol, strtof
#include <iostream>
#include <string>
#include <vector>

#include "encoder.h"
#include "libc_rand_impl.h"
#include "unix_kernel_rand_impl.h"
#include "openssl_hash_impl.h"
27
// Like atoi, but with basic (not exhaustive) error checking.
//
// Parses a leading base-10 integer from `s` using strtol, so leading
// whitespace and an optional sign are accepted, and any characters after the
// number are ignored.
//
// Returns true and stores the value in *result on success.  Returns false and
// leaves *result untouched if no characters could be converted, or if the
// value does not fit in an int.
bool StringToInt(const char* s, int* result) {
  char* end = NULL;  // set by strtol to the first unconsumed character

  errno = 0;  // strtol signals overflow of long only through errno
  const long parsed = strtol(s, &end, 10);  // base 10

  // If strtol didn't consume any characters, it failed.
  if (end == s) {
    return false;
  }
  // Reject values outside long's range (ERANGE) and, where long is wider than
  // int, values that would be silently truncated by the narrowing below.
  if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX) {
    return false;
  }

  *result = static_cast<int>(parsed);
  return true;
}
40
// Like atof, but with basic (not exhaustive) error checking.
//
// Parses a leading floating-point number from `s` using strtof; any characters
// after the number are ignored.
//
// Returns true and stores the value in *result on success.  Returns false and
// leaves *result untouched if no characters could be converted, or if the
// value overflows the range of float.
bool StringToFloat(const char* s, float* result) {
  char* end = NULL;  // set by strtof to the first unconsumed character

  errno = 0;  // strtof signals range errors only through errno
  const float parsed = strtof(s, &end);

  // If strtof didn't consume any characters, it failed.
  if (end == s) {
    return false;
  }
  // On overflow strtof returns +/-HUGE_VALF and sets errno to ERANGE.  Checking
  // both keeps an explicitly spelled "inf" (no ERANGE) behaving as before.
  if (errno == ERANGE && (parsed == HUGE_VALF || parsed == -HUGE_VALF)) {
    return false;
  }

  *result = parsed;
  return true;
}
53
54 // Copy a report into a string, which can go in a protobuf.
BitsToString(rappor::Bits b,std::string * output,int num_bytes)55 void BitsToString(rappor::Bits b, std::string* output, int num_bytes) {
56 output->assign(num_bytes, '\0');
57 for (int i = 0; i < num_bytes; ++i) {
58 // "little endian" string
59 (*output)[i] = b & 0xFF; // last byte
60 b >>= 8;
61 }
62 }
63
// Print a report, with the most significant bit first.
//
// Walks the string from its last character to its first and writes each
// character to std::cout as eight '0'/'1' digits, highest bit first.  No
// separator or newline is written.
void PrintBitString(const std::string& s) {
  for (std::string::const_reverse_iterator it = s.rbegin(); it != s.rend();
       ++it) {
    const unsigned char octet = static_cast<unsigned char>(*it);
    // The mask starts at bit 7 and slides down to bit 0.
    for (unsigned int mask = 0x80; mask != 0; mask >>= 1) {
      if (octet & mask) {
        std::cout << "1";
      } else {
        std::cout << "0";
      }
    }
  }
}
74
main(int argc,char ** argv)75 int main(int argc, char** argv) {
76 if (argc != 7) {
77 rappor::log(
78 "Usage: rappor_encode <num bits> <num hashes> <num cohorts> p q f");
79 exit(1);
80 }
81
82 int num_bits, num_hashes, num_cohorts;
83 float prob_p, prob_q, prob_f;
84
85 bool ok1 = StringToInt(argv[1], &num_bits);
86 bool ok2 = StringToInt(argv[2], &num_hashes);
87 bool ok3 = StringToInt(argv[3], &num_cohorts);
88
89 bool ok4 = StringToFloat(argv[4], &prob_p);
90 bool ok5 = StringToFloat(argv[5], &prob_q);
91 bool ok6 = StringToFloat(argv[6], &prob_f);
92
93 if (!ok1) {
94 rappor::log("Invalid number of bits: '%s'", argv[1]);
95 exit(1);
96 }
97 if (!ok2) {
98 rappor::log("Invalid number of hashes: '%s'", argv[2]);
99 exit(1);
100 }
101 if (!ok3) {
102 rappor::log("Invalid number of cohorts: '%s'", argv[3]);
103 exit(1);
104 }
105 if (!ok4) {
106 rappor::log("Invalid float p: '%s'", argv[4]);
107 exit(1);
108 }
109 if (!ok5) {
110 rappor::log("Invalid float q: '%s'", argv[5]);
111 exit(1);
112 }
113 if (!ok6) {
114 rappor::log("Invalid float f: '%s'", argv[6]);
115 exit(1);
116 }
117
118 rappor::Params params(num_bits, num_hashes, num_cohorts, prob_f, prob_p,
119 prob_q);
120
121 //rappor::log("k: %d, h: %d, m: %d", params.num_bits(), params.num_hashes(), params.num_cohorts());
122 //rappor::log("f: %f, p: %f, q: %f", prob_f, prob_p, prob_q);
123
124 int num_bytes = params.num_bits() / 8;
125
126 // TODO: Add a flag for
127 // - -r libc / kernel
128 // - -c openssl / nacl crpto
129
130 rappor::IrrRandInterface* irr_rand;
131 if (false) {
132 FILE* fp = fopen("/dev/urandom", "r");
133 irr_rand = new rappor::UnixKernelRand(fp);
134 } else {
135 int seed = time(NULL);
136 srand(seed); // seed with nanoseconds
137 irr_rand = new rappor::LibcRand();
138 }
139
140 std::string line;
141
142 // CSV header
143 std::cout << "client,cohort,bloom,prr,irr\n";
144
145 // Consume header line
146 std::getline(std::cin, line);
147 if (line != "client,cohort,value") {
148 rappor::log("Expected CSV header 'client,cohort,value'");
149 return 1;
150 }
151
152 while (true) {
153 std::getline(std::cin, line); // no trailing newline
154 // rappor::log("Got line %s", line.c_str());
155
156 if (line.empty()) {
157 break; // EOF
158 }
159
160 size_t comma1_pos = line.find(',');
161 if (comma1_pos == std::string::npos) {
162 rappor::log("Expected , in line '%s'", line.c_str());
163 return 1;
164 }
165 size_t comma2_pos = line.find(',', comma1_pos + 1);
166 if (comma2_pos == std::string::npos) {
167 rappor::log("Expected second , in line '%s'", line.c_str());
168 return 1;
169 }
170
171 // The C++ API substr(pos, length) not (pos, end)
172
173 // everything before comma
174 std::string client_str = line.substr(0, comma1_pos);
175 // everything between first and second comma.
176 // TODO(andychu): Remove unused second column.
177 std::string unused = line.substr(comma1_pos + 1, comma2_pos-comma1_pos);
178 // everything after
179 std::string value = line.substr(comma2_pos + 1);
180
181 rappor::Deps deps(rappor::Md5, client_str /*client_secret*/,
182 rappor::HmacSha256, *irr_rand);
183
184 // For now, construct a new encoder every time. We could construct one for
185 // each client. We are simulating many clients reporting the same metric,
186 // so the encoder ID is constant.
187 rappor::Encoder e("metric-name", params, deps);
188
189 // rappor::log("CLIENT %s VALUE %s COHORT %d", client_str.c_str(),
190 // value.c_str(), cohort);
191
192 rappor::Bits bloom;
193 rappor::Bits prr;
194 rappor::Bits irr;
195 bool ok = e._EncodeStringInternal(value, &bloom, &prr, &irr);
196
197 // NOTE: Are there really encoding errors?
198 if (!ok) {
199 rappor::log("Error encoding string %s", line.c_str());
200 break;
201 }
202
203 std::string bloom_str;
204 BitsToString(bloom, &bloom_str, num_bytes);
205
206 std::string prr_str;
207 BitsToString(prr, &prr_str, num_bytes);
208
209 std::string irr_str;
210 BitsToString(irr, &irr_str, num_bytes);
211
212 // Output CSV row.
213
214 std::cout << client_str;
215 std::cout << ',';
216 std::cout << e.cohort(); // cohort the encoder assigned
217 std::cout << ',';
218 PrintBitString(bloom_str);
219 std::cout << ',';
220 PrintBitString(prr_str);
221 std::cout << ',';
222 PrintBitString(irr_str);
223
224 std::cout << "\n";
225 }
226
227 // Cleanup
228 delete irr_rand;
229 }
230