1 /* 2 * Copyright 2019 Google LLC. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * https://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef PRIVATE_JOIN_AND_COMPUTE_DATA_UTIL_H_ 17 #define PRIVATE_JOIN_AND_COMPUTE_DATA_UTIL_H_ 18 19 // Contains utility functions to generate dummy input data for the server and 20 // client, and also to write the data to file and parse it back. 21 22 #include <string> 23 #include <tuple> 24 #include <utility> 25 #include <vector> 26 27 #include "absl/strings/string_view.h" 28 #include "private_join_and_compute/crypto/context.h" 29 #include "private_join_and_compute/match.pb.h" 30 #include "private_join_and_compute/util/status.inc" 31 32 namespace private_join_and_compute { 33 34 // Random Identifiers generated by this library will be this many bytes long. 35 static const int64_t kRandomIdentifierLengthBytes = 32; 36 37 // Generates random datasets for the server and client. The server data contains 38 // the server_data_size identifiers, while the client data contains 39 // client_data_size identifiers, each paired with randomly selected associated 40 // values between 0 and the max_associated_value. The two generated datasets 41 // will have intersection_size identifiers in common. The function also returns 42 // the value of the real intersection sum. Each identifier consists of random 43 // alphanumeric strings. 44 // 45 // The output is a tuple with the following interpretation: 46 // First element: server's data. 47 // Second element: client's data (identifiers and associated values). 48 // Third element: the sum of values associated with common identifiers ( the 49 // "true" intersection-sum) 50 // 51 // Client and server identifiers are kRandomIdentifierLengthBytes-long random 52 // strings. 53 // 54 // The identifiers are generated and permuted with a 55 // non-cryptographically-secure PRNG. This is fine for dummy data. 56 // 57 // Fails with INVALID_ARGUMENT if the intersection size given is larger than 58 // either server or client data size, if max_associated_value is negative, or if 59 // max_associated_value * intersection_size is larger than the max value of 60 // int64_t. 61 auto GenerateRandomDatabases(int64_t server_data_size, int64_t client_data_size, 62 int64_t intersection_size, 63 int64_t max_associated_value) 64 -> StatusOr<std::tuple< 65 std::vector<std::string>, 66 std::pair<std::vector<std::string>, std::vector<int64_t>>, int64_t>>; 67 68 // Write Server Dataset to the specified file in CSV format. 69 Status WriteServerDatasetToFile(const std::vector<std::string>& server_data, 70 absl::string_view server_data_filename); 71 72 // Write Client Dataset to the specified file in CSV format. 73 Status WriteClientDatasetToFile( 74 const std::vector<std::string>& client_identifiers, 75 const std::vector<int64_t>& client_associated_values, 76 absl::string_view client_data_filename); 77 78 // Read Server Dataset from the specified file, which should be in CSV format. 79 StatusOr<std::vector<std::string>> ReadServerDatasetFromFile( 80 absl::string_view server_data_filename); 81 82 // Read Client Dataset (identifiers and associated values) from the specified 83 // file, which should be in CSV format. Automatically packages the parsed 84 // associated values as BigNums for convenience. 85 StatusOr<std::pair<std::vector<std::string>, std::vector<BigNum>>> 86 ReadClientDatasetFromFile(absl::string_view client_data_filename, 87 Context* context); 88 89 // Splits a CSV line using ',' as a delimiter, and returns a vector of 90 // associated strings. 91 std::vector<std::string> SplitCsvLine(const std::string& line); 92 93 } // namespace private_join_and_compute 94 #endif // PRIVATE_JOIN_AND_COMPUTE_DATA_UTIL_H_ 95