/* * Copyright 2019 Google LLC. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PRIVATE_JOIN_AND_COMPUTE_DATA_UTIL_H_ #define PRIVATE_JOIN_AND_COMPUTE_DATA_UTIL_H_ // Contains utility functions to generate dummy input data for the server and // client, and also to write the data to file and parse it back. #include #include #include #include #include "absl/strings/string_view.h" #include "private_join_and_compute/crypto/context.h" #include "private_join_and_compute/match.pb.h" #include "private_join_and_compute/util/status.inc" namespace private_join_and_compute { // Random Identifiers generated by this library will be this many bytes long. static const int64_t kRandomIdentifierLengthBytes = 32; // Generates random datasets for the server and client. The server data contains // the server_data_size identifiers, while the client data contains // client_data_size identifiers, each paired with randomly selected associated // values between 0 and the max_associated_value. The two generated datasets // will have intersection_size identifiers in common. The function also returns // the value of the real intersection sum. Each identifier consists of random // alphanumeric strings. // // The output is a tuple with the following interpretation: // First element: server's data. // Second element: client's data (identifiers and associated values). // Third element: the sum of values associated with common identifiers ( the // "true" intersection-sum) // // Client and server identifiers are kRandomIdentifierLengthBytes-long random // strings. // // The identifiers are generated and permuted with a // non-cryptographically-secure PRNG. This is fine for dummy data. // // Fails with INVALID_ARGUMENT if the intersection size given is larger than // either server or client data size, if max_associated_value is negative, or if // max_associated_value * intersection_size is larger than the max value of // int64_t. auto GenerateRandomDatabases(int64_t server_data_size, int64_t client_data_size, int64_t intersection_size, int64_t max_associated_value) -> StatusOr, std::pair, std::vector>, int64_t>>; // Write Server Dataset to the specified file in CSV format. Status WriteServerDatasetToFile(const std::vector& server_data, absl::string_view server_data_filename); // Write Client Dataset to the specified file in CSV format. Status WriteClientDatasetToFile( const std::vector& client_identifiers, const std::vector& client_associated_values, absl::string_view client_data_filename); // Read Server Dataset from the specified file, which should be in CSV format. StatusOr> ReadServerDatasetFromFile( absl::string_view server_data_filename); // Read Client Dataset (identifiers and associated values) from the specified // file, which should be in CSV format. Automatically packages the parsed // associated values as BigNums for convenience. StatusOr, std::vector>> ReadClientDatasetFromFile(absl::string_view client_data_filename, Context* context); // Splits a CSV line using ',' as a delimiter, and returns a vector of // associated strings. std::vector SplitCsvLine(const std::string& line); } // namespace private_join_and_compute #endif // PRIVATE_JOIN_AND_COMPUTE_DATA_UTIL_H_