xref: /aosp_15_r20/external/rappor/tests/gen_true_values.R (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
#!/usr/bin/env Rscript
#
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Helpers used below (ComputePdf, RandomPartition) come from this file. The
# path is relative to the working directory, so run from the repository root.
source('tests/gen_counts.R')

# Usage (run from the repository root, because gen_counts.R is sourced by a
# path relative to the working directory):
#
# $ tests/gen_true_values.R exp 100 10000 1 64 foo.csv
#
# Inputs:
#   distribution name
#   size of the distribution's support
#   number of clients
#   reports per client
#   number of cohorts
#   name of the output file
# Output:
#   csv file with reports sampled according to the specified distribution.

# Builds a data frame of simulated "true" client reports.
#
# Args:
#   distr: distribution name, passed through to ComputePdf.
#   distr_range: size of the distribution's support; reported values are
#     "v1" .. "v<distr_range>".
#   num_clients: number of simulated clients.
#   reports_per_client: number of reports generated per client.
#   num_cohorts: number of cohorts; a client's cohort is its integer id
#     modulo this number.
#
# Returns:
#   A data frame with one row per report and the columns
#   client ("c<id>"), cohort (0-based) and value ("v<int>").
GenerateTrueValues <- function(distr, distr_range, num_clients,
                               reports_per_client, num_cohorts) {

  # Sums to 1.0, e.g. [0.2 0.2 0.2 0.2 0.2] for uniform distribution of 5.
  pdf <- ComputePdf(distr, distr_range)

  num_reports <- num_clients * reports_per_client

  # Computes the number of clients reporting each value, where the numbers are
  # sampled according to pdf.  (sums to num_reports)
  partition <- RandomPartition(num_reports, pdf)

  # Expand the partition into one integer per report. seq_len() (rather than
  # 1:n) keeps this correct when distr_range is 0.
  value_ints <- rep(seq_len(distr_range), partition)

  stopifnot(length(value_ints) == num_reports)

  # Shuffle values randomly (may take a few sec for > 10^8 inputs).
  # Index with sample.int() instead of calling sample(value_ints): for a
  # single value k >= 1, sample(k) returns a permutation of 1:k rather than k
  # itself, which would corrupt a one-report run.
  value_ints <- value_ints[sample.int(length(value_ints))]

  # Reported values are strings, so prefix integers "v". Even slower than
  # shuffling.
  values <- sprintf("v%d", value_ints)

  # e.g. [1 1 2 2 3 3] if num_clients is 3 and reports_per_client is 2.
  # seq_len() yields an empty vector (not c(1, 0)) when there are no clients.
  client_ints <- rep(seq_len(num_clients), each = reports_per_client)

  # Cohorts are assigned to clients. Cohorts are 0-based.
  cohorts <- client_ints %% num_cohorts  # %% is integer modulus

  clients <- sprintf("c%d", client_ints)

  data.frame(client = clients, cohort = cohorts, value = values)
}

# Command-line entry point: parses arguments, generates the reports and
# writes them to a CSV file.
#
# Args:
#   argv: character vector of at least six elements, in order:
#     distribution name, support size, number of clients, reports per client,
#     number of cohorts, output file name. Extra elements are ignored.
main <- function(argv) {
  if (length(argv) < 6) {
    stop("Usage: gen_true_values.R <distr> <distr_range> <num_clients> ",
         "<reports_per_client> <num_cohorts> <out_file>", call. = FALSE)
  }

  distr <- argv[[1]]
  distr_range <- as.integer(argv[[2]])
  num_clients <- as.integer(argv[[3]])
  reports_per_client <- as.integer(argv[[4]])
  num_cohorts <- as.integer(argv[[5]])
  out_file <- argv[[6]]

  # as.integer() turns unparsable text into NA; fail here with a clear message
  # instead of somewhere inside the generator.
  parsed <- c(distr_range, num_clients, reports_per_client, num_cohorts)
  if (any(is.na(parsed))) {
    stop("distr_range, num_clients, reports_per_client and num_cohorts ",
         "must all be integers", call. = FALSE)
  }

  reports <- GenerateTrueValues(distr, distr_range, num_clients,
                                reports_per_client, num_cohorts)

  write.csv(reports, file = out_file, row.names = FALSE, quote = FALSE)
}

# Run main() only when this file is executed at top level (no function call
# frames on the stack), not when it is loaded from inside another call such
# as source().
if (length(sys.frames()) == 0) {
  main(commandArgs(trailingOnly = TRUE))
}