xref: /aosp_15_r20/external/rappor/tests/gen_counts_test.R (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1*2abb3134SXin Li#!/usr/bin/env Rscript
2*2abb3134SXin Li#
3*2abb3134SXin Li# Copyright 2014 Google Inc. All rights reserved.
4*2abb3134SXin Li#
5*2abb3134SXin Li# Licensed under the Apache License, Version 2.0 (the "License");
6*2abb3134SXin Li# you may not use this file except in compliance with the License.
7*2abb3134SXin Li# You may obtain a copy of the License at
8*2abb3134SXin Li#
9*2abb3134SXin Li#     http://www.apache.org/licenses/LICENSE-2.0
10*2abb3134SXin Li#
11*2abb3134SXin Li# Unless required by applicable law or agreed to in writing, software
12*2abb3134SXin Li# distributed under the License is distributed on an "AS IS" BASIS,
13*2abb3134SXin Li# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*2abb3134SXin Li# See the License for the specific language governing permissions and
15*2abb3134SXin Li# limitations under the License.
16*2abb3134SXin Li
17*2abb3134SXin Lilibrary(RUnit)
18*2abb3134SXin Lilibrary(Matrix)  # for sparse matrices
19*2abb3134SXin Li
20*2abb3134SXin Lisource('tests/gen_counts.R')
21*2abb3134SXin Li
22*2abb3134SXin LiTestGenerateCounts <- function() {
23*2abb3134SXin Li  report_params <- list(k = 4, m = 2)  # 2 cohorts, 4 bits each
24*2abb3134SXin Li  map <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE)  # 3 possible values
25*2abb3134SXin Li  map[1,] <- c(1, 0, 0)
26*2abb3134SXin Li  map[2,] <- c(0, 1, 0)
27*2abb3134SXin Li  map[3,] <- c(0, 0, 1)
28*2abb3134SXin Li  map[4,] <- c(1, 1, 1)  # 4th bit of the first cohort gets signal from all
29*2abb3134SXin Li  map[5,] <- c(0, 0, 1)  # 1st bit of the second cohort gets signal from v3
30*2abb3134SXin Li
31*2abb3134SXin Li  colnames(map) <- c('v1', 'v2', 'v3')
32*2abb3134SXin Li
33*2abb3134SXin Li  partition <- c(3, 2, 1) * 10000
34*2abb3134SXin Li  v <- 100  # reports per client
35*2abb3134SXin Li
36*2abb3134SXin Li  noise0 <- list(p = 0, q = 1, f = 0)  # no noise at all
37*2abb3134SXin Li  counts0 <- GenerateCounts(c(report_params, noise0), map, partition, v)
38*2abb3134SXin Li
39*2abb3134SXin Li  checkEqualsNumeric(sum(counts0[1,2:4]), counts0[1,1])
40*2abb3134SXin Li  checkEqualsNumeric(counts0[1,5], counts0[1,1])
41*2abb3134SXin Li  checkEqualsNumeric(partition[3] * v, counts0[1,4] + counts0[2,2])
42*2abb3134SXin Li  checkEqualsNumeric(sum(partition) * v, counts0[1,1] + counts0[2,1])
43*2abb3134SXin Li
44*2abb3134SXin Li  pvalues <- chisq.test(counts0[,1] / v, p = c(.5, .5))$p.value
45*2abb3134SXin Li  for(i in 2:4)
46*2abb3134SXin Li    pvalues <- c(pvalues,
47*2abb3134SXin Li                 chisq.test(
48*2abb3134SXin Li                   c(counts0[1,i] / v, partition[i - 1] - counts0[1,i] / v),
49*2abb3134SXin Li                   p = c(.5, .5))$p.value)
50*2abb3134SXin Li
51*2abb3134SXin Li  noise1 <- list(p = .5, q = .5, f = 0)  # truly random IRRs
52*2abb3134SXin Li  counts1 <- GenerateCounts(c(report_params, noise1), map, partition, v)
53*2abb3134SXin Li
54*2abb3134SXin Li  for(i in 2:5)
55*2abb3134SXin Li    for(j in 1:2)
56*2abb3134SXin Li      pvalues <- c(pvalues,
57*2abb3134SXin Li                   chisq.test(c(counts1[j,1] - counts1[j,i], counts1[j,i]),
58*2abb3134SXin Li                                p = c(.5, .5))$p.value)
59*2abb3134SXin Li
60*2abb3134SXin Li  noise2 <- list(p = 0, q = 1, f = 1.0)  # truly random PRRs
61*2abb3134SXin Li  counts2 <- GenerateCounts(c(report_params, noise2), map, partition, v)
62*2abb3134SXin Li
63*2abb3134SXin Li  checkEqualsNumeric(0, max(counts2 %% v))  # all entries must be divisible by v
64*2abb3134SXin Li
65*2abb3134SXin Li  counts2 <- counts2 / v
66*2abb3134SXin Li
67*2abb3134SXin Li  for(i in 2:5)
68*2abb3134SXin Li    for(j in 1:2)
69*2abb3134SXin Li      pvalues <- c(pvalues,
70*2abb3134SXin Li                   chisq.test(c(counts2[j,1] - counts2[j,i], counts2[j,i]),
71*2abb3134SXin Li                              p = c(.5, .5))$p.value)
72*2abb3134SXin Li
73*2abb3134SXin Li  checkTrue(min(pvalues) > 1E-9,  "Chi-squared test failed")
74*2abb3134SXin Li}
75*2abb3134SXin Li
76*2abb3134SXin LiTestRandomPartition <- function() {
77*2abb3134SXin Li
78*2abb3134SXin Li  p1 <- RandomPartition(total = 100, dgeom(0:999, prob = .1))
79*2abb3134SXin Li  p2 <- RandomPartition(total = 1000, dnorm(1:1000, mean = 500, sd = 1000 / 6))
80*2abb3134SXin Li  p3 <- RandomPartition(total = 10000, dunif(1:1000))
81*2abb3134SXin Li
82*2abb3134SXin Li  # Totals must check out.
83*2abb3134SXin Li  checkEqualsNumeric(100, sum(p1))
84*2abb3134SXin Li  checkEqualsNumeric(1000, sum(p2))
85*2abb3134SXin Li  checkEqualsNumeric(10000, sum(p3))
86*2abb3134SXin Li
87*2abb3134SXin Li  # Initialize the weights vector to 1 0 1 0 1 0 ...
88*2abb3134SXin Li  weights <- rep(c(1, 0), 100)
89*2abb3134SXin Li
90*2abb3134SXin Li  p4 <- RandomPartition(total = 10000, weights)
91*2abb3134SXin Li
92*2abb3134SXin Li  # Check that all mass is allocated to non-zero weights.
93*2abb3134SXin Li  checkEqualsNumeric(10000, sum(p4[weights == 1]))
94*2abb3134SXin Li  checkTrue(all(p4[weights == 0] == 0))
95*2abb3134SXin Li
96*2abb3134SXin Li  p5 <- RandomPartition(total = 1000000, c(1, 2, 3, 4))
97*2abb3134SXin Li  p.value <- chisq.test(p5, p = c(.1, .2, .3, .4))$p.value
98*2abb3134SXin Li
99*2abb3134SXin Li  # Apply the chi squared test and fail if p.value is too high or too low.
100*2abb3134SXin Li  # Probability of failure is 2 * 1E-9, which should never happen.
101*2abb3134SXin Li  checkTrue(p.value > 1E-9)
102*2abb3134SXin Li}
103*2abb3134SXin Li
104*2abb3134SXin LiTestAll <- function(){
105*2abb3134SXin Li  TestRandomPartition()
106*2abb3134SXin Li  TestGenerateCounts()
107*2abb3134SXin Li}
108*2abb3134SXin Li
109*2abb3134SXin LiTestAll()