1*2abb3134SXin Li#!/usr/bin/env Rscript 2*2abb3134SXin Li# 3*2abb3134SXin Li# Copyright 2014 Google Inc. All rights reserved. 4*2abb3134SXin Li# 5*2abb3134SXin Li# Licensed under the Apache License, Version 2.0 (the "License"); 6*2abb3134SXin Li# you may not use this file except in compliance with the License. 7*2abb3134SXin Li# You may obtain a copy of the License at 8*2abb3134SXin Li# 9*2abb3134SXin Li# http://www.apache.org/licenses/LICENSE-2.0 10*2abb3134SXin Li# 11*2abb3134SXin Li# Unless required by applicable law or agreed to in writing, software 12*2abb3134SXin Li# distributed under the License is distributed on an "AS IS" BASIS, 13*2abb3134SXin Li# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14*2abb3134SXin Li# See the License for the specific language governing permissions and 15*2abb3134SXin Li# limitations under the License. 16*2abb3134SXin Li 17*2abb3134SXin Lilibrary(RUnit) 18*2abb3134SXin Lilibrary(Matrix) # for sparse matrices 19*2abb3134SXin Li 20*2abb3134SXin Lisource('tests/gen_counts.R') 21*2abb3134SXin Li 22*2abb3134SXin LiTestGenerateCounts <- function() { 23*2abb3134SXin Li report_params <- list(k = 4, m = 2) # 2 cohorts, 4 bits each 24*2abb3134SXin Li map <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE) # 3 possible values 25*2abb3134SXin Li map[1,] <- c(1, 0, 0) 26*2abb3134SXin Li map[2,] <- c(0, 1, 0) 27*2abb3134SXin Li map[3,] <- c(0, 0, 1) 28*2abb3134SXin Li map[4,] <- c(1, 1, 1) # 4th bit of the first cohort gets signal from all 29*2abb3134SXin Li map[5,] <- c(0, 0, 1) # 1st bit of the second cohort gets signal from v3 30*2abb3134SXin Li 31*2abb3134SXin Li colnames(map) <- c('v1', 'v2', 'v3') 32*2abb3134SXin Li 33*2abb3134SXin Li partition <- c(3, 2, 1) * 10000 34*2abb3134SXin Li v <- 100 # reports per client 35*2abb3134SXin Li 36*2abb3134SXin Li noise0 <- list(p = 0, q = 1, f = 0) # no noise at all 37*2abb3134SXin Li counts0 <- GenerateCounts(c(report_params, noise0), map, partition, v) 38*2abb3134SXin Li 39*2abb3134SXin Li checkEqualsNumeric(sum(counts0[1,2:4]), counts0[1,1]) 40*2abb3134SXin Li checkEqualsNumeric(counts0[1,5], counts0[1,1]) 41*2abb3134SXin Li checkEqualsNumeric(partition[3] * v, counts0[1,4] + counts0[2,2]) 42*2abb3134SXin Li checkEqualsNumeric(sum(partition) * v, counts0[1,1] + counts0[2,1]) 43*2abb3134SXin Li 44*2abb3134SXin Li pvalues <- chisq.test(counts0[,1] / v, p = c(.5, .5))$p.value 45*2abb3134SXin Li for(i in 2:4) 46*2abb3134SXin Li pvalues <- c(pvalues, 47*2abb3134SXin Li chisq.test( 48*2abb3134SXin Li c(counts0[1,i] / v, partition[i - 1] - counts0[1,i] / v), 49*2abb3134SXin Li p = c(.5, .5))$p.value) 50*2abb3134SXin Li 51*2abb3134SXin Li noise1 <- list(p = .5, q = .5, f = 0) # truly random IRRs 52*2abb3134SXin Li counts1 <- GenerateCounts(c(report_params, noise1), map, partition, v) 53*2abb3134SXin Li 54*2abb3134SXin Li for(i in 2:5) 55*2abb3134SXin Li for(j in 1:2) 56*2abb3134SXin Li pvalues <- c(pvalues, 57*2abb3134SXin Li chisq.test(c(counts1[j,1] - counts1[j,i], counts1[j,i]), 58*2abb3134SXin Li p = c(.5, .5))$p.value) 59*2abb3134SXin Li 60*2abb3134SXin Li noise2 <- list(p = 0, q = 1, f = 1.0) # truly random PRRs 61*2abb3134SXin Li counts2 <- GenerateCounts(c(report_params, noise2), map, partition, v) 62*2abb3134SXin Li 63*2abb3134SXin Li checkEqualsNumeric(0, max(counts2 %% v)) # all entries must be divisible by v 64*2abb3134SXin Li 65*2abb3134SXin Li counts2 <- counts2 / v 66*2abb3134SXin Li 67*2abb3134SXin Li for(i in 2:5) 68*2abb3134SXin Li for(j in 1:2) 69*2abb3134SXin Li pvalues <- c(pvalues, 70*2abb3134SXin Li chisq.test(c(counts2[j,1] - counts2[j,i], counts2[j,i]), 71*2abb3134SXin Li p = c(.5, .5))$p.value) 72*2abb3134SXin Li 73*2abb3134SXin Li checkTrue(min(pvalues) > 1E-9, "Chi-squared test failed") 74*2abb3134SXin Li} 75*2abb3134SXin Li 76*2abb3134SXin LiTestRandomPartition <- function() { 77*2abb3134SXin Li 78*2abb3134SXin Li p1 <- RandomPartition(total = 100, dgeom(0:999, prob = .1)) 79*2abb3134SXin Li p2 <- RandomPartition(total = 1000, dnorm(1:1000, mean = 500, sd = 1000 / 6)) 80*2abb3134SXin Li p3 <- RandomPartition(total = 10000, dunif(1:1000)) 81*2abb3134SXin Li 82*2abb3134SXin Li # Totals must check out. 83*2abb3134SXin Li checkEqualsNumeric(100, sum(p1)) 84*2abb3134SXin Li checkEqualsNumeric(1000, sum(p2)) 85*2abb3134SXin Li checkEqualsNumeric(10000, sum(p3)) 86*2abb3134SXin Li 87*2abb3134SXin Li # Initialize the weights vector to 1 0 1 0 1 0 ... 88*2abb3134SXin Li weights <- rep(c(1, 0), 100) 89*2abb3134SXin Li 90*2abb3134SXin Li p4 <- RandomPartition(total = 10000, weights) 91*2abb3134SXin Li 92*2abb3134SXin Li # Check that all mass is allocated to non-zero weights. 93*2abb3134SXin Li checkEqualsNumeric(10000, sum(p4[weights == 1])) 94*2abb3134SXin Li checkTrue(all(p4[weights == 0] == 0)) 95*2abb3134SXin Li 96*2abb3134SXin Li p5 <- RandomPartition(total = 1000000, c(1, 2, 3, 4)) 97*2abb3134SXin Li p.value <- chisq.test(p5, p = c(.1, .2, .3, .4))$p.value 98*2abb3134SXin Li 99*2abb3134SXin Li # Apply the chi squared test and fail if p.value is too high or too low. 100*2abb3134SXin Li # Probability of failure is 2 * 1E-9, which should never happen. 101*2abb3134SXin Li checkTrue(p.value > 1E-9) 102*2abb3134SXin Li} 103*2abb3134SXin Li 104*2abb3134SXin LiTestAll <- function(){ 105*2abb3134SXin Li TestRandomPartition() 106*2abb3134SXin Li TestGenerateCounts() 107*2abb3134SXin Li} 108*2abb3134SXin Li 109*2abb3134SXin LiTestAll()