1*2abb3134SXin Li#!/usr/bin/env Rscript 2*2abb3134SXin Li# 3*2abb3134SXin Li# Command line tool to decode multidimensional reports. It's a simple wrapper 4*2abb3134SXin Li# around functions in association.R. 5*2abb3134SXin Li 6*2abb3134SXin Lilibrary(optparse) 7*2abb3134SXin Li 8*2abb3134SXin Li# 9*2abb3134SXin Li# Command line parsing. Do this first before loading libraries to catch errors 10*2abb3134SXin Li# quickly. Loading libraries in R is slow. 11*2abb3134SXin Li# 12*2abb3134SXin Li 13*2abb3134SXin Li# Display an error string and quit. 14*2abb3134SXin LiUsageError <- function(...) { 15*2abb3134SXin Li cat(sprintf(...)) 16*2abb3134SXin Li cat('\n') 17*2abb3134SXin Li quit(status = 1) 18*2abb3134SXin Li} 19*2abb3134SXin Li 20*2abb3134SXin Lioption_list <- list( 21*2abb3134SXin Li make_option( 22*2abb3134SXin Li "--metric-name", dest="metric_name", default="", 23*2abb3134SXin Li help="Name of the metric; metrics contain variables (required)"), 24*2abb3134SXin Li make_option( 25*2abb3134SXin Li "--reports", default="", 26*2abb3134SXin Li help="CSV file with reports; each variable is a column (required)"), 27*2abb3134SXin Li make_option( 28*2abb3134SXin Li "--schema", default="", 29*2abb3134SXin Li help="CSV file with variable types and metadata (required)"), 30*2abb3134SXin Li make_option( 31*2abb3134SXin Li "--params-dir", dest="params_dir", default="", 32*2abb3134SXin Li help="Directory where parameter CSV files are stored (required)"), 33*2abb3134SXin Li 34*2abb3134SXin Li make_option( 35*2abb3134SXin Li "--var1", default="", 36*2abb3134SXin Li help="Name of first variable (required)"), 37*2abb3134SXin Li make_option( 38*2abb3134SXin Li "--var2", default="", 39*2abb3134SXin Li help="Name of second variable (required)"), 40*2abb3134SXin Li 41*2abb3134SXin Li make_option( 42*2abb3134SXin Li "--map1", default="", 43*2abb3134SXin Li help="Path to map file, if var1 is a string"), 44*2abb3134SXin Li make_option( 45*2abb3134SXin Li "--map2", default="", 46*2abb3134SXin Li help="Path to map file, if var2 is a string"), 47*2abb3134SXin Li 48*2abb3134SXin Li make_option( 49*2abb3134SXin Li "--output-dir", dest="output_dir", default=".", 50*2abb3134SXin Li help="Output directory (default .)"), 51*2abb3134SXin Li 52*2abb3134SXin Li make_option( 53*2abb3134SXin Li "--create-bool-map", dest="create_bool_map", default=FALSE, 54*2abb3134SXin Li action="store_true", 55*2abb3134SXin Li help="Hack to use string RAPPOR to analyze boolean variables."), 56*2abb3134SXin Li make_option( 57*2abb3134SXin Li "--remove-bad-rows", dest="remove_bad_rows", default=FALSE, 58*2abb3134SXin Li action="store_true", 59*2abb3134SXin Li help="Whether we should remove rows where any value is missing (by 60*2abb3134SXin Li default, the program aborts with an error)"), 61*2abb3134SXin Li 62*2abb3134SXin Li # Options that speed it up 63*2abb3134SXin Li make_option( 64*2abb3134SXin Li "--reports-sample-size", dest="reports_sample_size", default=-1, 65*2abb3134SXin Li help="Only analyze a random sample of this size. This is for 66*2abb3134SXin Li limiting the execution time at the expense of accuracy."), 67*2abb3134SXin Li make_option( 68*2abb3134SXin Li "--num-cores", dest="num_cores", default=1, 69*2abb3134SXin Li help="Number of cores for mclapply to use. Speeds up the parts 70*2abb3134SXin Li of the computation proportional to the number of reports, 71*2abb3134SXin Li EXCEPT the EM step, which can be sped up by native code."), 72*2abb3134SXin Li make_option( 73*2abb3134SXin Li "--max-em-iters", dest="max_em_iters", default=1000, 74*2abb3134SXin Li help="Maximum number of EM iterations"), 75*2abb3134SXin Li make_option( 76*2abb3134SXin Li "--em-executable", dest="em_executable", default="", 77*2abb3134SXin Li help="Shell out to this executable for an accelerated implementation 78*2abb3134SXin Li of EM."), 79*2abb3134SXin Li make_option( 80*2abb3134SXin Li "--tmp-dir", dest="tmp_dir", default="/tmp", 81*2abb3134SXin Li help="Use this tmp dir to communicate with the EM executable") 82*2abb3134SXin Li) 83*2abb3134SXin Li 84*2abb3134SXin LiParseOptions <- function() { 85*2abb3134SXin Li # NOTE: This API is bad; if you add positional_arguments, the return value 86*2abb3134SXin Li # changes! 87*2abb3134SXin Li parser <- OptionParser(option_list = option_list) 88*2abb3134SXin Li opts <- parse_args(parser) 89*2abb3134SXin Li 90*2abb3134SXin Li if (opts$metric_name == "") { 91*2abb3134SXin Li UsageError("--metric-name is required.") 92*2abb3134SXin Li } 93*2abb3134SXin Li if (opts$reports== "") { 94*2abb3134SXin Li UsageError("--reports is required.") 95*2abb3134SXin Li } 96*2abb3134SXin Li if (opts$schema == "") { 97*2abb3134SXin Li UsageError("--schema is required.") 98*2abb3134SXin Li } 99*2abb3134SXin Li if (opts$params_dir == "") { 100*2abb3134SXin Li UsageError("--params-dir is required.") 101*2abb3134SXin Li } 102*2abb3134SXin Li if (opts$var1 == "") { 103*2abb3134SXin Li UsageError("--var1 is required.") 104*2abb3134SXin Li } 105*2abb3134SXin Li if (opts$var2 == "") { 106*2abb3134SXin Li UsageError("--var2 is required.") 107*2abb3134SXin Li } 108*2abb3134SXin Li 109*2abb3134SXin Li return(opts) 110*2abb3134SXin Li} 111*2abb3134SXin Li 112*2abb3134SXin Liif (!interactive()) { 113*2abb3134SXin Li opts <- ParseOptions() 114*2abb3134SXin Li} 115*2abb3134SXin Li 116*2abb3134SXin Li# 117*2abb3134SXin Li# Load libraries and source our own code. 118*2abb3134SXin Li# 119*2abb3134SXin Li 120*2abb3134SXin Lilibrary(RJSONIO) # toJSON() 121*2abb3134SXin Li 122*2abb3134SXin Li# So we don't have to change pwd 123*2abb3134SXin Lisource.rappor <- function(rel_path) { 124*2abb3134SXin Li abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path) 125*2abb3134SXin Li source(abs_path) 126*2abb3134SXin Li} 127*2abb3134SXin Li 128*2abb3134SXin Lisource.rappor("analysis/R/association.R") 129*2abb3134SXin Lisource.rappor("analysis/R/fast_em.R") 130*2abb3134SXin Lisource.rappor("analysis/R/read_input.R") 131*2abb3134SXin Lisource.rappor("analysis/R/util.R") 132*2abb3134SXin Li 133*2abb3134SXin Lioptions(stringsAsFactors = FALSE) 134*2abb3134SXin Lioptions(max.print = 100) # So our structure() debug calls look better 135*2abb3134SXin Li 136*2abb3134SXin LiCreateAssocStringMap <- function(all_cohorts_map, params) { 137*2abb3134SXin Li # Processes the maps loaded using ReadMapFile and turns it into something 138*2abb3134SXin Li # that association.R can use. Namely, we want a map per cohort. 139*2abb3134SXin Li # 140*2abb3134SXin Li # Arguments: 141*2abb3134SXin Li # all_cohorts_map: map matrix, as for single variable analysis 142*2abb3134SXin Li # params: encoding parameters 143*2abb3134SXin Li 144*2abb3134SXin Li if (nrow(all_cohorts_map) != (params$m * params$k)) { 145*2abb3134SXin Li stop(sprintf( 146*2abb3134SXin Li "Map matrix has invalid dimensions: m * k = %d, nrow(map) = %d", 147*2abb3134SXin Li params$m * params$k, nrow(all_cohorts_map))) 148*2abb3134SXin Li } 149*2abb3134SXin Li 150*2abb3134SXin Li k <- params$k 151*2abb3134SXin Li map_by_cohort <- lapply(0 : (params$m-1), function(cohort) { 152*2abb3134SXin Li begin <- cohort * k 153*2abb3134SXin Li end <- (cohort + 1) * k 154*2abb3134SXin Li all_cohorts_map[(begin+1) : end, ] 155*2abb3134SXin Li }) 156*2abb3134SXin Li 157*2abb3134SXin Li list(all_cohorts_map = all_cohorts_map, map_by_cohort = map_by_cohort) 158*2abb3134SXin Li} 159*2abb3134SXin Li 160*2abb3134SXin Li# Hack to create a map for booleans. We should use closed-form formulas instead. 161*2abb3134SXin LiCreateAssocBoolMap <- function(params) { 162*2abb3134SXin Li names <- c("FALSE", "TRUE") 163*2abb3134SXin Li 164*2abb3134SXin Li map_by_cohort <- lapply(1:params$m, function(unused_cohort) { 165*2abb3134SXin Li # The (1,1) cell is false and the (1,2) cell is true. 166*2abb3134SXin Li m <- sparseMatrix(c(1), c(2), dims = c(1, 2)) 167*2abb3134SXin Li colnames(m) <- names 168*2abb3134SXin Li m 169*2abb3134SXin Li }) 170*2abb3134SXin Li 171*2abb3134SXin Li all_cohorts_map <- sparseMatrix(1:params$m, rep(2, params$m)) 172*2abb3134SXin Li colnames(all_cohorts_map) <- names 173*2abb3134SXin Li 174*2abb3134SXin Li list(map_by_cohort = map_by_cohort, all_cohorts_map = all_cohorts_map) 175*2abb3134SXin Li} 176*2abb3134SXin Li 177*2abb3134SXin LiResultMatrixToDataFrame <- function(m, string_var_name, bool_var_name) { 178*2abb3134SXin Li # Args: 179*2abb3134SXin Li # m: A 2D matrix as output by ComputeDistributionEM, e.g. 180*2abb3134SXin Li # bing.com yahoo.com google.com Other 181*2abb3134SXin Li # TRUE 0.2718526 0.1873424 0.19637704 0.003208933 182*2abb3134SXin Li # Other 0.1404581 0.1091826 0.08958427 0.001994163 183*2abb3134SXin Li # Returns: 184*2abb3134SXin Li # A flattened data frame, e.g. 185*2abb3134SXin Li 186*2abb3134SXin Li # Name the dimensions of the matrix. 187*2abb3134SXin Li dim_names <- list() 188*2abb3134SXin Li # TODO: generalize this. Right now we're assuming the first dimension is 189*2abb3134SXin Li # boolean. 190*2abb3134SXin Li dim_names[[bool_var_name]] <- c('TRUE', 'FALSE') 191*2abb3134SXin Li dim_names[[string_var_name]] <- dimnames(m)[[2]] 192*2abb3134SXin Li 193*2abb3134SXin Li dimnames(m) <- dim_names 194*2abb3134SXin Li 195*2abb3134SXin Li # http://stackoverflow.com/questions/15885111/create-data-frame-from-a-matrix-in-r 196*2abb3134SXin Li fit_df <- as.data.frame(as.table(m)) 197*2abb3134SXin Li 198*2abb3134SXin Li # The as.table conversion gives you a Freq column. Call it "proportion" to 199*2abb3134SXin Li # be consistent with single variable analysis. 200*2abb3134SXin Li colnames(fit_df)[colnames(fit_df) == "Freq"] <- "proportion" 201*2abb3134SXin Li 202*2abb3134SXin Li fit_df 203*2abb3134SXin Li} 204*2abb3134SXin Li 205*2abb3134SXin Limain <- function(opts) { 206*2abb3134SXin Li Log("decode-assoc") 207*2abb3134SXin Li Log("argv:") 208*2abb3134SXin Li print(commandArgs(TRUE)) 209*2abb3134SXin Li 210*2abb3134SXin Li schema <- read.csv(opts$schema) 211*2abb3134SXin Li Log("Read %d vars from schema", nrow(schema)) 212*2abb3134SXin Li 213*2abb3134SXin Li schema1 <- schema[schema$metric == opts$metric_name & 214*2abb3134SXin Li schema$var == opts$var1, ] 215*2abb3134SXin Li if (nrow(schema1) == 0) { 216*2abb3134SXin Li UsageError("Couldn't find metric '%s', field '%s' in schema", 217*2abb3134SXin Li opts$metric_name, opts$var1) 218*2abb3134SXin Li } 219*2abb3134SXin Li schema2 <- schema[schema$metric == opts$metric_name & 220*2abb3134SXin Li schema$var== opts$var2, ] 221*2abb3134SXin Li if (nrow(schema2) == 0) { 222*2abb3134SXin Li UsageError("Couldn't find metric '%s', field '%s' in schema", 223*2abb3134SXin Li opts$metric_name, opts$var2) 224*2abb3134SXin Li } 225*2abb3134SXin Li 226*2abb3134SXin Li if (schema1$params != schema2$params) { 227*2abb3134SXin Li UsageError('var1 and var2 should have the same params (%s != %s)', 228*2abb3134SXin Li schema1$params, schema2$params) 229*2abb3134SXin Li } 230*2abb3134SXin Li params_name <- schema1$params 231*2abb3134SXin Li params_path <- file.path(opts$params_dir, paste0(params_name, '.csv')) 232*2abb3134SXin Li params <- ReadParameterFile(params_path) 233*2abb3134SXin Li 234*2abb3134SXin Li var1_type <- schema1$var_type 235*2abb3134SXin Li var2_type <- schema2$var_type 236*2abb3134SXin Li 237*2abb3134SXin Li # Right now we're assuming that --var1 is a string and --var2 is a boolean. 238*2abb3134SXin Li # TODO: Remove these limitations. 239*2abb3134SXin Li if (var1_type != "string") { 240*2abb3134SXin Li UsageError("Variable 1 should be a string (%s is of type %s)", opts$var1, 241*2abb3134SXin Li var1_type) 242*2abb3134SXin Li } 243*2abb3134SXin Li if (var2_type != "boolean") { 244*2abb3134SXin Li UsageError("Variable 2 should be a boolean (%s is of type %s)", opts$var2, 245*2abb3134SXin Li var2_type) 246*2abb3134SXin Li } 247*2abb3134SXin Li 248*2abb3134SXin Li if (opts$map1 == "") { 249*2abb3134SXin Li UsageError("--map1 must be provided when --var1 is a string (var = %s)", 250*2abb3134SXin Li opts$var1) 251*2abb3134SXin Li } 252*2abb3134SXin Li 253*2abb3134SXin Li # Example cache speedup for 100k map file: 31 seconds to load map and write 254*2abb3134SXin Li # cache; vs 2.2 seconds to read cache. 255*2abb3134SXin Li string_params <- params 256*2abb3134SXin Li map <- LoadMapFile(opts$map1, string_params) 257*2abb3134SXin Li 258*2abb3134SXin Li # Important: first column is cohort (integer); the rest are variables, which 259*2abb3134SXin Li # are ASCII bit strings. 260*2abb3134SXin Li reports <- read.csv(opts$reports, colClasses=c("character"), as.is = TRUE) 261*2abb3134SXin Li 262*2abb3134SXin Li Log("Read %d reports. Preview:", nrow(reports)) 263*2abb3134SXin Li print(head(reports)) 264*2abb3134SXin Li cat('\n') 265*2abb3134SXin Li 266*2abb3134SXin Li # Filter bad reports first 267*2abb3134SXin Li is_empty1 <- reports[[opts$var1]] == "" 268*2abb3134SXin Li is_empty2 <- reports[[opts$var2]] == "" 269*2abb3134SXin Li Log('Found %d blank values in %s', sum(is_empty1), opts$var1) 270*2abb3134SXin Li Log('Found %d blank values in %s', sum(is_empty2), opts$var2) 271*2abb3134SXin Li 272*2abb3134SXin Li is_empty <- is_empty1 | is_empty2 # boolean vectors 273*2abb3134SXin Li Log('%d bad rows', sum(is_empty)) 274*2abb3134SXin Li if (sum(is_empty) > 0) { 275*2abb3134SXin Li if (opts$remove_bad_rows) { 276*2abb3134SXin Li reports <- reports[!is_empty, ] 277*2abb3134SXin Li Log('Removed %d rows, giving %d rows', sum(is_empty), nrow(reports)) 278*2abb3134SXin Li } else { 279*2abb3134SXin Li stop("Found bad rows and --remove-bad-rows wasn't passed") 280*2abb3134SXin Li } 281*2abb3134SXin Li } 282*2abb3134SXin Li 283*2abb3134SXin Li N <- nrow(reports) 284*2abb3134SXin Li 285*2abb3134SXin Li if (N == 0) { 286*2abb3134SXin Li # Use an arbitrary error code when there is nothing to analyze, so we can 287*2abb3134SXin Li # distinguish this from more serious failures. 288*2abb3134SXin Li Log("No reports to analyze. Exiting with code 9.") 289*2abb3134SXin Li quit(status = 9) 290*2abb3134SXin Li } 291*2abb3134SXin Li 292*2abb3134SXin Li # Sample reports if specified. 293*2abb3134SXin Li if (opts$reports_sample_size != -1) { 294*2abb3134SXin Li if (N > opts$reports_sample_size) { 295*2abb3134SXin Li indices <- sample(1:N, opts$reports_sample_size) 296*2abb3134SXin Li reports <- reports[indices, ] 297*2abb3134SXin Li Log("Created a sample of %d reports", nrow(reports)) 298*2abb3134SXin Li } else { 299*2abb3134SXin Li Log("Got less than %d reports, not sampling", opts$reports_sample_size) 300*2abb3134SXin Li } 301*2abb3134SXin Li } 302*2abb3134SXin Li 303*2abb3134SXin Li num_vars <- 2 # hard-coded for now, since there is --var1 and --var2. 304*2abb3134SXin Li 305*2abb3134SXin Li # Convert strings to integers 306*2abb3134SXin Li cohorts <- as.integer(reports$cohort) 307*2abb3134SXin Li 308*2abb3134SXin Li # Hack for Chrome: like AdjustCounts in decode_dist.R. 309*2abb3134SXin Li cohorts <- cohorts %% params$m 310*2abb3134SXin Li 311*2abb3134SXin Li # Assume the input has 0-based cohorts, and change to 1-based cohorts. 312*2abb3134SXin Li cohorts <- cohorts + 1 313*2abb3134SXin Li 314*2abb3134SXin Li # i.e. create a list of length 2, with identical cohorts. 315*2abb3134SXin Li # NOTE: Basic RAPPOR doesn't need cohorts. 316*2abb3134SXin Li cohorts_list <- rep(list(cohorts), num_vars) 317*2abb3134SXin Li 318*2abb3134SXin Li # TODO: We should use the closed-form formulas rather than calling the 319*2abb3134SXin Li # solver, and not require this flag. 320*2abb3134SXin Li if (!opts$create_bool_map) { 321*2abb3134SXin Li stop("ERROR: pass --create-bool-map to analyze booleans.") 322*2abb3134SXin Li } 323*2abb3134SXin Li 324*2abb3134SXin Li bool_params <- params 325*2abb3134SXin Li # HACK: Make this the boolean. The Decode() step uses k. (Note that R makes 326*2abb3134SXin Li # a copy here) 327*2abb3134SXin Li bool_params$k <- 1 328*2abb3134SXin Li 329*2abb3134SXin Li params_list <- list(bool_params, string_params) 330*2abb3134SXin Li 331*2abb3134SXin Li Log('CreateAssocStringMap') 332*2abb3134SXin Li string_map <- CreateAssocStringMap(map$map, params) 333*2abb3134SXin Li 334*2abb3134SXin Li Log('CreateAssocBoolMap') 335*2abb3134SXin Li bool_map <- CreateAssocBoolMap(params) 336*2abb3134SXin Li 337*2abb3134SXin Li map_list <- list(bool_map, string_map) 338*2abb3134SXin Li 339*2abb3134SXin Li string_var <- reports[[opts$var1]] 340*2abb3134SXin Li bool_var <- reports[[opts$var2]] 341*2abb3134SXin Li 342*2abb3134SXin Li Log('Preview of string var:') 343*2abb3134SXin Li print(head(table(string_var))) 344*2abb3134SXin Li cat('\n') 345*2abb3134SXin Li 346*2abb3134SXin Li Log('Preview of bool var:') 347*2abb3134SXin Li print(head(table(bool_var))) 348*2abb3134SXin Li cat('\n') 349*2abb3134SXin Li 350*2abb3134SXin Li # Split ASCII strings into array of numerics (as required by association.R) 351*2abb3134SXin Li 352*2abb3134SXin Li Log('Splitting string reports (%d cores)', opts$num_cores) 353*2abb3134SXin Li string_reports <- mclapply(string_var, function(x) { 354*2abb3134SXin Li # function splits strings and converts them to numeric values 355*2abb3134SXin Li # rev needed for endianness 356*2abb3134SXin Li rev(as.integer(strsplit(x, split = "")[[1]])) 357*2abb3134SXin Li }, mc.cores = opts$num_cores) 358*2abb3134SXin Li 359*2abb3134SXin Li Log('Splitting bool reports (%d cores)', opts$num_cores) 360*2abb3134SXin Li # Has to be an list of length 1 integer vectors 361*2abb3134SXin Li bool_reports <- mclapply(bool_var, function(x) { 362*2abb3134SXin Li as.integer(x) 363*2abb3134SXin Li }, mc.cores = opts$num_cores) 364*2abb3134SXin Li 365*2abb3134SXin Li reports_list <- list(bool_reports, string_reports) 366*2abb3134SXin Li 367*2abb3134SXin Li Log('Association for %d vars', length(reports_list)) 368*2abb3134SXin Li 369*2abb3134SXin Li if (opts$em_executable != "") { 370*2abb3134SXin Li Log('Will shell out to %s for native EM implementation', opts$em_executable) 371*2abb3134SXin Li em_iter_func <- ConstructFastEM(opts$em_executable, opts$tmp_dir) 372*2abb3134SXin Li } else { 373*2abb3134SXin Li Log('Will use R implementation of EM (slow)') 374*2abb3134SXin Li em_iter_func <- EM 375*2abb3134SXin Li } 376*2abb3134SXin Li 377*2abb3134SXin Li assoc_result <- ComputeDistributionEM(reports_list, cohorts_list, map_list, 378*2abb3134SXin Li ignore_other = FALSE, 379*2abb3134SXin Li params_list = params_list, 380*2abb3134SXin Li marginals = NULL, 381*2abb3134SXin Li estimate_var = FALSE, 382*2abb3134SXin Li num_cores = opts$num_cores, 383*2abb3134SXin Li em_iter_func = em_iter_func, 384*2abb3134SXin Li max_em_iters = opts$max_em_iters) 385*2abb3134SXin Li 386*2abb3134SXin Li # This happens if the marginal can't be decoded. 387*2abb3134SXin Li if (is.null(assoc_result)) { 388*2abb3134SXin Li stop("ComputeDistributionEM failed.") 389*2abb3134SXin Li } 390*2abb3134SXin Li 391*2abb3134SXin Li # NOTE: It would be nicer if reports_list, cohorts_list, etc. were indexed by 392*2abb3134SXin Li # names like 'domain' rather than numbers, and the result assoc_result$fit 393*2abb3134SXin Li # matrix had corresponding named dimensions. Instead we call 394*2abb3134SXin Li # ResultMatrixToDataFrame to do this. 395*2abb3134SXin Li 396*2abb3134SXin Li fit <- assoc_result$fit 397*2abb3134SXin Li fit_df <- ResultMatrixToDataFrame(fit, opts$var1, opts$var2) 398*2abb3134SXin Li 399*2abb3134SXin Li Log("Association results:") 400*2abb3134SXin Li print(fit_df) 401*2abb3134SXin Li cat('\n') 402*2abb3134SXin Li 403*2abb3134SXin Li results_csv_path <- file.path(opts$output_dir, 'assoc-results.csv') 404*2abb3134SXin Li write.csv(fit_df, file = results_csv_path, row.names = FALSE) 405*2abb3134SXin Li Log("Wrote %s", results_csv_path) 406*2abb3134SXin Li 407*2abb3134SXin Li # Measure elapsed time as close to the end as possible 408*2abb3134SXin Li total_elapsed_time <- proc.time()[['elapsed']] 409*2abb3134SXin Li 410*2abb3134SXin Li metrics <- list(num_reports = N, 411*2abb3134SXin Li reports_sample_size = opts$reports_sample_size, 412*2abb3134SXin Li # fit is a matrix 413*2abb3134SXin Li estimate_dimensions = dim(fit), 414*2abb3134SXin Li # should sum to near 1.0 415*2abb3134SXin Li sum_estimates = sum(fit), 416*2abb3134SXin Li total_elapsed_time = total_elapsed_time, 417*2abb3134SXin Li em_elapsed_time = assoc_result$em_elapsed_time, 418*2abb3134SXin Li num_em_iters = assoc_result$num_em_iters) 419*2abb3134SXin Li 420*2abb3134SXin Li metrics_json_path <- file.path(opts$output_dir, 'assoc-metrics.json') 421*2abb3134SXin Li writeLines(toJSON(metrics), con = metrics_json_path) 422*2abb3134SXin Li Log("Wrote %s", metrics_json_path) 423*2abb3134SXin Li 424*2abb3134SXin Li Log('DONE decode-assoc') 425*2abb3134SXin Li} 426*2abb3134SXin Li 427*2abb3134SXin Liif (!interactive()) { 428*2abb3134SXin Li main(opts) 429*2abb3134SXin Li} 430