#!/usr/bin/env Rscript
#
# Command line tool to decode a RAPPOR data set.  It is a simple wrapper for
# Decode() in decode.R.

library(optparse)

#
# Command line parsing.  Do this first before loading libraries to catch errors
# quickly.  Loading libraries in R is slow.
#

# Report a command line usage problem and terminate.
#
# Args:
#   ...: A format string plus values, forwarded unchanged to sprintf().
#
# Prints the formatted message followed by a newline, then quits the R process
# with exit status 1.  Does not return.
UsageError <- function(...) {
  message_text <- sprintf(...)
  cat(message_text)
  cat("\n")
  quit(status = 1)
}

# Flags understood by this tool, in the form expected by OptionParser().
option_list <- list(
  # Inputs
  make_option("--map", default = "", help = "Map file (required)"),
  make_option("--counts", default = "", help = "Counts file (required)"),
  make_option("--params", default = "", help = "Params file (required)"),
  make_option("--output-dir", dest = "output_dir", default = ".",
              help = "Output directory (default .)"),

  make_option("--correction", default = "FDR", help = "Correction method"),
  make_option("--alpha", default = 0.05, help = "Alpha level"),

  # NOTE: the line break inside this help string is part of the literal.
  make_option("--adjust-counts-hack", dest = "adjust_counts_hack",
              default = FALSE, action = "store_true",
              help = "Allow the counts file to have more rows than cohorts.
                    Most users should not use this.")
)

# Parse the command line and check that the required flags were supplied.
#
# Returns:
#   The list produced by parse_args(), with one element per flag.
#
# A required flag left at its empty-string default triggers UsageError(), which
# exits the process.  The flags are checked in the order map, counts, params.
ParseOptions <- function() {
  # NOTE: This API is bad; if you add positional_arguments, the return value
  # changes!
  arg_parser <- OptionParser(option_list = option_list)
  parsed <- parse_args(arg_parser)

  # Option name -> message shown when that option is missing.
  required_messages <- c(
    map = "--map is required.",
    counts = "--counts is required.",
    params = "--params is required."
  )
  for (option_name in names(required_messages)) {
    if (parsed[[option_name]] == "") {
      UsageError(required_messages[[option_name]])
    }
  }
  parsed
}

if (!interactive()) {
  opts <- ParseOptions()
}

#
# Load libraries and source our own code.
#

library(RJSONIO)

# Source a file from the RAPPOR repository, so we don't have to change pwd.
#
# Args:
#   rel_path: Path of the R file relative to the repository root.
#
# The root is taken from the RAPPOR_REPO environment variable and is prepended
# to rel_path verbatim (no separator is inserted), so a non-empty value must
# already end with a path separator.  When the variable is unset, rel_path is
# used as is.
source.rappor <- function(rel_path) {
  abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path)
  source(abs_path)
}

source.rappor("analysis/R/read_input.R")
source.rappor("analysis/R/decode.R")
source.rappor("analysis/R/util.R")

source.rappor("analysis/R/alternative.R")

options(stringsAsFactors = FALSE)


# Decode one RAPPOR data set and write the results to opts$output_dir.
#
# Args:
#   opts: Parsed command line options.  Reads the elements params, counts, map,
#         output_dir, correction, alpha and adjust_counts_hack.
#
# Writes three files into opts$output_dir:
#   results.csv:  res$fit as returned by Decode().
#   residual.png: histogram of res$residual, scaled to relative frequencies.
#   metrics.json: res$metrics plus total_elapsed_time.
#
# Quits the R process with status 1 if Decode() returns a fit with zero rows.
main <- function(opts) {
  Log("decode-dist")
  Log("argv:")
  print(commandArgs(TRUE))

  Log("Loading inputs")

  # Run a single model if all inputs are specified.
  params <- ReadParameterFile(opts$params)
  counts <- ReadCountsFile(opts$counts, params,
                           adjust_counts = opts$adjust_counts_hack)
  counts <- AdjustCounts(counts, params)

  # The left-most column has totals.
  num_reports <- sum(counts[, 1])

  map <- LoadMapFile(opts$map, params)

  Log("Decoding %d reports", num_reports)
  res <- Decode(counts, map$map, params, correction = opts$correction,
                alpha = opts$alpha)
  Log("Done decoding")

  if (nrow(res$fit) == 0) {
    Log("FATAL: Analysis returned no strings.")
    quit(status = 1)
  }

  # Write analysis results as CSV.
  results_csv_path <- file.path(opts$output_dir, 'results.csv')
  write.csv(res$fit, file = results_csv_path, row.names = FALSE)

  # Write residual histogram as a png.
  results_png_path <- file.path(opts$output_dir, 'residual.png')
  png(results_png_path)
  # Close the device even when building or drawing the histogram fails.
  # Otherwise, when main() is called from an interactive session, the png
  # device stays open and remains the current device after the error.
  tryCatch({
    breaks <- pretty(res$residual, n = 200)
    histogram <- hist(res$residual, breaks, plot = FALSE)
    # Convert the histogram to frequencies.
    histogram$counts <- histogram$counts / sum(histogram$counts)
    plot(histogram, main = "Histogram of the residual",
         xlab = sprintf("Residual (observed - explained, %d x %d values)",
                        params$m, params$k))
  }, finally = dev.off())

  # Elapsed time as reported by proc.time() at this point, i.e. after decoding
  # and after the CSV and PNG have been written.
  res$metrics$total_elapsed_time <- proc.time()[['elapsed']]

  # Write summary as JSON (scalar values).
  metrics_json_path <- file.path(opts$output_dir, 'metrics.json')
  m <- toJSON(res$metrics)
  writeLines(m, con = metrics_json_path)
  Log("Wrote %s, %s, and %s", results_csv_path, results_png_path,
      metrics_json_path)

  # TODO:
  # - These are in a 2-column 'parameters' and 'values' format.  Should these
  #   just be a plain list?
  # - Should any of these privacy params be in metrics.json?

  Log("Privacy summary:")
  print(res$privacy)
  cat("\n")

  Log('DONE')
}

if (!interactive()) {
  main(opts)
}