xref: /aosp_15_r20/external/rappor/analysis/R/encode.R (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1*2abb3134SXin Li# Copyright 2014 Google Inc. All rights reserved.
2*2abb3134SXin Li#
3*2abb3134SXin Li# Licensed under the Apache License, Version 2.0 (the "License");
4*2abb3134SXin Li# you may not use this file except in compliance with the License.
5*2abb3134SXin Li# You may obtain a copy of the License at
6*2abb3134SXin Li#
7*2abb3134SXin Li#     http://www.apache.org/licenses/LICENSE-2.0
8*2abb3134SXin Li#
9*2abb3134SXin Li# Unless required by applicable law or agreed to in writing, software
10*2abb3134SXin Li# distributed under the License is distributed on an "AS IS" BASIS,
11*2abb3134SXin Li# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*2abb3134SXin Li# See the License for the specific language governing permissions and
13*2abb3134SXin Li# limitations under the License.
14*2abb3134SXin Li
15*2abb3134SXin LiEncode <- function(value, map, strs, params, N, id = NULL,
16*2abb3134SXin Li                   cohort = NULL, B = NULL, BP = NULL) {
17*2abb3134SXin Li  # Encode value to RAPPOR and return a report.
18*2abb3134SXin Li  #
19*2abb3134SXin Li  # Input:
20*2abb3134SXin Li  #    value: value to be encoded
21*2abb3134SXin Li  #    map: a mapping matrix describing where each element of strs map in
22*2abb3134SXin Li  #         each cohort
23*2abb3134SXin Li  #    strs: a vector of possible values with value being one of them
24*2abb3134SXin Li  #    params: a list of RAPPOR parameters described in decode.R
25*2abb3134SXin Li  #    N: sample size
26*2abb3134SXin Li  # Optional parameters:
27*2abb3134SXin Li  #    id: user ID (smaller than N)
28*2abb3134SXin Li  #    cohort: specifies cohort number (smaller than m)
29*2abb3134SXin Li  #    B: input Bloom filter itself, in which case value is ignored
30*2abb3134SXin Li  #    BP: input Permanent Randomized Response (memoized for multiple colections
31*2abb3134SXin Li  #        from the same user
32*2abb3134SXin Li
33*2abb3134SXin Li  k <- params$k
34*2abb3134SXin Li  p <- params$p
35*2abb3134SXin Li  q <- params$q
36*2abb3134SXin Li  f <- params$f
37*2abb3134SXin Li  h <- params$h
38*2abb3134SXin Li  m <- params$m
39*2abb3134SXin Li  if (is.null(cohort)) {
40*2abb3134SXin Li    cohort <- sample(1:m, 1)
41*2abb3134SXin Li  }
42*2abb3134SXin Li
43*2abb3134SXin Li  if (is.null(id)) {
44*2abb3134SXin Li    id <- sample(N, 1)
45*2abb3134SXin Li  }
46*2abb3134SXin Li
47*2abb3134SXin Li  ind <- which(value == strs)
48*2abb3134SXin Li
49*2abb3134SXin Li  if (is.null(B)) {
50*2abb3134SXin Li    B <- as.numeric(map[[cohort]][, ind])
51*2abb3134SXin Li  }
52*2abb3134SXin Li
53*2abb3134SXin Li  if (is.null(BP)) {
54*2abb3134SXin Li    BP <- sapply(B, function(x) sample(c(0, 1, x), 1,
55*2abb3134SXin Li                                       prob = c(0.5 * f, 0.5 * f, 1 - f)))
56*2abb3134SXin Li  }
57*2abb3134SXin Li  rappor <- sapply(BP, function(x) rbinom(1, 1, ifelse(x == 1, q, p)))
58*2abb3134SXin Li
59*2abb3134SXin Li  list(value = value, rappor = rappor, B = B, BP = BP, cohort = cohort, id = id)
60*2abb3134SXin Li}
61*2abb3134SXin Li
62*2abb3134SXin LiExamplePlot <- function(res, k, ebs = 1, title = "", title_cex = 4,
63*2abb3134SXin Li                        voff = .17, acex = 1.5, posa = 2, ymin = 1,
64*2abb3134SXin Li                        horiz = FALSE) {
65*2abb3134SXin Li  PC <- function(k, report) {
66*2abb3134SXin Li    char <- as.character(report)
67*2abb3134SXin Li    if (k > 128) {
68*2abb3134SXin Li      char[char != ""] <- "|"
69*2abb3134SXin Li    }
70*2abb3134SXin Li    char
71*2abb3134SXin Li  }
72*2abb3134SXin Li
73*2abb3134SXin Li  # Annotation settings
74*2abb3134SXin Li  anc <- "darkorange2"
75*2abb3134SXin Li  colors <- c("lavenderblush3", "maroon4")
76*2abb3134SXin Li
77*2abb3134SXin Li  par(omi = c(0, .55, 0, 0))
78*2abb3134SXin Li  # Setup plotting.
79*2abb3134SXin Li  plot(1:k, rep(1, k), ylim = c(ymin, 4), type = "n",
80*2abb3134SXin Li       xlab = "Bloom filter bits",
81*2abb3134SXin Li       yaxt = "n", ylab = "", xlim = c(0, k), bty = "n", xaxt = "n")
82*2abb3134SXin Li  mtext(paste0("Participant ", res$id, " in cohort ", res$cohort), 3, 2,
83*2abb3134SXin Li        adj = 1, col = anc, cex = acex)
84*2abb3134SXin Li  axis(1, 2^(0:15), 2^(0:15))
85*2abb3134SXin Li  abline(v = which(res$B == 1), lty = 2, col = "grey")
86*2abb3134SXin Li
87*2abb3134SXin Li  # First row with the true value.
88*2abb3134SXin Li  text(k / 2, 4, paste0('"', paste0(title, as.character(res$value)), '"'),
89*2abb3134SXin Li       cex = title_cex, col = colors[2], xpd = NA)
90*2abb3134SXin Li
91*2abb3134SXin Li  # Second row with BF: B.
92*2abb3134SXin Li  points(1:k, rep(3, k), pch = PC(k, res$B), col = colors[res$B + 1],
93*2abb3134SXin Li         cex = res$B + 1)
94*2abb3134SXin Li  text(k, 3 + voff, paste0(sum(res$B), " signal bits"), cex = acex,
95*2abb3134SXin Li       col = anc, pos = posa)
96*2abb3134SXin Li
97*2abb3134SXin Li  # Third row: B'.
98*2abb3134SXin Li  points(1:k, rep(2, k), pch = PC(k, res$BP), col = colors[res$BP + 1],
99*2abb3134SXin Li         cex = res$BP + 1)
100*2abb3134SXin Li  text(k, 2 + voff, paste0(sum(res$BP), " bits on"),
101*2abb3134SXin Li       cex = acex, col = anc, pos = posa)
102*2abb3134SXin Li
103*2abb3134SXin Li  # Row 4: actual RAPPOR report.
104*2abb3134SXin Li  report <- res$rappor
105*2abb3134SXin Li  points(1:k, rep(1, k), pch = PC(k, as.character(report)),
106*2abb3134SXin Li         col = colors[report + 1], cex = report + 1)
107*2abb3134SXin Li  text(k, 1 + voff, paste0(sum(res$rappor), " bits on"), cex = acex,
108*2abb3134SXin Li       col = anc, pos = posa)
109*2abb3134SXin Li
110*2abb3134SXin Li  mtext(c("True value:", "Bloom filter (B):",
111*2abb3134SXin Li          "Fake Bloom \n filter (B'):", "Report sent\n to server:"),
112*2abb3134SXin Li        2, 1, at = 4:1, las = 2)
113*2abb3134SXin Li  legend("topright", legend = c("0", "1"), fill = colors, bty = "n",
114*2abb3134SXin Li         cex = 1.5, horiz = horiz)
115*2abb3134SXin Li  legend("topleft", legend = ebs, plot = FALSE)
116*2abb3134SXin Li}
117*2abb3134SXin Li
118*2abb3134SXin LiPlotPopulation <- function(probs, detected, detection_frequency) {
119*2abb3134SXin Li    cc <- c("gray80", "darkred")
120*2abb3134SXin Li    color <- rep(cc[1], length(probs))
121*2abb3134SXin Li    color[detected] <- cc[2]
122*2abb3134SXin Li    bp <- barplot(probs, col = color, border = color)
123*2abb3134SXin Li    inds <- c(1, c(max(which(probs > 0)), length(probs)))
124*2abb3134SXin Li    axis(1, bp[inds], inds)
125*2abb3134SXin Li    legend("topright", legend = c("Detected", "Not-detected"),
126*2abb3134SXin Li           fill = rev(cc), bty = "n")
127*2abb3134SXin Li    abline(h = detection_frequency, lty = 2, col = "grey")
128*2abb3134SXin Li}
129