xref: /aosp_15_r20/external/rappor/analysis/R/alternative.R (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1# Copyright 2014 Google Inc. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15library(limSolve)
16library(Matrix)
17
18# The next two functions create a matrix (G) and a vector (H) encoding
19# linear inequality constraints that a solution vector (x) must satisfy:
20#                       G * x >= H
21
22# Currently represent three sets of constraints on the solution vector:
23#  - all solution coefficients are nonnegative
24#  - the sum total of all solution coefficients is no more than 1
25#  - in each of the coordinates of the target vector (estimated Bloom filter)
26#    we don't overshoot by more than three standard deviations.
27MakeG <- function(n, X) {
28  d <- Diagonal(n)
29  last <- rep(-1, n)
30  rbind2(rbind2(d, last), -X)
31}
32
33MakeH <- function(n, Y, stds) {
34  # set the floor at 0.01 to avoid degenerate cases
35  YY <- apply(Y + 3 * stds,  # in each bin don't overshoot by more than 3 stds
36              1:2,
37              function(x) min(1, max(0.01, x)))  # clamp the bound to [0.01,1]
38
39  c(rep(0, n),  # non-negativity condition
40    -1,         # coefficients sum up to no more than 1
41    -as.vector(t(YY))   # t is important!
42    )
43}
44
45MakeLseiModel <- function(X, Y, stds) {
46  m <- dim(X)[1]
47  n <- dim(X)[2]
48
49# no slack variables for now
50#   slack <- Matrix(FALSE, nrow = m, ncol = m, sparse = TRUE)
51#   colnames(slack) <- 1:m
52#   diag(slack) <- TRUE
53#
54#   G <- MakeG(n + m)
55#   H <- MakeH(n + m)
56#
57#   G[n+m+1,n:(n+m)] <- -0.1
58#  A = cbind2(X, slack)
59
60  w <- as.vector(t(1 / stds))
61  w_median <- median(w[!is.infinite(w)])
62  if(is.na(w_median))  # all w are infinite
63    w_median <- 1
64  w[w > w_median * 2] <- w_median * 2
65  w <- w / mean(w)
66
67  list(# coerce sparse Boolean matrix X to sparse numeric matrix
68       A = Diagonal(x = w) %*% (X + 0),
69       B = as.vector(t(Y)) * w,  # transform to vector in the row-first order
70       G = MakeG(n, X),
71       H = MakeH(n, Y, stds),
72       type = 2)  # Since there are no equality constraints, lsei defaults to
73                  # solve.QP anyway, but outputs a warning unless type == 2.
74}
75
76# CustomLM(X, Y)
77ConstrainedLinModel <- function(X,Y) {
78  model <- MakeLseiModel(X, Y$estimates, Y$stds)
79  coefs <- do.call(lsei, model)$X
80  names(coefs) <- colnames(X)
81
82  coefs
83}