xref: /aosp_15_r20/external/cronet/net/reporting/reporting_cache_impl.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2019 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef NET_REPORTING_REPORTING_CACHE_IMPL_H_
6 #define NET_REPORTING_REPORTING_CACHE_IMPL_H_
7 
8 #include <map>
9 #include <memory>
10 #include <optional>
11 #include <set>
12 #include <string>
13 #include <unordered_set>
14 #include <utility>
15 #include <vector>
16 
17 #include "base/containers/flat_map.h"
18 #include "base/containers/flat_set.h"
19 #include "base/containers/unique_ptr_adapters.h"
20 #include "base/memory/raw_ptr.h"
21 #include "base/sequence_checker.h"
22 #include "base/time/time.h"
23 #include "base/unguessable_token.h"
24 #include "base/values.h"
25 #include "net/base/isolation_info.h"
26 #include "net/base/network_anonymization_key.h"
27 #include "net/reporting/reporting_cache.h"
28 #include "net/reporting/reporting_context.h"
29 #include "net/reporting/reporting_endpoint.h"
30 #include "net/reporting/reporting_header_parser.h"
31 #include "net/reporting/reporting_report.h"
32 #include "url/gurl.h"
33 #include "url/origin.h"
34 
35 namespace net {
36 
37 class ReportingCacheImpl : public ReportingCache {
38  public:
39   explicit ReportingCacheImpl(ReportingContext* context);
40 
41   ReportingCacheImpl(const ReportingCacheImpl&) = delete;
42   ReportingCacheImpl& operator=(const ReportingCacheImpl&) = delete;
43 
44   ~ReportingCacheImpl() override;
45 
46   // ReportingCache implementation
47   void AddReport(const std::optional<base::UnguessableToken>& reporting_source,
48                  const NetworkAnonymizationKey& network_anonymization_key,
49                  const GURL& url,
50                  const std::string& user_agent,
51                  const std::string& group_name,
52                  const std::string& type,
53                  base::Value::Dict body,
54                  int depth,
55                  base::TimeTicks queued,
56                  int attempts) override;
57   void GetReports(
58       std::vector<raw_ptr<const ReportingReport, VectorExperimental>>*
59           reports_out) const override;
60   base::Value GetReportsAsValue() const override;
61   std::vector<raw_ptr<const ReportingReport, VectorExperimental>>
62   GetReportsToDeliver() override;
63   std::vector<raw_ptr<const ReportingReport, VectorExperimental>>
64   GetReportsToDeliverForSource(
65       const base::UnguessableToken& reporting_source) override;
66   void ClearReportsPending(
67       const std::vector<raw_ptr<const ReportingReport, VectorExperimental>>&
68           reports) override;
69   void IncrementReportsAttempts(
70       const std::vector<raw_ptr<const ReportingReport, VectorExperimental>>&
71           reports) override;
72   base::flat_map<url::Origin, std::vector<ReportingEndpoint>>
73   GetV1ReportingEndpointsByOrigin() const override;
74   void IncrementEndpointDeliveries(const ReportingEndpointGroupKey& group_key,
75                                    const GURL& url,
76                                    int reports_delivered,
77                                    bool successful) override;
78   void SetExpiredSource(
79       const base::UnguessableToken& reporting_source) override;
80   const base::flat_set<base::UnguessableToken>& GetExpiredSources()
81       const override;
82   void RemoveReports(
83       const std::vector<raw_ptr<const ReportingReport, VectorExperimental>>&
84           reports) override;
85   void RemoveReports(
86       const std::vector<raw_ptr<const ReportingReport, VectorExperimental>>&
87           reports,
88       bool delivery_success) override;
89   void RemoveAllReports() override;
90   size_t GetFullReportCountForTesting() const override;
91   size_t GetReportCountWithStatusForTesting(
92       ReportingReport::Status status) const override;
93   bool IsReportPendingForTesting(const ReportingReport* report) const override;
94   bool IsReportDoomedForTesting(const ReportingReport* report) const override;
95   void OnParsedHeader(
96       const NetworkAnonymizationKey& network_anonymization_key,
97       const url::Origin& origin,
98       std::vector<ReportingEndpointGroup> parsed_header) override;
99   void OnParsedReportingEndpointsHeader(
100       const base::UnguessableToken& reporting_source,
101       const IsolationInfo& isolation_info,
102       std::vector<ReportingEndpoint> parsed_header) override;
103   std::set<url::Origin> GetAllOrigins() const override;
104   void RemoveClient(const NetworkAnonymizationKey& network_anonymization_key,
105                     const url::Origin& origin) override;
106   void RemoveClientsForOrigin(const url::Origin& origin) override;
107   void RemoveAllClients() override;
108   void RemoveEndpointGroup(const ReportingEndpointGroupKey& group_key) override;
109   void RemoveEndpointsForUrl(const GURL& url) override;
110   void RemoveSourceAndEndpoints(
111       const base::UnguessableToken& reporting_source) override;
112   void AddClientsLoadedFromStore(
113       std::vector<ReportingEndpoint> loaded_endpoints,
114       std::vector<CachedReportingEndpointGroup> loaded_endpoint_groups)
115       override;
116   std::vector<ReportingEndpoint> GetCandidateEndpointsForDelivery(
117       const ReportingEndpointGroupKey& group_key) override;
118   base::Value GetClientsAsValue() const override;
119   size_t GetEndpointCount() const override;
120   void Flush() override;
121   ReportingEndpoint GetV1EndpointForTesting(
122       const base::UnguessableToken& reporting_source,
123       const std::string& endpoint_name) const override;
124   ReportingEndpoint GetEndpointForTesting(
125       const ReportingEndpointGroupKey& group_key,
126       const GURL& url) const override;
127   bool EndpointGroupExistsForTesting(const ReportingEndpointGroupKey& group_key,
128                                      OriginSubdomains include_subdomains,
129                                      base::Time expires) const override;
130   bool ClientExistsForTesting(
131       const NetworkAnonymizationKey& network_anonymization_key,
132       const url::Origin& origin) const override;
133   size_t GetEndpointGroupCountForTesting() const override;
134   size_t GetClientCountForTesting() const override;
135   size_t GetReportingSourceCountForTesting() const override;
136   void SetEndpointForTesting(const ReportingEndpointGroupKey& group_key,
137                              const GURL& url,
138                              OriginSubdomains include_subdomains,
139                              base::Time expires,
140                              int priority,
141                              int weight) override;
142   void SetV1EndpointForTesting(const ReportingEndpointGroupKey& group_key,
143                                const base::UnguessableToken& reporting_source,
144                                const IsolationInfo& isolation_info,
145                                const GURL& url) override;
146   IsolationInfo GetIsolationInfoForEndpoint(
147       const ReportingEndpoint& endpoint) const override;
148 
149  private:
150   // Represents the entire Report-To configuration for a (NAK, origin) pair.
151   struct Client {
152     Client(const NetworkAnonymizationKey& network_anonymization_key,
153            const url::Origin& origin);
154 
155     Client(const Client& other);
156     Client(Client&& other);
157 
158     Client& operator=(const Client& other);
159     Client& operator=(Client&& other);
160 
161     ~Client();
162 
163     // NAK of the context associated with this client. Needed to prevent leaking
164     // third party contexts across sites.
165     NetworkAnonymizationKey network_anonymization_key;
166 
167     // Origin that configured this client.
168     url::Origin origin;
169 
170     // Total number of endpoints for this origin. Should stay in sync with the
171     // sum of endpoint counts for all the groups within this client.
172     size_t endpoint_count = 0;
173 
174     // Last time that any of the groups for this origin was accessed for a
175     // delivery or updated via a new header. Should stay in sync with the latest
176     // |last_used| of all the groups within this client.
177     base::Time last_used;
178 
179     // Set of endpoint group names for this origin.
180     std::set<std::string> endpoint_group_names;
181   };
182 
183   using ReportSet = base::flat_set<std::unique_ptr<ReportingReport>,
184                                    base::UniquePtrComparator>;
185   using ClientMap = std::multimap<std::string, Client>;
186   using EndpointGroupMap =
187       std::map<ReportingEndpointGroupKey, CachedReportingEndpointGroup>;
188   using EndpointMap =
189       std::multimap<ReportingEndpointGroupKey, ReportingEndpoint>;
190 
191   ReportSet::const_iterator FindReportToEvict() const;
192 
193   // Consistency-checks the entire data structure of clients, groups, and
194   // endpoints, if DCHECK is on. The cached clients should pass this consistency
195   // check after completely parsing a header (i.e. not after the intermediate
196   // steps), and before and after any of the public methods that remove or
197   // retrieve client info. Also calls |sequence_checker_| to DCHECK that we are
198   // being called on a valid sequence.
199   void ConsistencyCheckClients() const;
200 
201   // Helper methods for ConsistencyCheckClients():
202 #if DCHECK_IS_ON()
203   // Returns number of endpoint groups found in |client|.
204   size_t ConsistencyCheckClient(const std::string& domain,
205                                 const Client& client) const;
206 
207   // Returns the number of endpoints found in |group|.
208   size_t ConsistencyCheckEndpointGroup(
209       const ReportingEndpointGroupKey& key,
210       const CachedReportingEndpointGroup& group) const;
211 
212   void ConsistencyCheckEndpoint(const ReportingEndpointGroupKey& key,
213                                 const ReportingEndpoint& endpoint,
214                                 EndpointMap::const_iterator endpoint_it) const;
215 #endif  // DCHECK_IS_ON()
216 
217   // Finds iterator to the client with the given |network_anonymization_key| and
218   // |origin|, if one exists. Returns |clients_.end()| if none is found.
219   ClientMap::iterator FindClientIt(
220       const NetworkAnonymizationKey& network_anonymization_key,
221       const url::Origin& origin);
222 
223   // Overload that takes a ReportingEndpointGroupKey and finds the client
224   // to which a group specified by the |group_key| would belong. The group name
225   // of the key is ignored.
226   ClientMap::iterator FindClientIt(const ReportingEndpointGroupKey& group_key);
227 
228   // Finds iterator to the endpoint group identified by |group_key| (origin and
229   // name), if one exists. Returns |endpoint_groups_.end()| if none is found.
230   EndpointGroupMap::iterator FindEndpointGroupIt(
231       const ReportingEndpointGroupKey& group_key);
232 
233   // Finds iterator to the endpoint for the given |group_key| (origin and group
234   // name) and |url|, if one exists. Returns |endpoints_.end()| if none is
235   // found.
236   EndpointMap::iterator FindEndpointIt(
237       const ReportingEndpointGroupKey& group_key,
238       const GURL& url);
239 
240   // Adds a new client, endpoint group, or endpoint to the cache, if none
241   // exists. If one already exists, updates the existing entry to match the new
242   // one. Returns iterator to newly added client.
243   ClientMap::iterator AddOrUpdateClient(Client new_client);
244   void AddOrUpdateEndpointGroup(CachedReportingEndpointGroup new_group);
245   void AddOrUpdateEndpoint(ReportingEndpoint new_endpoint);
246 
247   // Remove all the endpoints configured for |origin| and |group| whose urls are
248   // not in |endpoints_to_keep_urls|. Does not guarantee that all the endpoints
249   // in |endpoints_to_keep_urls| exist in the cache for that group.
250   void RemoveEndpointsInGroupOtherThan(
251       const ReportingEndpointGroupKey& group_key,
252       const std::set<GURL>& endpoints_to_keep_urls);
253 
254   // Remove all the endpoint groups for the NAK and origin whose names are not
255   // in |groups_to_keep_names|. Does not guarantee that all the groups in
256   // |groups_to_keep_names| exist in the cache for that client.
257   void RemoveEndpointGroupsForClientOtherThan(
258       const NetworkAnonymizationKey& network_anonymization_key,
259       const url::Origin& origin,
260       const std::set<std::string>& groups_to_keep_names);
261 
262   // Gets the endpoints in the given group.
263   std::vector<ReportingEndpoint> GetEndpointsInGroup(
264       const ReportingEndpointGroupKey& group_key) const;
265 
266   // Gets the number of endpoints for the given origin and group.
267   size_t GetEndpointCountInGroup(
268       const ReportingEndpointGroupKey& group_key) const;
269 
270   // Updates the last_used time for the given origin and endpoint group.
271   void MarkEndpointGroupAndClientUsed(ClientMap::iterator client_it,
272                                       EndpointGroupMap::iterator group_it,
273                                       base::Time now);
274 
275   // Removes the endpoint at the given iterator, which must exist in the cache.
276   // Also takes iterators to the client and endpoint group to avoid repeated
277   // lookups. May cause the client and/or group to be removed if they become
278   // empty, which would invalidate those iterators.
279   // Returns the iterator following the endpoint removed, or std::nullopt if
280   // either of |group_it| or |client_it| were invalidated. (If |client_it| is
281   // invalidated, then so must |group_it|).
282   std::optional<EndpointMap::iterator> RemoveEndpointInternal(
283       ClientMap::iterator client_it,
284       EndpointGroupMap::iterator group_it,
285       EndpointMap::iterator endpoint_it);
286 
287   // Removes the endpoint group at the given iterator (which must exist in the
288   // cache). Also takes iterator to the client to avoid repeated lookups. May
289   // cause the client to be removed if it becomes empty, which would
290   // invalidate |client_it|. If |num_endpoints_removed| is not null, then
291   // |*num_endpoints_removed| is incremented by the number of endpoints
292   // removed.
293   // Returns the iterator following the endpoint group removed, or std::nullopt
294   // if |client_it| was invalidated.
295   std::optional<EndpointGroupMap::iterator> RemoveEndpointGroupInternal(
296       ClientMap::iterator client_it,
297       EndpointGroupMap::iterator group_it,
298       size_t* num_endpoints_removed = nullptr);
299 
300   // Removes the client at the given iterator (which must exist in the cache),
301   // along with all of its endpoint groups and endpoints. Invalidates
302   // |client_it|.
303   // Returns the iterator following the client removed.
304   ClientMap::iterator RemoveClientInternal(ClientMap::iterator client_it);
305 
306   // Evict endpoints from the specified client and globally, if necessary to
307   // obey the per-client and global endpoint limits set in the ReportingPolicy.
308   //
309   // To evict from a client: First evicts any stale or expired groups for that
310   // origin. If that removes enough endpoints, then stop. Otherwise, find the
311   // stalest group (which has not been accessed for a delivery in the longest
312   // time) with the most endpoints, and evict the least important endpoints from
313   // that group.
314   // To evict globally: Find the stalest client with the most endpoints and do
315   // the above.
316   void EnforcePerClientAndGlobalEndpointLimits(ClientMap::iterator client_it);
317 
318   // Evicts endpoints from a client until it has evicted |endpoints_to_evict|
319   // endpoints. First tries to remove expired and stale groups. If that fails to
320   // satisfy the limit, finds the stalest group with the most endpoints and
321   // evicts the least important endpoints from it.
322   void EvictEndpointsFromClient(ClientMap::iterator client_it,
323                                 size_t endpoints_to_evict);
324 
325   // Evicts the least important endpoint from a group (the endpoint with lowest
326   // priority and lowest weight). May cause the group and/or client to be
327   // deleted and the iterators invalidated.
328   void EvictEndpointFromGroup(ClientMap::iterator client_it,
329                               EndpointGroupMap::iterator group_it);
330 
331   // Removes all expired or stale groups from the given client. May delete the
332   // client and invalidate |client_it| if it becomes empty.
333   // Increments |*num_endpoints_removed| by the number of endpoints removed.
334   // Returns true if |client_it| was invalidated.
335   bool RemoveExpiredOrStaleGroups(ClientMap::iterator client_it,
336                                   size_t* num_endpoints_removed);
337 
338   // Adds/removes (if it exists) |endpoint_it| from |endpoint_its_by_url_|.
339   void AddEndpointItToIndex(EndpointMap::iterator endpoint_it);
340   void RemoveEndpointItFromIndex(EndpointMap::iterator endpoint_it);
341 
342   // Helper method for IncrementEndpointDeliveries
343   ReportingEndpoint::Statistics* GetEndpointStats(
344       const ReportingEndpointGroupKey& group_key,
345       const GURL& url);
346 
347   // Helper methods for GetClientsAsValue().
348   base::Value GetClientAsValue(const Client& client) const;
349   base::Value GetEndpointGroupAsValue(
350       const CachedReportingEndpointGroup& group) const;
351   base::Value GetEndpointAsValue(const ReportingEndpoint& endpoint) const;
352 
353   // Convenience methods for fetching things from the context_.
clock()354   const base::Clock& clock() const { return context_->clock(); }
tick_clock()355   const base::TickClock& tick_clock() const { return context_->tick_clock(); }
store()356   PersistentReportingStore* store() { return context_->store(); }
357 
358   raw_ptr<ReportingContext> context_;
359 
360   // Reports that have not yet been successfully uploaded.
361   ReportSet reports_;
362 
363   // Reporting API V0 Cache:
364   // The |clients_|, |endpoint_groups_| and |endpoints_| members all hold
365   // endpoint group configuration for the V0 API. These endpoint groups are
366   // configured through the Report-To HTTP header, and are currently used for
367   // both document and network reports.
368 
369   // Map of clients for all configured origins and NAKs, keyed on domain name
370   // (there may be multiple NAKs and origins per domain name).
371   ClientMap clients_;
372 
373   // Map of endpoint groups, keyed on origin and group name. Keys and values
374   // must only contain V0 endpoint group keys.
375   EndpointGroupMap endpoint_groups_;
376 
377   // Map of endpoints, keyed on origin and group name (there may be multiple
378   // endpoints for a given origin and group, with different urls). Keys must
379   // only contain V0 endpoint group keys.
380   EndpointMap endpoints_;
381 
382   // Index of endpoints stored in |endpoints_| keyed on URL, for easier lookup
383   // during RemoveEndpointsForUrl(). Should stay in sync with |endpoints_|.
384   std::multimap<GURL, EndpointMap::iterator> endpoint_its_by_url_;
385 
386   // Reporting API V1 Cache:
387   // The `document_endpoints_` member holds endpoint configuration for the V1
388   // API, configured through the Reporting-Endpoints HTTP header. These
389   // endpoints are strongly associated with the resource which configured them,
390   // and are only used for document reports.
391 
392   // Map of endpoints for each reporting source, keyed on the reporting source
393   // token. This contains only V1 document endpoints.
394   std::map<base::UnguessableToken, std::vector<ReportingEndpoint>>
395       document_endpoints_;
396 
397   // Isolation info for each reporting source. Used for determining credentials
398   // to send when delivering reports. This contains only V1 document endpoints.
399   std::map<base::UnguessableToken, IsolationInfo> isolation_info_;
400 
401   // Reporting source tokens representing sources which have been destroyed.
402   // The configuration in `document_endpoints_` and `isolation_info_` for these
403   // sources can be removed once all outstanding reports are delivered (or
404   // expired).
405   base::flat_set<base::UnguessableToken> expired_sources_;
406 
407   SEQUENCE_CHECKER(sequence_checker_);
408 };
409 
410 }  // namespace net
411 
412 #endif  // NET_REPORTING_REPORTING_CACHE_IMPL_H_
413