1 /*
2  * Copyright 2017, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <gtest/gtest_prod.h>
19 #include <log/log_time.h>
20 #include <src/guardrail/stats_log_enums.pb.h>
21 
22 #include <list>
23 #include <mutex>
24 #include <string>
25 #include <unordered_map>
26 #include <vector>
27 
28 #include "config/ConfigKey.h"
29 #include "logd/logevent_util.h"
30 
31 namespace android {
32 namespace os {
33 namespace statsd {
34 
35 struct InvalidConfigReason {
36     InvalidConfigReasonEnum reason;
37     std::optional<int64_t> metricId;
38     std::optional<int64_t> stateId;
39     std::optional<int64_t> alertId;
40     std::optional<int64_t> alarmId;
41     std::optional<int64_t> subscriptionId;
42     std::vector<int64_t> matcherIds;
43     std::vector<int64_t> conditionIds;
InvalidConfigReasonInvalidConfigReason44     InvalidConfigReason(){};
InvalidConfigReasonInvalidConfigReason45     InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){};
InvalidConfigReasonInvalidConfigReason46     InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId)
47         : reason(reason), metricId(metricId){};
48     bool operator==(const InvalidConfigReason& other) const {
49         return (this->reason == other.reason) && (this->metricId == other.metricId) &&
50                (this->stateId == other.stateId) && (this->alertId == other.alertId) &&
51                (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) &&
52                (this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds);
53     }
54 
55     // For better failure messages in statsd_test
56     friend void PrintTo(const InvalidConfigReason& obj, std::ostream* os);
57 };
58 
// Error counters and flush-latency samples kept for one restricted metric.
//
// Declared as a named struct rather than `typedef struct { ... } Name;`: an unnamed
// class that takes its name from a typedef may not have default member initializers
// (clang: -Wnon-c-typedef-for-linkage). The type name is unchanged for callers.
struct RestrictedMetricStats {
    // Number of insert errors recorded.
    int64_t insertError = 0;
    // Number of table creation errors recorded.
    int64_t tableCreationError = 0;
    // Number of table deletion errors recorded.
    int64_t tableDeletionError = 0;
    // Recorded flush latencies, in nanoseconds.
    std::list<int64_t> flushLatencyNs;
    // Number of category changes recorded.
    int64_t categoryChangedCount = 0;
};
66 
// Record of a single dump report: its time (seconds), size (bytes) and report number.
struct DumpReportStats {
    int32_t mDumpReportTimeSec = 0;
    int32_t mDumpReportSizeBytes = 0;
    int32_t mDumpReportNumber = 0;

    // Stores the three values verbatim; no validation is performed.
    DumpReportStats(int32_t dumpReportSec, int32_t dumpReportSize, int32_t reportNumber)
        : mDumpReportTimeSec{dumpReportSec},
          mDumpReportSizeBytes{dumpReportSize},
          mDumpReportNumber{reportNumber} {
    }
};
77 
78 struct ConfigStats {
79     int32_t uid;
80     int64_t id;
81     int32_t creation_time_sec;
82     int32_t deletion_time_sec = 0;
83     int32_t reset_time_sec = 0;
84     int32_t metric_count;
85     int32_t condition_count;
86     int32_t matcher_count;
87     int32_t alert_count;
88     bool is_valid;
89     bool device_info_table_creation_failed = false;
90     int32_t db_corrupted_count = 0;
91     int32_t db_deletion_stat_failed = 0;
92     int32_t db_deletion_size_exceeded_limit = 0;
93     int32_t db_deletion_config_invalid = 0;
94     int32_t db_deletion_too_old = 0;
95     int32_t db_deletion_config_removed = 0;
96     int32_t db_deletion_config_updated = 0;
97     // Stores the number of ConfigMetadataProvider promotion failures
98     int32_t config_metadata_provider_promote_failure = 0;
99 
100     // Stores reasons for why config is valid or not
101     std::optional<InvalidConfigReason> reason;
102 
103     std::list<int32_t> broadcast_sent_time_sec;
104 
105     // Times at which this config is activated.
106     std::list<int32_t> activation_time_sec;
107 
108     // Times at which this config is deactivated.
109     std::list<int32_t> deactivation_time_sec;
110 
111     std::list<int32_t> data_drop_time_sec;
112     // Number of bytes dropped at corresponding time.
113     std::list<int64_t> data_drop_bytes;
114 
115     std::list<DumpReportStats> dump_report_stats;
116 
117     // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount.
118     std::map<const int64_t, int> matcher_stats;
119 
120     // Stores the number of output tuple of condition trackers when it's bigger than
121     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
122     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
123     std::map<const int64_t, int> condition_stats;
124 
125     // Stores the number of output tuple of metric producers when it's bigger than
126     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
127     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
128     std::map<const int64_t, int> metric_stats;
129 
130     // Stores the max number of output tuple of dimensions in condition across dimensions in what
131     // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is
132     // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by
133     // kMaxConfigCount.
134     std::map<const int64_t, int> metric_dimension_in_condition_stats;
135 
136     // Stores the number of times an anomaly detection alert has been declared.
137     // The map size is capped by kMaxConfigCount.
138     std::map<const int64_t, int> alert_stats;
139 
140     // Stores the config ID for each sub-config used.
141     std::list<std::pair<const int64_t, const int32_t>> annotations;
142 
143     // Maps metric ID of restricted metric to its stats.
144     std::map<int64_t, RestrictedMetricStats> restricted_metric_stats;
145 
146     std::list<int64_t> total_flush_latency_ns;
147 
148     // Stores the last 20 timestamps for computing sqlite db size.
149     std::list<int64_t> total_db_size_timestamps;
150 
151     // Stores the last 20 sizes of the sqlite db.
152     std::list<int64_t> total_db_sizes;
153 };
154 
// Counters describing the uid map; every counter starts at zero.
struct UidMapStats {
    int32_t changes{0};
    int32_t bytes_used{0};
    int32_t dropped_changes{0};
    int32_t deleted_apps{0};
};
161 
// Counters and start/end times (seconds) for one subscription; all start at zero.
struct SubscriptionStats {
    int32_t pushed_atom_count{0};
    int32_t pulled_atom_count{0};
    int32_t start_time_sec{0};
    int32_t end_time_sec{0};
    int32_t flush_count{0};
};
169 
170 // Keeps track of stats of statsd.
171 // Single instance shared across the process. All public methods are thread safe.
172 class StatsdStats {
173 public:
174     static StatsdStats& getInstance();
~StatsdStats()175     ~StatsdStats(){};
176 
177     const static int kDimensionKeySizeSoftLimit = 500;
178     static constexpr int kDimensionKeySizeHardLimit = 800;
179     static constexpr int kDimensionKeySizeHardLimitMin = 800;
180     static constexpr int kDimensionKeySizeHardLimitMax = 3000;
181 
182     // Per atom dimension key size limit
183     static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap;
184 
185     const static int kMaxConfigCountPerUid = 20;
186     const static int kMaxAlertCountPerConfig = 200;
187     const static int kMaxConditionCountPerConfig = 500;
188     const static int kMaxMetricCountPerConfig = 3000;
189     const static int kMaxMatcherCountPerConfig = 3500;
190 
191     // The max number of old config stats we keep.
192     const static int kMaxIceBoxSize = 20;
193 
194     const static int kMaxLoggerErrors = 20;
195 
196     const static int kMaxSystemServerRestarts = 20;
197 
198     const static int kMaxTimestampCount = 20;
199 
200     const static int kMaxLogSourceCount = 150;
201 
202     const static int kMaxPullAtomPackages = 100;
203 
204     const static int kMaxRestrictedMetricQueryCount = 20;
205 
206     const static int kMaxRestrictedMetricFlushLatencyCount = 20;
207 
208     const static int kMaxRestrictedConfigFlushLatencyCount = 20;
209 
210     const static int kMaxRestrictedConfigDbSizeCount = 20;
211 
212     // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd
213     // drops the metrics data in memory.
214     static const size_t kDefaultMaxMetricsBytesPerConfig = 2 * 1024 * 1024;
215 
216     // Hard limit for custom memory allowed for storing metrics per configuration.
217     static const size_t kHardMaxMetricsBytesPerConfig = 20 * 1024 * 1024;
218 
    // Max memory allowed for storing metrics per configuration before triggering an intent to
    // fetch data.
221     static const size_t kHardMaxTriggerGetDataBytes = 10 * 1024 * 1024;
222 
223     // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the
224     // data subscriber that it's time to call getData.
225     static const size_t kDefaultBytesPerConfigTriggerGetData = 192 * 1024;
226 
    // Soft memory limit per restricted configuration. Once this limit is exceeded,
    // we begin flushing in-memory restricted metrics to the database.
229     static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024;
230 
231     // Cap the UID map's memory usage to this. This should be fairly high since the UID information
232     // is critical for understanding the metrics.
233     const static size_t kMaxBytesUsedUidMap = 50 * 1024;
234 
235     // The number of deleted apps that are stored in the uid map.
236     const static int kMaxDeletedAppsInUidMap = 100;
237 
238     /* Minimum period between two broadcasts in nanoseconds. */
239     static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC;
240 
241     /* Min period between two checks of byte size per config key in nanoseconds. */
242     static const int64_t kMinByteSizeCheckPeriodNs = 1 * 60 * NS_PER_SEC;
243 
244     // Min period between two checks of byte size per config key in nanoseconds for V2 memory
245     // calculations.
246     static const int64_t kMinByteSizeV2CheckPeriodNs = 5 * 60 * NS_PER_SEC;
247 
248     /* Min period between two checks of restricted metrics TTLs. */
249     static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC;
250 
251     /* Min period between two flush operations of restricted metrics. */
252     static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC;
253 
254     /* Min period between two db guardrail check operations of restricted metrics. */
255     static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC;
256 
257     /* Minimum period between two activation broadcasts in nanoseconds. */
258     static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC;
259 
260     // Maximum age (30 days) that files on disk can exist in seconds.
261     static const int kMaxAgeSecond = 60 * 60 * 24 * 30;
262 
263     // Maximum age (2 days) that local history files on disk can exist in seconds.
264     static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2;
265 
266     // Maximum number of files (1000) that can be in stats directory on disk.
267     static const int kMaxFileNumber = 1000;
268 
269     // Maximum size of all files that can be written to stats directory on disk.
270     static const int kMaxFileSize = 50 * 1024 * 1024;
271 
272     // How long to try to clear puller cache from last time
273     static const long kPullerCacheClearIntervalSec = 1;
274 
275     // Max time to do a pull.
276     static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC;
277 
278     // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId.
279     static const int kMaxNonPlatformPushedAtoms = 600;
280 
281     // Maximum number of pushed atoms error statsd stats will track.
282     static const int kMaxPushedAtomErrorStatsSize = 100;
283 
284     // Maximum number of socket loss stats to track.
285     static const int kMaxSocketLossStatsSize = 50;
286 
287     // Maximum atom id value that we consider a platform pushed atom.
288     // This should be updated once highest pushed atom id in atoms.proto approaches this value.
289     static const int kMaxPushedAtomId = 1500;
290 
291     // Atom id that is the start of the pulled atoms.
292     static const int kPullAtomStartTag = 10000;
293 
294     // Atom id that is the start of vendor atoms.
295     static const int kVendorAtomStartTag = 100000;
296 
297     // Vendor pulled atom start id.
298     static const int32_t kVendorPulledAtomStartTag = 150000;
299 
300     // Beginning of range for timestamp truncation.
301     static const int32_t kTimestampTruncationStartTag = 300000;
302 
303     // End of range for timestamp truncation.
304     static const int32_t kTimestampTruncationEndTag = 304999;
305 
306     // Max accepted atom id.
307     static const int32_t kMaxAtomTag = 200000;
308 
309     static const int64_t kInt64Max = 0x7fffffffffffffffLL;
310 
311     static const int32_t kMaxLoggedBucketDropEvents = 10;
312 
313     static const int32_t kNumBinsInSocketBatchReadHistogram = 30;
314     static const int32_t kLargeBatchReadThreshold = 1000;
315     static const int32_t kMaxLargeBatchReadSize = 20;
316     static const int32_t kMaxLargeBatchReadAtomThreshold = 50;
317 
318     /**
319      * Report a new config has been received and report the static stats about the config.
320      *
321      * The static stats include: the count of metrics, conditions, matchers, and alerts.
322      * If the config is not valid, this config stats will be put into icebox immediately.
323      */
324     void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount,
325                             int matchersCount, int alertCount,
326                             const std::list<std::pair<const int64_t, const int32_t>>& annotations,
327                             const std::optional<InvalidConfigReason>& reason);
328     /**
329      * Report a config has been removed.
330      */
331     void noteConfigRemoved(const ConfigKey& key);
332     /**
333      * Report a config has been reset when ttl expires.
334      */
335     void noteConfigReset(const ConfigKey& key);
336 
337     /**
338      * Report a broadcast has been sent to a config owner to collect the data.
339      */
340     void noteBroadcastSent(const ConfigKey& key);
341 
342     /**
343      * Report that a config has become activated or deactivated.
344      * This can be different from whether or not a broadcast is sent if the
345      * guardrail prevented the broadcast from being sent.
346      */
347     void noteActiveStatusChanged(const ConfigKey& key, bool activate);
348 
349     /**
350      * Report a config's metrics data has been dropped.
351      */
352     void noteDataDropped(const ConfigKey& key, const size_t totalBytes);
353 
354     /**
355      * Report metrics data report has been sent.
356      *
357      * The report may be requested via StatsManager API, or through adb cmd.
358      */
359     void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes,
360                                const int32_t reportNumber);
361 
362     /**
363      * Report failure in creating the device info metadata table for restricted configs.
364      */
365     void noteDeviceInfoTableCreationFailed(const ConfigKey& key);
366 
367     /**
368      * Report db corruption for restricted configs.
369      */
370     void noteDbCorrupted(const ConfigKey& key);
371 
372     /**
373      * Report db exceeded the size limit for restricted configs.
374      */
375     void noteDbSizeExceeded(const ConfigKey& key);
376 
377     /**
378      * Report db size check with stat for restricted configs failed.
379      */
380     void noteDbStatFailed(const ConfigKey& key);
381 
382     /**
383      * Report restricted config is invalid.
384      */
385     void noteDbConfigInvalid(const ConfigKey& key);
386 
387     /**
388      * Report db is too old for restricted configs.
389      */
390     void noteDbTooOld(const ConfigKey& key);
391 
392     /**
393      * Report db was deleted due to config removal.
394      */
395     void noteDbDeletionConfigRemoved(const ConfigKey& key);
396 
397     /**
398      * Report db was deleted due to config update.
399      */
400     void noteDbDeletionConfigUpdated(const ConfigKey& key);
401 
402     /**
403      * Reports that the promotion for ConfigMetadataProvider failed.
404      */
405     void noteConfigMetadataProviderPromotionFailed(const ConfigKey& key);
406 
407     /**
408      * Report the size of output tuple of a condition.
409      *
410      * Note: only report when the condition has an output dimension, and the tuple
411      * count > kDimensionKeySizeSoftLimit.
412      *
413      * [key]: The config key that this condition belongs to.
414      * [id]: The id of the condition.
415      * [size]: The output tuple size.
416      */
417     void noteConditionDimensionSize(const ConfigKey& key, int64_t id, int size);
418 
419     /**
420      * Report the size of output tuple of a metric.
421      *
422      * Note: only report when the metric has an output dimension, and the tuple
423      * count > kDimensionKeySizeSoftLimit.
424      *
425      * [key]: The config key that this metric belongs to.
426      * [id]: The id of the metric.
427      * [size]: The output tuple size.
428      */
429     void noteMetricDimensionSize(const ConfigKey& key, int64_t id, int size);
430 
431     /**
432      * Report the max size of output tuple of dimension in condition across dimensions in what.
433      *
434      * Note: only report when the metric has an output dimension in condition, and the max tuple
435      * count > kDimensionKeySizeSoftLimit.
436      *
437      * [key]: The config key that this metric belongs to.
438      * [id]: The id of the metric.
439      * [size]: The output tuple size.
440      */
441     void noteMetricDimensionInConditionSize(const ConfigKey& key, int64_t id, int size);
442 
443     /**
444      * Report a matcher has been matched.
445      *
446      * [key]: The config key that this matcher belongs to.
447      * [id]: The id of the matcher.
448      */
449     void noteMatcherMatched(const ConfigKey& key, int64_t id);
450 
451     /**
452      * Report that an anomaly detection alert has been declared.
453      *
454      * [key]: The config key that this alert belongs to.
455      * [id]: The id of the alert.
456      */
457     void noteAnomalyDeclared(const ConfigKey& key, int64_t id);
458 
459     /**
460      * Report an atom event has been logged.
461      */
462     void noteAtomLogged(int atomId, int32_t timeSec, bool isSkipped);
463 
464     /**
465      * Report that statsd modified the anomaly alarm registered with StatsCompanionService.
466      */
467     void noteRegisteredAnomalyAlarmChanged();
468 
469     /**
470      * Report that statsd modified the periodic alarm registered with StatsCompanionService.
471      */
472     void noteRegisteredPeriodicAlarmChanged();
473 
474     /**
475      * Records the number of delta entries that are being dropped from the uid map.
476      */
477     void noteUidMapDropped(int deltas);
478 
479     /**
480      * Records that an app was deleted (from statsd's map).
481      */
482     void noteUidMapAppDeletionDropped();
483 
484     /**
485      * Updates the number of changes currently stored in the uid map.
486      */
487     void setUidMapChanges(int changes);
488     void setCurrentUidMapMemory(int bytes);
489 
490     /*
     * Updates the minimum interval between pulls for a pulled atom.
492      */
493     void updateMinPullIntervalSec(int pullAtomId, long intervalSec);
494 
495     /*
496      * Notes an atom is pulled.
497      */
498     void notePull(int pullAtomId);
499 
500     /*
501      * Notes an atom is served from puller cache.
502      */
503     void notePullFromCache(int pullAtomId);
504 
505     /*
506      * Notify data error for pulled atom.
507      */
508     void notePullDataError(int pullAtomId);
509 
510     /*
511      * Records time for actual pulling, not including those served from cache and not including
512      * statsd processing delays.
513      */
514     void notePullTime(int pullAtomId, int64_t pullTimeNs);
515 
516     /*
517      * Records pull delay for a pulled atom, including those served from cache and including statsd
518      * processing delays.
519      */
520     void notePullDelay(int pullAtomId, int64_t pullDelayNs);
521 
522     /*
523      * Records pull exceeds timeout for the puller.
524      */
525     void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis);
526 
527     /*
528      * Records pull exceeds max delay for a metric.
529      */
530     void notePullExceedMaxDelay(int pullAtomId);
531 
532     /*
533      * Records when system server restarts.
534      */
535     void noteSystemServerRestart(int32_t timeSec);
536 
537     /**
538      * Records statsd skipped an event.
539      */
540     void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError,
541                      int32_t lastAtomTag, int32_t uid, int32_t pid);
542 
543     /**
544      * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if
545      * the pull timed out, or if the outgoing binder call failed.
546      * This count will only increment if the puller was actually invoked.
547      *
548      * It does not include a pull not occurring due to not finding the appropriate
549      * puller. These cases are covered in other counts.
550      */
551     void notePullFailed(int atomId);
552 
553     /**
554      * Records that the pull of an atom has failed due to not having a uid provider.
555      */
556     void notePullUidProviderNotFound(int atomId);
557 
558     /**
     * Records that the pull of an atom has failed due to not finding a puller registered by a
560      * trusted uid.
561      */
562     void notePullerNotFound(int atomId);
563 
564     /**
565      * Records that the pull has failed due to the outgoing binder call failing.
566      */
567     void notePullBinderCallFailed(int atomId);
568 
569     /**
570      * A pull with no data occurred
571      */
572     void noteEmptyData(int atomId);
573 
574     /**
575      * Records that a puller callback for the given atomId was registered or unregistered.
576      *
577      * @param registered True if the callback was registered, false if was unregistered.
578      */
579     void notePullerCallbackRegistrationChanged(int atomId, bool registered);
580 
581     /**
582      * Hard limit was reached in the cardinality of an atom
583      */
584     void noteHardDimensionLimitReached(int64_t metricId);
585 
586     /**
587      * A log event was too late, arrived in the wrong bucket and was skipped
588      */
589     void noteLateLogEventSkipped(int64_t metricId);
590 
591     /**
592      * Buckets were skipped as time elapsed without any data for them
593      */
594     void noteSkippedForwardBuckets(int64_t metricId);
595 
596     /**
597      * An unsupported value type was received
598      */
599     void noteBadValueType(int64_t metricId);
600 
601     /**
602      * Buckets were dropped due to reclaim memory.
603      */
604     void noteBucketDropped(int64_t metricId);
605 
606     /**
607      * A condition change was too late, arrived in the wrong bucket and was skipped
608      */
609     void noteConditionChangeInNextBucket(int64_t metricId);
610 
611     /**
612      * A bucket has been tagged as invalid.
613      */
614     void noteInvalidatedBucket(int64_t metricId);
615 
616     /**
617      * Tracks the total number of buckets (include skipped/invalid buckets).
618      */
619     void noteBucketCount(int64_t metricId);
620 
621     /**
622      * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and
623      * the end of the bucket.
624      */
625     void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs);
626 
627     /**
628      * Number of buckets with unknown condition.
629      */
630     void noteBucketUnknownCondition(int64_t metricId);
631 
632     /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp
633      * in the queue */
634     void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId, bool isSkipped);
635 
636     /* Notes queue max size seen so far and associated timestamp */
637     void noteEventQueueSize(int32_t size, int64_t eventTimestampNs);
638 
639     /**
640      * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast
641      * should have been sent, but instead was skipped due to hitting the guardrail.
642      */
643     void noteActivationBroadcastGuardrailHit(const int uid);
644 
645     /**
646      * Reports that an atom is erroneous or cannot be parsed successfully by
647      * statsd. An atom tag of 0 indicates that the client did not supply the
648      * atom id within the encoding.
649      *
650      * For pushed atoms only, this call should be preceded by a call to
651      * noteAtomLogged.
652      */
653     void noteAtomError(int atomTag, bool pull = false);
654 
    /** Report query of restricted metric succeeded **/
656     void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage,
657                                           const std::optional<int32_t> configUid,
658                                           const int32_t callingUid, int64_t queryLatencyNs);
659 
660     /** Report query of restricted metric failed **/
661     void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage,
662                                          const std::optional<int32_t> configUid,
663                                          const int32_t callingUid, const InvalidQueryReason reason);
664 
665     /** Report query of restricted metric failed along with an error string **/
666     void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage,
667                                          const std::optional<int32_t> configUid,
668                                          const int32_t callingUid, const InvalidQueryReason reason,
669                                          const string& error);
670 
671     // Reports that a restricted metric fails to be inserted to database.
672     void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId);
673 
674     // Reports that a restricted metric fails to create table in database.
675     void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, int64_t metricId);
676 
677     // Reports that a restricted metric fails to delete table in database.
678     void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, int64_t metricId);
679 
680     // Reports the time it takes for a restricted metric to flush the data to the database.
681     void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, int64_t metricId,
682                                           const int64_t flushLatencyNs);
683 
684     // Reports that a restricted metric had a category change.
685     void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, int64_t metricId);
686 
    // Reports the time it takes to flush a restricted config to the database.
688     void noteRestrictedConfigFlushLatency(const ConfigKey& configKey,
689                                           const int64_t totalFlushLatencyNs);
690 
691     // Reports the size of the internal sqlite db.
692     void noteRestrictedConfigDbSize(const ConfigKey& configKey, int64_t elapsedTimeNs,
693                                     const int64_t dbSize);
694 
695     /**
696      * Records libstatssocket was not able to write into socket.
697      */
698     void noteAtomSocketLoss(const SocketLossInfo& lossInfo);
699 
700     /**
701      * Report a new subscription has started and report the static stats about the subscription
702      * config.
703      *
704      * The static stats include: the count of pushed atoms and pulled atoms.
705      */
706     void noteSubscriptionStarted(int subId, int32_t pushedAtomCount, int32_t pulledAtomCount);
707 
708     /**
709      * Report an existing subscription has ended.
710      */
711     void noteSubscriptionEnded(int subId);
712 
713     /**
714      * Report an existing subscription was flushed.
715      */
716     void noteSubscriptionFlushed(int subId);
717 
718     /**
719      * Report an atom was pulled for a subscription.
720      */
721     void noteSubscriptionAtomPulled(int atomId);
722 
723     /**
724      * Report subscriber pull thread wakeup.
725      */
726     void noteSubscriptionPullThreadWakeup();
727 
728     void noteBatchSocketRead(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs,
729                              int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs,
730                              const std::unordered_map<int32_t, int32_t>& atomCounts);
731 
732     /**
733      * Reset the historical stats. Including all stats in icebox, and the tracked stats about
734      * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue
735      * to collect stats after reset() has been called.
736      */
737     void reset();
738 
739     /**
740      * Output the stats in protobuf binary format to [buffer].
741      *
742      * [reset]: whether to clear the historical stats after the call.
743      */
744     void dumpStats(std::vector<uint8_t>* buffer, bool reset);
745 
746     /**
747      * Output statsd stats in human readable format to [out] file descriptor.
748      */
749     void dumpStats(int outFd) const;
750 
751     /**
752      * Returns true if dimension guardrail has been hit since boot for given metric.
753      */
754     bool hasHitDimensionGuardrail(int64_t metricId) const;
755 
756     /**
757      * Return soft and hard atom key dimension size limits as an std::pair.
758      */
759     static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(int atomId,
760                                                                    size_t defaultHardLimit);
761 
clampDimensionKeySizeLimit(int dimLimit)762     inline static int clampDimensionKeySizeLimit(int dimLimit) {
763         return std::clamp(dimLimit, kDimensionKeySizeHardLimitMin, kDimensionKeySizeHardLimitMax);
764     }
765 
766     /**
767      * Return the unique identifier for the statsd stats report. This id is
768      * reset on boot.
769      */
getStatsdStatsId()770     inline int32_t getStatsdStatsId() const {
771         return mStatsdStatsId;
772     }
773 
774     /**
775      * Returns true if there is recorded event queue overflow
776      */
777     bool hasEventQueueOverflow() const;
778 
779     typedef std::unordered_map<int32_t, int32_t> QueueOverflowAtomsStatsMap;
780     QueueOverflowAtomsStatsMap getQueueOverflowAtomsStats() const;
781 
782     /**
783      * Returns true if there is recorded socket loss
784      */
785     bool hasSocketLoss() const;
786 
787     typedef struct PullTimeoutMetadata {
788         int64_t pullTimeoutUptimeMillis;
789         int64_t pullTimeoutElapsedMillis;
PullTimeoutMetadataPullTimeoutMetadata790         PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis)
791             : pullTimeoutUptimeMillis(uptimeMillis),
792               pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */
793         }
794     } PullTimeoutMetadata;
795 
796     typedef struct {
797         long totalPull = 0;
798         long totalPullFromCache = 0;
799         long minPullIntervalSec = LONG_MAX;
800         int64_t avgPullTimeNs = 0;
801         int64_t maxPullTimeNs = 0;
802         long numPullTime = 0;
803         int64_t avgPullDelayNs = 0;
804         int64_t maxPullDelayNs = 0;
805         long numPullDelay = 0;
806         long dataError = 0;
807         long pullTimeout = 0;
808         long pullExceedMaxDelay = 0;
809         long pullFailed = 0;
810         long pullUidProviderNotFound = 0;
811         long pullerNotFound = 0;
812         long emptyData = 0;
813         long registeredCount = 0;
814         long unregisteredCount = 0;
815         int32_t atomErrorCount = 0;
816         long binderCallFailCount = 0;
817         std::list<PullTimeoutMetadata> pullTimeoutMetadata;
818         int32_t subscriptionPullCount = 0;
819     } PulledAtomStats;
820 
821     typedef struct {
822         long hardDimensionLimitReached = 0;
823         long lateLogEventSkipped = 0;
824         long skippedForwardBuckets = 0;
825         long badValueType = 0;
826         long conditionChangeInNextBucket = 0;
827         long invalidatedBucket = 0;
828         long bucketDropped = 0;
829         int64_t minBucketBoundaryDelayNs = 0;
830         int64_t maxBucketBoundaryDelayNs = 0;
831         long bucketUnknownCondition = 0;
832         long bucketCount = 0;
833     } AtomMetricStats;
834 
835 private:
836     StatsdStats();
837 
838     mutable std::mutex mLock;
839 
840     int32_t mStartTimeSec;
841 
842     // Random id set using rand() during the initialization. Used to uniquely
843     // identify a session. This is more reliable than mStartTimeSec due to the
844     // unreliable nature of wall clock times.
845     const int32_t mStatsdStatsId;
846 
847     // Track the number of dropped entries used by the uid map.
848     UidMapStats mUidMapStats;
849 
850     // The stats about the configs that are still in use.
851     // The map size is capped by kMaxConfigCount.
852     std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats;
853 
854     // Stores the stats for the configs that are no longer in use.
    // The size of the list is capped by kMaxIceBoxSize.
856     std::list<std::shared_ptr<ConfigStats>> mIceBox;
857 
858     // Stores the number of times a pushed atom is logged and skipped (if skipped).
859     // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms
860     // out of that range will be put in mNonPlatformPushedAtomStats.
861     // This is a vector, not a map because it will be accessed A LOT -- for each stats log.
862     struct PushedAtomStats {
863         int logCount = 0;
864         int skipCount = 0;
865     };
866 
867     std::vector<PushedAtomStats> mPushedAtomStats;
868 
869     // Stores the number of times a pushed atom is logged and skipped for atom ids above
870     // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms.
871     std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats;
872 
873     // Stores the number of times a pushed atom is dropped due to queue overflow event.
    // We do not expect this to happen often, so a map is preferable to a pre-allocated vector.
875     // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms.
876     QueueOverflowAtomsStatsMap mPushedAtomDropsStats;
877 
878     // Maps PullAtomId to its stats. The size is capped by the puller atom counts.
879     std::map<int, PulledAtomStats> mPulledAtomStats;
880 
881     // Stores the number of times a pushed atom was logged erroneously. The
882     // corresponding counts for pulled atoms are stored in PulledAtomStats.
883     // The max size of this map is kMaxPushedAtomErrorStatsSize.
884     std::map<int, int> mPushedAtomErrorStats;
885 
886     // Stores the number of times a pushed atom was lost due to socket error.
    // Keeps a counter per uid, per tag and per error, along with the timestamps at which the
    // loss was first and last observed.
889     struct SocketLossStats {
SocketLossStatsSocketLossStats890         SocketLossStats(int32_t uid, int64_t firstLossTsNanos, int64_t lastLossTsNanos)
891             : mUid(uid), mFirstLossTsNanos(firstLossTsNanos), mLastLossTsNanos(lastLossTsNanos) {
892         }
893 
894         int32_t mUid;
895         int64_t mFirstLossTsNanos;
896         int64_t mLastLossTsNanos;
897         // atom loss count per error, atom id
898         struct AtomLossInfo {
AtomLossInfoSocketLossStats::AtomLossInfo899             AtomLossInfo(int32_t atomId, int32_t error, int32_t count)
900                 : mAtomId(atomId), mError(error), mCount(count) {
901             }
902             int mAtomId;
903             int mError;
904             int mCount;
905         };
906         std::vector<AtomLossInfo> mLossCountPerErrorAtomId;
907     };
908     // The max size of this list is kMaxSocketLossStatsSize.
909     std::list<SocketLossStats> mSocketLossStats;
910 
911     // Stores the number of times a pushed atom loss info was dropped from the stats
912     // on libstatssocket side due to guardrail hit.
913     // Represents counter per uid.
914     // The max size of this map is kMaxSocketLossStatsSize.
915     std::map<int32_t, int32_t> mSocketLossStatsOverflowCounters;
916 
917     // Maps metric ID to its stats. The size is capped by the number of metrics.
918     std::map<int64_t, AtomMetricStats> mAtomMetricStats;
919 
    // Maps uids to the times when the activation-changed broadcast was not sent because the
    // guardrail was hit. The size is capped by the number of configs, and up to 20 times per uid.
922     std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats;
923 
924     struct LogLossStats {
LogLossStatsLogLossStats925         LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid,
926                      int32_t pid)
927             : mWallClockSec(sec),
928               mCount(count),
929               mLastError(error),
930               mLastTag(tag),
931               mUid(uid),
932               mPid(pid) {
933         }
934         int32_t mWallClockSec;
935         int32_t mCount;
936         // error code defined in linux/errno.h
937         int32_t mLastError;
938         int32_t mLastTag;
939         int32_t mUid;
940         int32_t mPid;
941     };
942 
943     // Max of {(now - oldestEventTimestamp) when overflow happens}.
944     // This number is helpful to understand how SLOW statsd can be.
945     int64_t mMaxQueueHistoryNs = 0;
946 
947     // Min of {(now - oldestEventTimestamp) when overflow happens}.
    // This number is helpful to understand how FAST events flood into statsd.
949     int64_t mMinQueueHistoryNs = kInt64Max;
950 
951     // Total number of events that are lost due to queue overflow.
952     int32_t mOverflowCount = 0;
953 
954     // Max number of events stored into the queue seen so far.
955     int32_t mEventQueueMaxSizeObserved = 0;
956 
957     // Event timestamp for associated max size hit.
958     int64_t mEventQueueMaxSizeObservedElapsedNanos = 0;
959 
960     // Timestamps when we detect log loss, and the number of logs lost.
961     std::list<LogLossStats> mLogLossStats;
962 
963     std::list<int32_t> mSystemServerRestartSec;
964 
965     std::vector<int64_t> mSocketBatchReadHistogram;
966 
967     // Stores stats about large socket batch reads
968     struct LargeBatchSocketReadStats {
LargeBatchSocketReadStatsLargeBatchSocketReadStats969         LargeBatchSocketReadStats(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs,
970                                   int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs,
971                                   const std::unordered_map<int32_t, int32_t>& atomCounts)
972             : mSize(size),
973               mLastReadTimeNs(lastReadTimeNs),
974               mCurrReadTimeNs(currReadTimeNs),
975               mMinAtomReadTimeNs(minAtomReadTimeNs),
976               mMaxAtomReadTimeNs(maxAtomReadTimeNs),
977               mCommonAtomCounts(atomCounts) {
978         }
979 
980         int32_t mSize;
981         // The elapsed time of the previous and current read times.
982         int64_t mLastReadTimeNs;
983         int64_t mCurrReadTimeNs;
984         // The min and max times of the LogEvents processed in the batch
985         int64_t mMinAtomReadTimeNs;
986         int64_t mMaxAtomReadTimeNs;
987         // Map of atom id to count for atoms logged more than kMaxLargeBatchReadAtomThreshold times.
988         std::unordered_map<int32_t, int32_t> mCommonAtomCounts;
989     };
990     // The max size of this list is kMaxSocketLossStatsSize.
991     std::list<LargeBatchSocketReadStats> mLargeBatchSocketReadStats;
992 
993     struct RestrictedMetricQueryStats {
RestrictedMetricQueryStatsRestrictedMetricQueryStats994         RestrictedMetricQueryStats(int32_t callingUid, int64_t configId,
995                                    const string& configPackage, std::optional<int32_t> configUid,
996                                    int64_t queryTimeNs,
997                                    std::optional<InvalidQueryReason> invalidQueryReason,
998                                    const string& error, std::optional<int64_t> queryLatencyNs)
999             : mCallingUid(callingUid),
1000               mConfigId(configId),
1001               mConfigPackage(configPackage),
1002               mConfigUid(configUid),
1003               mQueryWallTimeNs(queryTimeNs),
1004               mInvalidQueryReason(invalidQueryReason),
1005               mError(error),
1006               mQueryLatencyNs(queryLatencyNs) {
1007             mHasError = invalidQueryReason.has_value();
1008         }
1009         int32_t mCallingUid;
1010         int64_t mConfigId;
1011         string mConfigPackage;
1012         std::optional<int32_t> mConfigUid;
1013         int64_t mQueryWallTimeNs;
1014         std::optional<InvalidQueryReason> mInvalidQueryReason;
1015         bool mHasError;
1016         string mError;
1017         std::optional<int64_t> mQueryLatencyNs;
1018     };
1019     std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats;
1020 
1021     void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage,
1022                                                const std::optional<int32_t> configUid,
1023                                                const int32_t callingUid,
1024                                                const InvalidQueryReason reason,
1025                                                const string& error);
1026 
1027     int32_t mSubscriptionPullThreadWakeupCount = 0;
1028 
1029     // Maps Subscription ID to the corresponding SubscriptionStats struct object.
1030     // Size of this map is capped by ShellSubscriber::kMaxSubscriptions.
1031     std::map<int32_t, SubscriptionStats> mSubscriptionStats;
1032 
1033     // Stores the number of times statsd modified the anomaly alarm registered with
1034     // StatsCompanionService.
1035     int mAnomalyAlarmRegisteredStats = 0;
1036 
1037     // Stores the number of times statsd registers the periodic alarm changes
1038     int mPeriodicAlarmRegisteredStats = 0;
1039 
1040     void noteConfigResetInternalLocked(const ConfigKey& key);
1041 
1042     void noteConfigRemovedInternalLocked(const ConfigKey& key);
1043 
1044     void resetInternalLocked();
1045 
1046     void noteAtomLoggedLocked(int atomId, bool isSkipped);
1047 
1048     void noteAtomDroppedLocked(int atomId);
1049 
1050     void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec);
1051 
1052     void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, int32_t timeSec,
1053                                const int32_t reportNumber);
1054 
1055     void noteBroadcastSent(const ConfigKey& key, int32_t timeSec);
1056 
1057     void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec);
1058 
1059     void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec);
1060 
1061     void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats);
1062 
1063     int getPushedAtomErrorsLocked(int atomId) const;
1064 
1065     int getPushedAtomDropsLocked(int atomId) const;
1066 
1067     bool hasRestrictedConfigErrors(const std::shared_ptr<ConfigStats>& configStats) const;
1068 
1069     /**
1070      * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference
1071      * will live as long as `this`.
1072      */
1073     StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId);
1074 
1075     FRIEND_TEST(LogEventQueue_test, TestQueueMaxSize);
1076     FRIEND_TEST(SocketParseMessageTest, TestProcessMessage);
1077     FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved);
1078     FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit);
1079     FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor);
1080     FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats);
1081     FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats);
1082     FRIEND_TEST(StatsdStatsTest, TestAtomLog);
1083     FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats);
1084     FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats);
1085     FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats);
1086     FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats);
1087     FRIEND_TEST(StatsdStatsTest, TestConfigMetadataProviderPromotionFailed);
1088     FRIEND_TEST(StatsdStatsTest, TestConfigRemove);
1089     FRIEND_TEST(StatsdStatsTest, TestHasHitDimensionGuardrail);
1090     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd);
1091     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId);
1092     FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId);
1093     FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog);
1094     FRIEND_TEST(StatsdStatsTest, TestPullAtomStats);
1095     FRIEND_TEST(StatsdStatsTest, TestQueueStats);
1096     FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats);
1097     FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats);
1098     FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider);
1099     FRIEND_TEST(StatsdStatsTest, TestSocketLossStats);
1100     FRIEND_TEST(StatsdStatsTest, TestSocketLossStatsOverflowCounter);
1101     FRIEND_TEST(StatsdStatsTest, TestSubStats);
1102     FRIEND_TEST(StatsdStatsTest, TestSubscriptionAtomPulled);
1103     FRIEND_TEST(StatsdStatsTest, TestSubscriptionEnded);
1104     FRIEND_TEST(StatsdStatsTest, TestSubscriptionFlushed);
1105     FRIEND_TEST(StatsdStatsTest, TestSubscriptionPullThreadWakeup);
1106     FRIEND_TEST(StatsdStatsTest, TestSubscriptionStarted);
1107     FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedMaxActiveSubscriptions);
1108     FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedRemoveFinishedSubscription);
1109     FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash);
1110     FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold);
1111     FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd);
1112     FRIEND_TEST(StatsdStatsTest, TestSocketBatchReadStats);
1113 };
1114 
1115 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason,
1116                                                          const int64_t matcherId);
1117 
1118 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason,
1119                                                          const int64_t metricId,
1120                                                          const int64_t matcherId);
1121 
1122 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason,
1123                                                            const int64_t conditionId);
1124 
1125 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason,
1126                                                            const int64_t metricId,
1127                                                            const int64_t conditionId);
1128 
1129 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason,
1130                                                        const int64_t metricId,
1131                                                        const int64_t stateId);
1132 
1133 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason,
1134                                                        const int64_t alertId);
1135 
1136 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason,
1137                                                        const int64_t metricId,
1138                                                        const int64_t alertId);
1139 
1140 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason,
1141                                                        const int64_t alarmId);
1142 
1143 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason,
1144                                                               const int64_t subscriptionId);
1145 
1146 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm(
1147         const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alarmId);
1148 
1149 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert(
1150         const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alertId);
1151 
1152 }  // namespace statsd
1153 }  // namespace os
1154 }  // namespace android
1155