1 /* 2 * Copyright 2017, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include <gtest/gtest_prod.h> 19 #include <log/log_time.h> 20 #include <src/guardrail/stats_log_enums.pb.h> 21 22 #include <list> 23 #include <mutex> 24 #include <string> 25 #include <unordered_map> 26 #include <vector> 27 28 #include "config/ConfigKey.h" 29 #include "logd/logevent_util.h" 30 31 namespace android { 32 namespace os { 33 namespace statsd { 34 35 struct InvalidConfigReason { 36 InvalidConfigReasonEnum reason; 37 std::optional<int64_t> metricId; 38 std::optional<int64_t> stateId; 39 std::optional<int64_t> alertId; 40 std::optional<int64_t> alarmId; 41 std::optional<int64_t> subscriptionId; 42 std::vector<int64_t> matcherIds; 43 std::vector<int64_t> conditionIds; InvalidConfigReasonInvalidConfigReason44 InvalidConfigReason(){}; InvalidConfigReasonInvalidConfigReason45 InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){}; InvalidConfigReasonInvalidConfigReason46 InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId) 47 : reason(reason), metricId(metricId){}; 48 bool operator==(const InvalidConfigReason& other) const { 49 return (this->reason == other.reason) && (this->metricId == other.metricId) && 50 (this->stateId == other.stateId) && (this->alertId == other.alertId) && 51 (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) && 52 (this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds); 53 } 54 55 // For better failure messages in statsd_test 56 friend void PrintTo(const InvalidConfigReason& obj, std::ostream* os); 57 }; 58 59 typedef struct { 60 int64_t insertError = 0; 61 int64_t tableCreationError = 0; 62 int64_t tableDeletionError = 0; 63 std::list<int64_t> flushLatencyNs; 64 int64_t categoryChangedCount = 0; 65 } RestrictedMetricStats; 66 67 struct DumpReportStats { DumpReportStatsDumpReportStats68 DumpReportStats(int32_t dumpReportSec, int32_t dumpReportSize, int32_t reportNumber) 69 : mDumpReportTimeSec(dumpReportSec), 70 mDumpReportSizeBytes(dumpReportSize), 71 mDumpReportNumber(reportNumber) { 72 } 73 int32_t mDumpReportTimeSec = 0; 74 int32_t mDumpReportSizeBytes = 0; 75 int32_t mDumpReportNumber = 0; 76 }; 77 78 struct ConfigStats { 79 int32_t uid; 80 int64_t id; 81 int32_t creation_time_sec; 82 int32_t deletion_time_sec = 0; 83 int32_t reset_time_sec = 0; 84 int32_t metric_count; 85 int32_t condition_count; 86 int32_t matcher_count; 87 int32_t alert_count; 88 bool is_valid; 89 bool device_info_table_creation_failed = false; 90 int32_t db_corrupted_count = 0; 91 int32_t db_deletion_stat_failed = 0; 92 int32_t db_deletion_size_exceeded_limit = 0; 93 int32_t db_deletion_config_invalid = 0; 94 int32_t db_deletion_too_old = 0; 95 int32_t db_deletion_config_removed = 0; 96 int32_t db_deletion_config_updated = 0; 97 // Stores the number of ConfigMetadataProvider promotion failures 98 int32_t config_metadata_provider_promote_failure = 0; 99 100 // Stores reasons for why config is valid or not 101 std::optional<InvalidConfigReason> reason; 102 103 std::list<int32_t> broadcast_sent_time_sec; 104 105 // Times at which this config is activated. 106 std::list<int32_t> activation_time_sec; 107 108 // Times at which this config is deactivated. 109 std::list<int32_t> deactivation_time_sec; 110 111 std::list<int32_t> data_drop_time_sec; 112 // Number of bytes dropped at corresponding time. 113 std::list<int64_t> data_drop_bytes; 114 115 std::list<DumpReportStats> dump_report_stats; 116 117 // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount. 118 std::map<const int64_t, int> matcher_stats; 119 120 // Stores the number of output tuple of condition trackers when it's bigger than 121 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 122 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 123 std::map<const int64_t, int> condition_stats; 124 125 // Stores the number of output tuple of metric producers when it's bigger than 126 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 127 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 128 std::map<const int64_t, int> metric_stats; 129 130 // Stores the max number of output tuple of dimensions in condition across dimensions in what 131 // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is 132 // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by 133 // kMaxConfigCount. 134 std::map<const int64_t, int> metric_dimension_in_condition_stats; 135 136 // Stores the number of times an anomaly detection alert has been declared. 137 // The map size is capped by kMaxConfigCount. 138 std::map<const int64_t, int> alert_stats; 139 140 // Stores the config ID for each sub-config used. 141 std::list<std::pair<const int64_t, const int32_t>> annotations; 142 143 // Maps metric ID of restricted metric to its stats. 144 std::map<int64_t, RestrictedMetricStats> restricted_metric_stats; 145 146 std::list<int64_t> total_flush_latency_ns; 147 148 // Stores the last 20 timestamps for computing sqlite db size. 149 std::list<int64_t> total_db_size_timestamps; 150 151 // Stores the last 20 sizes of the sqlite db. 152 std::list<int64_t> total_db_sizes; 153 }; 154 155 struct UidMapStats { 156 int32_t changes = 0; 157 int32_t bytes_used = 0; 158 int32_t dropped_changes = 0; 159 int32_t deleted_apps = 0; 160 }; 161 162 struct SubscriptionStats { 163 int32_t pushed_atom_count = 0; 164 int32_t pulled_atom_count = 0; 165 int32_t start_time_sec = 0; 166 int32_t end_time_sec = 0; 167 int32_t flush_count = 0; 168 }; 169 170 // Keeps track of stats of statsd. 171 // Single instance shared across the process. All public methods are thread safe. 172 class StatsdStats { 173 public: 174 static StatsdStats& getInstance(); ~StatsdStats()175 ~StatsdStats(){}; 176 177 const static int kDimensionKeySizeSoftLimit = 500; 178 static constexpr int kDimensionKeySizeHardLimit = 800; 179 static constexpr int kDimensionKeySizeHardLimitMin = 800; 180 static constexpr int kDimensionKeySizeHardLimitMax = 3000; 181 182 // Per atom dimension key size limit 183 static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap; 184 185 const static int kMaxConfigCountPerUid = 20; 186 const static int kMaxAlertCountPerConfig = 200; 187 const static int kMaxConditionCountPerConfig = 500; 188 const static int kMaxMetricCountPerConfig = 3000; 189 const static int kMaxMatcherCountPerConfig = 3500; 190 191 // The max number of old config stats we keep. 192 const static int kMaxIceBoxSize = 20; 193 194 const static int kMaxLoggerErrors = 20; 195 196 const static int kMaxSystemServerRestarts = 20; 197 198 const static int kMaxTimestampCount = 20; 199 200 const static int kMaxLogSourceCount = 150; 201 202 const static int kMaxPullAtomPackages = 100; 203 204 const static int kMaxRestrictedMetricQueryCount = 20; 205 206 const static int kMaxRestrictedMetricFlushLatencyCount = 20; 207 208 const static int kMaxRestrictedConfigFlushLatencyCount = 20; 209 210 const static int kMaxRestrictedConfigDbSizeCount = 20; 211 212 // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd 213 // drops the metrics data in memory. 214 static const size_t kDefaultMaxMetricsBytesPerConfig = 2 * 1024 * 1024; 215 216 // Hard limit for custom memory allowed for storing metrics per configuration. 217 static const size_t kHardMaxMetricsBytesPerConfig = 20 * 1024 * 1024; 218 219 // Max memory allowed for storing metrics per configuration before triggering a intent to fetch 220 // data. 221 static const size_t kHardMaxTriggerGetDataBytes = 10 * 1024 * 1024; 222 223 // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the 224 // data subscriber that it's time to call getData. 225 static const size_t kDefaultBytesPerConfigTriggerGetData = 192 * 1024; 226 227 // Soft memory limit per restricted configuration. Once this limit is exceeded, 228 // we begin flush in-memory restricted metrics to database. 229 static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024; 230 231 // Cap the UID map's memory usage to this. This should be fairly high since the UID information 232 // is critical for understanding the metrics. 233 const static size_t kMaxBytesUsedUidMap = 50 * 1024; 234 235 // The number of deleted apps that are stored in the uid map. 236 const static int kMaxDeletedAppsInUidMap = 100; 237 238 /* Minimum period between two broadcasts in nanoseconds. */ 239 static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC; 240 241 /* Min period between two checks of byte size per config key in nanoseconds. */ 242 static const int64_t kMinByteSizeCheckPeriodNs = 1 * 60 * NS_PER_SEC; 243 244 // Min period between two checks of byte size per config key in nanoseconds for V2 memory 245 // calculations. 246 static const int64_t kMinByteSizeV2CheckPeriodNs = 5 * 60 * NS_PER_SEC; 247 248 /* Min period between two checks of restricted metrics TTLs. */ 249 static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC; 250 251 /* Min period between two flush operations of restricted metrics. */ 252 static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC; 253 254 /* Min period between two db guardrail check operations of restricted metrics. */ 255 static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC; 256 257 /* Minimum period between two activation broadcasts in nanoseconds. */ 258 static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC; 259 260 // Maximum age (30 days) that files on disk can exist in seconds. 261 static const int kMaxAgeSecond = 60 * 60 * 24 * 30; 262 263 // Maximum age (2 days) that local history files on disk can exist in seconds. 264 static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2; 265 266 // Maximum number of files (1000) that can be in stats directory on disk. 267 static const int kMaxFileNumber = 1000; 268 269 // Maximum size of all files that can be written to stats directory on disk. 270 static const int kMaxFileSize = 50 * 1024 * 1024; 271 272 // How long to try to clear puller cache from last time 273 static const long kPullerCacheClearIntervalSec = 1; 274 275 // Max time to do a pull. 276 static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC; 277 278 // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId. 279 static const int kMaxNonPlatformPushedAtoms = 600; 280 281 // Maximum number of pushed atoms error statsd stats will track. 282 static const int kMaxPushedAtomErrorStatsSize = 100; 283 284 // Maximum number of socket loss stats to track. 285 static const int kMaxSocketLossStatsSize = 50; 286 287 // Maximum atom id value that we consider a platform pushed atom. 288 // This should be updated once highest pushed atom id in atoms.proto approaches this value. 289 static const int kMaxPushedAtomId = 1500; 290 291 // Atom id that is the start of the pulled atoms. 292 static const int kPullAtomStartTag = 10000; 293 294 // Atom id that is the start of vendor atoms. 295 static const int kVendorAtomStartTag = 100000; 296 297 // Vendor pulled atom start id. 298 static const int32_t kVendorPulledAtomStartTag = 150000; 299 300 // Beginning of range for timestamp truncation. 301 static const int32_t kTimestampTruncationStartTag = 300000; 302 303 // End of range for timestamp truncation. 304 static const int32_t kTimestampTruncationEndTag = 304999; 305 306 // Max accepted atom id. 307 static const int32_t kMaxAtomTag = 200000; 308 309 static const int64_t kInt64Max = 0x7fffffffffffffffLL; 310 311 static const int32_t kMaxLoggedBucketDropEvents = 10; 312 313 static const int32_t kNumBinsInSocketBatchReadHistogram = 30; 314 static const int32_t kLargeBatchReadThreshold = 1000; 315 static const int32_t kMaxLargeBatchReadSize = 20; 316 static const int32_t kMaxLargeBatchReadAtomThreshold = 50; 317 318 /** 319 * Report a new config has been received and report the static stats about the config. 320 * 321 * The static stats include: the count of metrics, conditions, matchers, and alerts. 322 * If the config is not valid, this config stats will be put into icebox immediately. 323 */ 324 void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount, 325 int matchersCount, int alertCount, 326 const std::list<std::pair<const int64_t, const int32_t>>& annotations, 327 const std::optional<InvalidConfigReason>& reason); 328 /** 329 * Report a config has been removed. 330 */ 331 void noteConfigRemoved(const ConfigKey& key); 332 /** 333 * Report a config has been reset when ttl expires. 334 */ 335 void noteConfigReset(const ConfigKey& key); 336 337 /** 338 * Report a broadcast has been sent to a config owner to collect the data. 339 */ 340 void noteBroadcastSent(const ConfigKey& key); 341 342 /** 343 * Report that a config has become activated or deactivated. 344 * This can be different from whether or not a broadcast is sent if the 345 * guardrail prevented the broadcast from being sent. 346 */ 347 void noteActiveStatusChanged(const ConfigKey& key, bool activate); 348 349 /** 350 * Report a config's metrics data has been dropped. 351 */ 352 void noteDataDropped(const ConfigKey& key, const size_t totalBytes); 353 354 /** 355 * Report metrics data report has been sent. 356 * 357 * The report may be requested via StatsManager API, or through adb cmd. 358 */ 359 void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, 360 const int32_t reportNumber); 361 362 /** 363 * Report failure in creating the device info metadata table for restricted configs. 364 */ 365 void noteDeviceInfoTableCreationFailed(const ConfigKey& key); 366 367 /** 368 * Report db corruption for restricted configs. 369 */ 370 void noteDbCorrupted(const ConfigKey& key); 371 372 /** 373 * Report db exceeded the size limit for restricted configs. 374 */ 375 void noteDbSizeExceeded(const ConfigKey& key); 376 377 /** 378 * Report db size check with stat for restricted configs failed. 379 */ 380 void noteDbStatFailed(const ConfigKey& key); 381 382 /** 383 * Report restricted config is invalid. 384 */ 385 void noteDbConfigInvalid(const ConfigKey& key); 386 387 /** 388 * Report db is too old for restricted configs. 389 */ 390 void noteDbTooOld(const ConfigKey& key); 391 392 /** 393 * Report db was deleted due to config removal. 394 */ 395 void noteDbDeletionConfigRemoved(const ConfigKey& key); 396 397 /** 398 * Report db was deleted due to config update. 399 */ 400 void noteDbDeletionConfigUpdated(const ConfigKey& key); 401 402 /** 403 * Reports that the promotion for ConfigMetadataProvider failed. 404 */ 405 void noteConfigMetadataProviderPromotionFailed(const ConfigKey& key); 406 407 /** 408 * Report the size of output tuple of a condition. 409 * 410 * Note: only report when the condition has an output dimension, and the tuple 411 * count > kDimensionKeySizeSoftLimit. 412 * 413 * [key]: The config key that this condition belongs to. 414 * [id]: The id of the condition. 415 * [size]: The output tuple size. 416 */ 417 void noteConditionDimensionSize(const ConfigKey& key, int64_t id, int size); 418 419 /** 420 * Report the size of output tuple of a metric. 421 * 422 * Note: only report when the metric has an output dimension, and the tuple 423 * count > kDimensionKeySizeSoftLimit. 424 * 425 * [key]: The config key that this metric belongs to. 426 * [id]: The id of the metric. 427 * [size]: The output tuple size. 428 */ 429 void noteMetricDimensionSize(const ConfigKey& key, int64_t id, int size); 430 431 /** 432 * Report the max size of output tuple of dimension in condition across dimensions in what. 433 * 434 * Note: only report when the metric has an output dimension in condition, and the max tuple 435 * count > kDimensionKeySizeSoftLimit. 436 * 437 * [key]: The config key that this metric belongs to. 438 * [id]: The id of the metric. 439 * [size]: The output tuple size. 440 */ 441 void noteMetricDimensionInConditionSize(const ConfigKey& key, int64_t id, int size); 442 443 /** 444 * Report a matcher has been matched. 445 * 446 * [key]: The config key that this matcher belongs to. 447 * [id]: The id of the matcher. 448 */ 449 void noteMatcherMatched(const ConfigKey& key, int64_t id); 450 451 /** 452 * Report that an anomaly detection alert has been declared. 453 * 454 * [key]: The config key that this alert belongs to. 455 * [id]: The id of the alert. 456 */ 457 void noteAnomalyDeclared(const ConfigKey& key, int64_t id); 458 459 /** 460 * Report an atom event has been logged. 461 */ 462 void noteAtomLogged(int atomId, int32_t timeSec, bool isSkipped); 463 464 /** 465 * Report that statsd modified the anomaly alarm registered with StatsCompanionService. 466 */ 467 void noteRegisteredAnomalyAlarmChanged(); 468 469 /** 470 * Report that statsd modified the periodic alarm registered with StatsCompanionService. 471 */ 472 void noteRegisteredPeriodicAlarmChanged(); 473 474 /** 475 * Records the number of delta entries that are being dropped from the uid map. 476 */ 477 void noteUidMapDropped(int deltas); 478 479 /** 480 * Records that an app was deleted (from statsd's map). 481 */ 482 void noteUidMapAppDeletionDropped(); 483 484 /** 485 * Updates the number of changes currently stored in the uid map. 486 */ 487 void setUidMapChanges(int changes); 488 void setCurrentUidMapMemory(int bytes); 489 490 /* 491 * Updates minimum interval between pulls for an pulled atom. 492 */ 493 void updateMinPullIntervalSec(int pullAtomId, long intervalSec); 494 495 /* 496 * Notes an atom is pulled. 497 */ 498 void notePull(int pullAtomId); 499 500 /* 501 * Notes an atom is served from puller cache. 502 */ 503 void notePullFromCache(int pullAtomId); 504 505 /* 506 * Notify data error for pulled atom. 507 */ 508 void notePullDataError(int pullAtomId); 509 510 /* 511 * Records time for actual pulling, not including those served from cache and not including 512 * statsd processing delays. 513 */ 514 void notePullTime(int pullAtomId, int64_t pullTimeNs); 515 516 /* 517 * Records pull delay for a pulled atom, including those served from cache and including statsd 518 * processing delays. 519 */ 520 void notePullDelay(int pullAtomId, int64_t pullDelayNs); 521 522 /* 523 * Records pull exceeds timeout for the puller. 524 */ 525 void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis); 526 527 /* 528 * Records pull exceeds max delay for a metric. 529 */ 530 void notePullExceedMaxDelay(int pullAtomId); 531 532 /* 533 * Records when system server restarts. 534 */ 535 void noteSystemServerRestart(int32_t timeSec); 536 537 /** 538 * Records statsd skipped an event. 539 */ 540 void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError, 541 int32_t lastAtomTag, int32_t uid, int32_t pid); 542 543 /** 544 * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if 545 * the pull timed out, or if the outgoing binder call failed. 546 * This count will only increment if the puller was actually invoked. 547 * 548 * It does not include a pull not occurring due to not finding the appropriate 549 * puller. These cases are covered in other counts. 550 */ 551 void notePullFailed(int atomId); 552 553 /** 554 * Records that the pull of an atom has failed due to not having a uid provider. 555 */ 556 void notePullUidProviderNotFound(int atomId); 557 558 /** 559 * Records that the pull of an atom has failed due not finding a puller registered by a 560 * trusted uid. 561 */ 562 void notePullerNotFound(int atomId); 563 564 /** 565 * Records that the pull has failed due to the outgoing binder call failing. 566 */ 567 void notePullBinderCallFailed(int atomId); 568 569 /** 570 * A pull with no data occurred 571 */ 572 void noteEmptyData(int atomId); 573 574 /** 575 * Records that a puller callback for the given atomId was registered or unregistered. 576 * 577 * @param registered True if the callback was registered, false if was unregistered. 578 */ 579 void notePullerCallbackRegistrationChanged(int atomId, bool registered); 580 581 /** 582 * Hard limit was reached in the cardinality of an atom 583 */ 584 void noteHardDimensionLimitReached(int64_t metricId); 585 586 /** 587 * A log event was too late, arrived in the wrong bucket and was skipped 588 */ 589 void noteLateLogEventSkipped(int64_t metricId); 590 591 /** 592 * Buckets were skipped as time elapsed without any data for them 593 */ 594 void noteSkippedForwardBuckets(int64_t metricId); 595 596 /** 597 * An unsupported value type was received 598 */ 599 void noteBadValueType(int64_t metricId); 600 601 /** 602 * Buckets were dropped due to reclaim memory. 603 */ 604 void noteBucketDropped(int64_t metricId); 605 606 /** 607 * A condition change was too late, arrived in the wrong bucket and was skipped 608 */ 609 void noteConditionChangeInNextBucket(int64_t metricId); 610 611 /** 612 * A bucket has been tagged as invalid. 613 */ 614 void noteInvalidatedBucket(int64_t metricId); 615 616 /** 617 * Tracks the total number of buckets (include skipped/invalid buckets). 618 */ 619 void noteBucketCount(int64_t metricId); 620 621 /** 622 * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and 623 * the end of the bucket. 624 */ 625 void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs); 626 627 /** 628 * Number of buckets with unknown condition. 629 */ 630 void noteBucketUnknownCondition(int64_t metricId); 631 632 /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp 633 * in the queue */ 634 void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId, bool isSkipped); 635 636 /* Notes queue max size seen so far and associated timestamp */ 637 void noteEventQueueSize(int32_t size, int64_t eventTimestampNs); 638 639 /** 640 * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast 641 * should have been sent, but instead was skipped due to hitting the guardrail. 642 */ 643 void noteActivationBroadcastGuardrailHit(const int uid); 644 645 /** 646 * Reports that an atom is erroneous or cannot be parsed successfully by 647 * statsd. An atom tag of 0 indicates that the client did not supply the 648 * atom id within the encoding. 649 * 650 * For pushed atoms only, this call should be preceded by a call to 651 * noteAtomLogged. 652 */ 653 void noteAtomError(int atomTag, bool pull = false); 654 655 /** Report query of restricted metric succeed **/ 656 void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage, 657 const std::optional<int32_t> configUid, 658 const int32_t callingUid, int64_t queryLatencyNs); 659 660 /** Report query of restricted metric failed **/ 661 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 662 const std::optional<int32_t> configUid, 663 const int32_t callingUid, const InvalidQueryReason reason); 664 665 /** Report query of restricted metric failed along with an error string **/ 666 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 667 const std::optional<int32_t> configUid, 668 const int32_t callingUid, const InvalidQueryReason reason, 669 const string& error); 670 671 // Reports that a restricted metric fails to be inserted to database. 672 void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId); 673 674 // Reports that a restricted metric fails to create table in database. 675 void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, int64_t metricId); 676 677 // Reports that a restricted metric fails to delete table in database. 678 void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, int64_t metricId); 679 680 // Reports the time it takes for a restricted metric to flush the data to the database. 681 void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, int64_t metricId, 682 const int64_t flushLatencyNs); 683 684 // Reports that a restricted metric had a category change. 685 void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, int64_t metricId); 686 687 // Reports the time is takes to flush a restricted config to the database. 688 void noteRestrictedConfigFlushLatency(const ConfigKey& configKey, 689 const int64_t totalFlushLatencyNs); 690 691 // Reports the size of the internal sqlite db. 692 void noteRestrictedConfigDbSize(const ConfigKey& configKey, int64_t elapsedTimeNs, 693 const int64_t dbSize); 694 695 /** 696 * Records libstatssocket was not able to write into socket. 697 */ 698 void noteAtomSocketLoss(const SocketLossInfo& lossInfo); 699 700 /** 701 * Report a new subscription has started and report the static stats about the subscription 702 * config. 703 * 704 * The static stats include: the count of pushed atoms and pulled atoms. 705 */ 706 void noteSubscriptionStarted(int subId, int32_t pushedAtomCount, int32_t pulledAtomCount); 707 708 /** 709 * Report an existing subscription has ended. 710 */ 711 void noteSubscriptionEnded(int subId); 712 713 /** 714 * Report an existing subscription was flushed. 715 */ 716 void noteSubscriptionFlushed(int subId); 717 718 /** 719 * Report an atom was pulled for a subscription. 720 */ 721 void noteSubscriptionAtomPulled(int atomId); 722 723 /** 724 * Report subscriber pull thread wakeup. 725 */ 726 void noteSubscriptionPullThreadWakeup(); 727 728 void noteBatchSocketRead(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs, 729 int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs, 730 const std::unordered_map<int32_t, int32_t>& atomCounts); 731 732 /** 733 * Reset the historical stats. Including all stats in icebox, and the tracked stats about 734 * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue 735 * to collect stats after reset() has been called. 736 */ 737 void reset(); 738 739 /** 740 * Output the stats in protobuf binary format to [buffer]. 741 * 742 * [reset]: whether to clear the historical stats after the call. 743 */ 744 void dumpStats(std::vector<uint8_t>* buffer, bool reset); 745 746 /** 747 * Output statsd stats in human readable format to [out] file descriptor. 748 */ 749 void dumpStats(int outFd) const; 750 751 /** 752 * Returns true if dimension guardrail has been hit since boot for given metric. 753 */ 754 bool hasHitDimensionGuardrail(int64_t metricId) const; 755 756 /** 757 * Return soft and hard atom key dimension size limits as an std::pair. 758 */ 759 static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(int atomId, 760 size_t defaultHardLimit); 761 clampDimensionKeySizeLimit(int dimLimit)762 inline static int clampDimensionKeySizeLimit(int dimLimit) { 763 return std::clamp(dimLimit, kDimensionKeySizeHardLimitMin, kDimensionKeySizeHardLimitMax); 764 } 765 766 /** 767 * Return the unique identifier for the statsd stats report. This id is 768 * reset on boot. 769 */ getStatsdStatsId()770 inline int32_t getStatsdStatsId() const { 771 return mStatsdStatsId; 772 } 773 774 /** 775 * Returns true if there is recorded event queue overflow 776 */ 777 bool hasEventQueueOverflow() const; 778 779 typedef std::unordered_map<int32_t, int32_t> QueueOverflowAtomsStatsMap; 780 QueueOverflowAtomsStatsMap getQueueOverflowAtomsStats() const; 781 782 /** 783 * Returns true if there is recorded socket loss 784 */ 785 bool hasSocketLoss() const; 786 787 typedef struct PullTimeoutMetadata { 788 int64_t pullTimeoutUptimeMillis; 789 int64_t pullTimeoutElapsedMillis; PullTimeoutMetadataPullTimeoutMetadata790 PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) 791 : pullTimeoutUptimeMillis(uptimeMillis), 792 pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */ 793 } 794 } PullTimeoutMetadata; 795 796 typedef struct { 797 long totalPull = 0; 798 long totalPullFromCache = 0; 799 long minPullIntervalSec = LONG_MAX; 800 int64_t avgPullTimeNs = 0; 801 int64_t maxPullTimeNs = 0; 802 long numPullTime = 0; 803 int64_t avgPullDelayNs = 0; 804 int64_t maxPullDelayNs = 0; 805 long numPullDelay = 0; 806 long dataError = 0; 807 long pullTimeout = 0; 808 long pullExceedMaxDelay = 0; 809 long pullFailed = 0; 810 long pullUidProviderNotFound = 0; 811 long pullerNotFound = 0; 812 long emptyData = 0; 813 long registeredCount = 0; 814 long unregisteredCount = 0; 815 int32_t atomErrorCount = 0; 816 long binderCallFailCount = 0; 817 std::list<PullTimeoutMetadata> pullTimeoutMetadata; 818 int32_t subscriptionPullCount = 0; 819 } PulledAtomStats; 820 821 typedef struct { 822 long hardDimensionLimitReached = 0; 823 long lateLogEventSkipped = 0; 824 long skippedForwardBuckets = 0; 825 long badValueType = 0; 826 long conditionChangeInNextBucket = 0; 827 long invalidatedBucket = 0; 828 long bucketDropped = 0; 829 int64_t minBucketBoundaryDelayNs = 0; 830 int64_t maxBucketBoundaryDelayNs = 0; 831 long bucketUnknownCondition = 0; 832 long bucketCount = 0; 833 } AtomMetricStats; 834 835 private: 836 StatsdStats(); 837 838 mutable std::mutex mLock; 839 840 int32_t mStartTimeSec; 841 842 // Random id set using rand() during the initialization. Used to uniquely 843 // identify a session. This is more reliable than mStartTimeSec due to the 844 // unreliable nature of wall clock times. 845 const int32_t mStatsdStatsId; 846 847 // Track the number of dropped entries used by the uid map. 848 UidMapStats mUidMapStats; 849 850 // The stats about the configs that are still in use. 851 // The map size is capped by kMaxConfigCount. 852 std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats; 853 854 // Stores the stats for the configs that are no longer in use. 855 // The size of the vector is capped by kMaxIceBoxSize. 856 std::list<std::shared_ptr<ConfigStats>> mIceBox; 857 858 // Stores the number of times a pushed atom is logged and skipped (if skipped). 859 // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms 860 // out of that range will be put in mNonPlatformPushedAtomStats. 861 // This is a vector, not a map because it will be accessed A LOT -- for each stats log. 862 struct PushedAtomStats { 863 int logCount = 0; 864 int skipCount = 0; 865 }; 866 867 std::vector<PushedAtomStats> mPushedAtomStats; 868 869 // Stores the number of times a pushed atom is logged and skipped for atom ids above 870 // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms. 871 std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats; 872 873 // Stores the number of times a pushed atom is dropped due to queue overflow event. 874 // We do not expect it will happen too often so the map is preferable vs pre-allocated vector 875 // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms. 876 QueueOverflowAtomsStatsMap mPushedAtomDropsStats; 877 878 // Maps PullAtomId to its stats. The size is capped by the puller atom counts. 879 std::map<int, PulledAtomStats> mPulledAtomStats; 880 881 // Stores the number of times a pushed atom was logged erroneously. The 882 // corresponding counts for pulled atoms are stored in PulledAtomStats. 883 // The max size of this map is kMaxPushedAtomErrorStatsSize. 884 std::map<int, int> mPushedAtomErrorStats; 885 886 // Stores the number of times a pushed atom was lost due to socket error. 887 // Represents counter per uid per tag per error with indication when the loss event was observed 888 // first & last time. 889 struct SocketLossStats { SocketLossStatsSocketLossStats890 SocketLossStats(int32_t uid, int64_t firstLossTsNanos, int64_t lastLossTsNanos) 891 : mUid(uid), mFirstLossTsNanos(firstLossTsNanos), mLastLossTsNanos(lastLossTsNanos) { 892 } 893 894 int32_t mUid; 895 int64_t mFirstLossTsNanos; 896 int64_t mLastLossTsNanos; 897 // atom loss count per error, atom id 898 struct AtomLossInfo { AtomLossInfoSocketLossStats::AtomLossInfo899 AtomLossInfo(int32_t atomId, int32_t error, int32_t count) 900 : mAtomId(atomId), mError(error), mCount(count) { 901 } 902 int mAtomId; 903 int mError; 904 int mCount; 905 }; 906 std::vector<AtomLossInfo> mLossCountPerErrorAtomId; 907 }; 908 // The max size of this list is kMaxSocketLossStatsSize. 909 std::list<SocketLossStats> mSocketLossStats; 910 911 // Stores the number of times a pushed atom loss info was dropped from the stats 912 // on libstatssocket side due to guardrail hit. 913 // Represents counter per uid. 914 // The max size of this map is kMaxSocketLossStatsSize. 915 std::map<int32_t, int32_t> mSocketLossStatsOverflowCounters; 916 917 // Maps metric ID to its stats. The size is capped by the number of metrics. 918 std::map<int64_t, AtomMetricStats> mAtomMetricStats; 919 920 // Maps uids to times when the activation changed broadcast not sent due to hitting the 921 // guardrail. The size is capped by the number of configs, and up to 20 times per uid. 922 std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats; 923 924 struct LogLossStats { LogLossStatsLogLossStats925 LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid, 926 int32_t pid) 927 : mWallClockSec(sec), 928 mCount(count), 929 mLastError(error), 930 mLastTag(tag), 931 mUid(uid), 932 mPid(pid) { 933 } 934 int32_t mWallClockSec; 935 int32_t mCount; 936 // error code defined in linux/errno.h 937 int32_t mLastError; 938 int32_t mLastTag; 939 int32_t mUid; 940 int32_t mPid; 941 }; 942 943 // Max of {(now - oldestEventTimestamp) when overflow happens}. 944 // This number is helpful to understand how SLOW statsd can be. 945 int64_t mMaxQueueHistoryNs = 0; 946 947 // Min of {(now - oldestEventTimestamp) when overflow happens}. 948 // This number is helpful to understand how FAST the events floods to statsd. 949 int64_t mMinQueueHistoryNs = kInt64Max; 950 951 // Total number of events that are lost due to queue overflow. 952 int32_t mOverflowCount = 0; 953 954 // Max number of events stored into the queue seen so far. 955 int32_t mEventQueueMaxSizeObserved = 0; 956 957 // Event timestamp for associated max size hit. 958 int64_t mEventQueueMaxSizeObservedElapsedNanos = 0; 959 960 // Timestamps when we detect log loss, and the number of logs lost. 961 std::list<LogLossStats> mLogLossStats; 962 963 std::list<int32_t> mSystemServerRestartSec; 964 965 std::vector<int64_t> mSocketBatchReadHistogram; 966 967 // Stores stats about large socket batch reads 968 struct LargeBatchSocketReadStats { LargeBatchSocketReadStatsLargeBatchSocketReadStats969 LargeBatchSocketReadStats(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs, 970 int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs, 971 const std::unordered_map<int32_t, int32_t>& atomCounts) 972 : mSize(size), 973 mLastReadTimeNs(lastReadTimeNs), 974 mCurrReadTimeNs(currReadTimeNs), 975 mMinAtomReadTimeNs(minAtomReadTimeNs), 976 mMaxAtomReadTimeNs(maxAtomReadTimeNs), 977 mCommonAtomCounts(atomCounts) { 978 } 979 980 int32_t mSize; 981 // The elapsed time of the previous and current read times. 982 int64_t mLastReadTimeNs; 983 int64_t mCurrReadTimeNs; 984 // The min and max times of the LogEvents processed in the batch 985 int64_t mMinAtomReadTimeNs; 986 int64_t mMaxAtomReadTimeNs; 987 // Map of atom id to count for atoms logged more than kMaxLargeBatchReadAtomThreshold times. 988 std::unordered_map<int32_t, int32_t> mCommonAtomCounts; 989 }; 990 // The max size of this list is kMaxSocketLossStatsSize. 991 std::list<LargeBatchSocketReadStats> mLargeBatchSocketReadStats; 992 993 struct RestrictedMetricQueryStats { RestrictedMetricQueryStatsRestrictedMetricQueryStats994 RestrictedMetricQueryStats(int32_t callingUid, int64_t configId, 995 const string& configPackage, std::optional<int32_t> configUid, 996 int64_t queryTimeNs, 997 std::optional<InvalidQueryReason> invalidQueryReason, 998 const string& error, std::optional<int64_t> queryLatencyNs) 999 : mCallingUid(callingUid), 1000 mConfigId(configId), 1001 mConfigPackage(configPackage), 1002 mConfigUid(configUid), 1003 mQueryWallTimeNs(queryTimeNs), 1004 mInvalidQueryReason(invalidQueryReason), 1005 mError(error), 1006 mQueryLatencyNs(queryLatencyNs) { 1007 mHasError = invalidQueryReason.has_value(); 1008 } 1009 int32_t mCallingUid; 1010 int64_t mConfigId; 1011 string mConfigPackage; 1012 std::optional<int32_t> mConfigUid; 1013 int64_t mQueryWallTimeNs; 1014 std::optional<InvalidQueryReason> mInvalidQueryReason; 1015 bool mHasError; 1016 string mError; 1017 std::optional<int64_t> mQueryLatencyNs; 1018 }; 1019 std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats; 1020 1021 void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage, 1022 const std::optional<int32_t> configUid, 1023 const int32_t callingUid, 1024 const InvalidQueryReason reason, 1025 const string& error); 1026 1027 int32_t mSubscriptionPullThreadWakeupCount = 0; 1028 1029 // Maps Subscription ID to the corresponding SubscriptionStats struct object. 1030 // Size of this map is capped by ShellSubscriber::kMaxSubscriptions. 1031 std::map<int32_t, SubscriptionStats> mSubscriptionStats; 1032 1033 // Stores the number of times statsd modified the anomaly alarm registered with 1034 // StatsCompanionService. 1035 int mAnomalyAlarmRegisteredStats = 0; 1036 1037 // Stores the number of times statsd registers the periodic alarm changes 1038 int mPeriodicAlarmRegisteredStats = 0; 1039 1040 void noteConfigResetInternalLocked(const ConfigKey& key); 1041 1042 void noteConfigRemovedInternalLocked(const ConfigKey& key); 1043 1044 void resetInternalLocked(); 1045 1046 void noteAtomLoggedLocked(int atomId, bool isSkipped); 1047 1048 void noteAtomDroppedLocked(int atomId); 1049 1050 void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec); 1051 1052 void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, int32_t timeSec, 1053 const int32_t reportNumber); 1054 1055 void noteBroadcastSent(const ConfigKey& key, int32_t timeSec); 1056 1057 void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec); 1058 1059 void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec); 1060 1061 void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats); 1062 1063 int getPushedAtomErrorsLocked(int atomId) const; 1064 1065 int getPushedAtomDropsLocked(int atomId) const; 1066 1067 bool hasRestrictedConfigErrors(const std::shared_ptr<ConfigStats>& configStats) const; 1068 1069 /** 1070 * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference 1071 * will live as long as `this`. 1072 */ 1073 StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId); 1074 1075 FRIEND_TEST(LogEventQueue_test, TestQueueMaxSize); 1076 FRIEND_TEST(SocketParseMessageTest, TestProcessMessage); 1077 FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved); 1078 FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit); 1079 FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor); 1080 FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats); 1081 FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats); 1082 FRIEND_TEST(StatsdStatsTest, TestAtomLog); 1083 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats); 1084 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats); 1085 FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats); 1086 FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats); 1087 FRIEND_TEST(StatsdStatsTest, TestConfigMetadataProviderPromotionFailed); 1088 FRIEND_TEST(StatsdStatsTest, TestConfigRemove); 1089 FRIEND_TEST(StatsdStatsTest, TestHasHitDimensionGuardrail); 1090 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd); 1091 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId); 1092 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId); 1093 FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog); 1094 FRIEND_TEST(StatsdStatsTest, TestPullAtomStats); 1095 FRIEND_TEST(StatsdStatsTest, TestQueueStats); 1096 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats); 1097 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats); 1098 FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider); 1099 FRIEND_TEST(StatsdStatsTest, TestSocketLossStats); 1100 FRIEND_TEST(StatsdStatsTest, TestSocketLossStatsOverflowCounter); 1101 FRIEND_TEST(StatsdStatsTest, TestSubStats); 1102 FRIEND_TEST(StatsdStatsTest, TestSubscriptionAtomPulled); 1103 FRIEND_TEST(StatsdStatsTest, TestSubscriptionEnded); 1104 FRIEND_TEST(StatsdStatsTest, TestSubscriptionFlushed); 1105 FRIEND_TEST(StatsdStatsTest, TestSubscriptionPullThreadWakeup); 1106 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStarted); 1107 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedMaxActiveSubscriptions); 1108 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedRemoveFinishedSubscription); 1109 FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash); 1110 FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold); 1111 FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd); 1112 FRIEND_TEST(StatsdStatsTest, TestSocketBatchReadStats); 1113 }; 1114 1115 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 1116 const int64_t matcherId); 1117 1118 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 1119 const int64_t metricId, 1120 const int64_t matcherId); 1121 1122 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 1123 const int64_t conditionId); 1124 1125 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 1126 const int64_t metricId, 1127 const int64_t conditionId); 1128 1129 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason, 1130 const int64_t metricId, 1131 const int64_t stateId); 1132 1133 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 1134 const int64_t alertId); 1135 1136 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 1137 const int64_t metricId, 1138 const int64_t alertId); 1139 1140 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason, 1141 const int64_t alarmId); 1142 1143 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason, 1144 const int64_t subscriptionId); 1145 1146 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm( 1147 const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alarmId); 1148 1149 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert( 1150 const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alertId); 1151 1152 } // namespace statsd 1153 } // namespace os 1154 } // namespace android 1155