/*
 * Copyright (c) 2020, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "LooperWrapper.h"
#include "ProcDiskStatsCollector.h"
#include "ProcStatCollector.h"
#include "UidStatsCollector.h"
#include "WatchdogServiceHelper.h"

#include <WatchdogProperties.sysprop.h>
#include <aidl/android/automotive/watchdog/internal/PackageIoOveruseStats.h>
#include <aidl/android/automotive/watchdog/internal/ResourceStats.h>
#include <aidl/android/automotive/watchdog/internal/UserState.h>
#include <android-base/chrono_utils.h>
#include <android-base/result.h>
#include <android/util/ProtoOutputStream.h>
#include <cutils/multiuser.h>
#include <gtest/gtest_prod.h>
#include <utils/Errors.h>
#include <utils/Looper.h>
#include <utils/Mutex.h>
#include <utils/RefBase.h>
#include <utils/String16.h>
#include <utils/StrongPointer.h>
#include <utils/Vector.h>

#include <time.h>

#include <chrono>
#include <cstdint>
#include <functional>
#include <string>
#include <thread>  // NOLINT(build/c++11)
#include <tuple>
#include <unordered_set>
#include <vector>

namespace android {
namespace automotive {
namespace watchdog {

// Forward declaration for testing use only.
53 namespace internal { 54 55 class WatchdogPerfServicePeer; 56 57 } // namespace internal 58 59 constexpr std::chrono::seconds kDefaultPostSystemEventDurationSec = 30s; 60 constexpr std::chrono::seconds kDefaultWakeUpEventDurationSec = 30s; 61 constexpr std::chrono::seconds kDefaultUserSwitchTimeoutSec = 30s; 62 constexpr std::chrono::nanoseconds kPrevUnsentResourceStatsMaxDurationNs = 10min; 63 constexpr const char* kStartCustomCollectionFlag = "--start_perf"; 64 constexpr const char* kEndCustomCollectionFlag = "--stop_perf"; 65 constexpr const char* kIntervalFlag = "--interval"; 66 constexpr const char* kMaxDurationFlag = "--max_duration"; 67 constexpr const char* kFilterPackagesFlag = "--filter_packages"; 68 69 enum SystemState { 70 NORMAL_MODE = 0, 71 GARAGE_MODE = 1, 72 }; 73 74 using time_point_millis = 75 std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>; 76 77 /** 78 * DataProcessor defines methods that must be implemented in order to process the data collected 79 * by |WatchdogPerfService|. 
80 */ 81 class DataProcessorInterface : virtual public android::RefBase { 82 public: 83 struct CollectionIntervals { 84 std::chrono::milliseconds mBoottimeIntervalMillis = std::chrono::milliseconds(0); 85 std::chrono::milliseconds mPeriodicIntervalMillis = std::chrono::milliseconds(0); 86 std::chrono::milliseconds mUserSwitchIntervalMillis = std::chrono::milliseconds(0); 87 std::chrono::milliseconds mWakeUpIntervalMillis = std::chrono::milliseconds(0); 88 std::chrono::milliseconds mCustomIntervalMillis = std::chrono::milliseconds(0); 89 bool operator==(const CollectionIntervals& other) const { 90 return mBoottimeIntervalMillis == other.mBoottimeIntervalMillis && 91 mPeriodicIntervalMillis == other.mPeriodicIntervalMillis && 92 mUserSwitchIntervalMillis == other.mUserSwitchIntervalMillis && 93 mWakeUpIntervalMillis == other.mWakeUpIntervalMillis && 94 mCustomIntervalMillis == other.mCustomIntervalMillis; 95 } 96 }; DataProcessorInterface()97 DataProcessorInterface() {} ~DataProcessorInterface()98 virtual ~DataProcessorInterface() {} 99 // Returns the name of the data processor. 100 virtual std::string name() const = 0; 101 // Callback to initialize the data processor. 102 virtual android::base::Result<void> init() = 0; 103 // Callback to terminate the data processor. 104 virtual void terminate() = 0; 105 // Callback to perform actions (such as clearing stats from previous system startup events) 106 // before starting boot-time or wake-up collections. 107 virtual android::base::Result<void> onSystemStartup() = 0; 108 // Callback to perform actions once CarWatchdogService is registered. 109 virtual void onCarWatchdogServiceRegistered() = 0; 110 // Callback to process the data collected during boot-time. 
111 virtual android::base::Result<void> onBoottimeCollection( 112 time_point_millis time, 113 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 114 const android::wp<ProcStatCollectorInterface>& procStatCollector, 115 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 116 // Callback to process the data collected during a wake-up event. 117 virtual android::base::Result<void> onWakeUpCollection( 118 time_point_millis time, 119 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 120 const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0; 121 // Callback to process the data collected periodically post boot complete. 122 virtual android::base::Result<void> onPeriodicCollection( 123 time_point_millis time, SystemState systemState, 124 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 125 const android::wp<ProcStatCollectorInterface>& procStatCollector, 126 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 127 // Callback to process the data collected during user switch. 128 virtual android::base::Result<void> onUserSwitchCollection( 129 time_point_millis time, userid_t from, userid_t to, 130 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 131 const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0; 132 133 /** 134 * Callback to process the data collected on custom collection and filter the results only to 135 * the specified |filterPackages|. 
136 */ 137 virtual android::base::Result<void> onCustomCollection( 138 time_point_millis time, SystemState systemState, 139 const std::unordered_set<std::string>& filterPackages, 140 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 141 const android::wp<ProcStatCollectorInterface>& procStatCollector, 142 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 143 /** 144 * Callback to periodically monitor the collected data and trigger the given |alertHandler| 145 * on detecting resource overuse. 146 */ 147 virtual android::base::Result<void> onPeriodicMonitor( 148 time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector, 149 const std::function<void()>& alertHandler) = 0; 150 // Callback to dump system event data and periodically collected data. 151 virtual android::base::Result<void> onDump(int fd) const = 0; 152 // Callback to dump system event data and periodically collected data in proto format. 153 virtual android::base::Result<void> onDumpProto( 154 const CollectionIntervals& collectionIntervals, 155 android::util::ProtoOutputStream& outProto) const = 0; 156 /** 157 * Callback to dump the custom collected data. When fd == -1, clear the custom collection cache. 158 */ 159 virtual android::base::Result<void> onCustomCollectionDump(int fd) = 0; 160 }; 161 162 enum EventType { 163 // WatchdogPerfService's state. 164 INIT = 0, 165 TERMINATED, 166 167 // Collection events. 168 BOOT_TIME_COLLECTION, 169 PERIODIC_COLLECTION, 170 USER_SWITCH_COLLECTION, 171 WAKE_UP_COLLECTION, 172 CUSTOM_COLLECTION, 173 174 // Monitor event. 175 PERIODIC_MONITOR, 176 177 LAST_EVENT, 178 }; 179 180 enum SwitchMessage { 181 /** 182 * On receiving this message, collect the last boot-time record and start periodic collection 183 * and monitor. 
184 */ 185 END_BOOTTIME_COLLECTION = EventType::LAST_EVENT + 1, 186 187 /** 188 * On receiving this message, collect the last user switch record and start periodic collection 189 * and monitor. 190 */ 191 END_USER_SWITCH_COLLECTION, 192 193 /** 194 * On receiving this message, collect the last wake up record and start periodic collection and 195 * monitor. 196 */ 197 END_WAKE_UP_COLLECTION, 198 199 /** 200 * On receiving this message, ends custom collection, discard collected data and start periodic 201 * collection and monitor. 202 */ 203 END_CUSTOM_COLLECTION, 204 205 LAST_SWITCH_MSG, 206 }; 207 208 enum TaskMessage { 209 // On receiving this message, send the cached resource stats to CarWatchdogService. 210 SEND_RESOURCE_STATS = SwitchMessage::LAST_SWITCH_MSG + 1, 211 }; 212 213 /** 214 * WatchdogPerfServiceInterface collects performance data during boot-time, user switch, system wake 215 * up and periodically post system events. It exposes APIs that the main thread and binder service 216 * can call to start a collection, switch the collection type, and generate collection dumps. 217 */ 218 class WatchdogPerfServiceInterface : virtual public MessageHandler { 219 public: 220 // Register a data processor to process the data collected by |WatchdogPerfService|. 221 virtual android::base::Result<void> registerDataProcessor( 222 android::sp<DataProcessorInterface> processor) = 0; 223 /** 224 * Starts the boot-time collection in the looper handler on a new thread and returns 225 * immediately. Must be called only once. Otherwise, returns an error. 226 */ 227 virtual android::base::Result<void> start() = 0; 228 // Terminates the collection thread and returns. 229 virtual void terminate() = 0; 230 // Sets the system state. 231 virtual void setSystemState(SystemState systemState) = 0; 232 // Handles unsent resource stats. 
233 virtual void onCarWatchdogServiceRegistered() = 0; 234 // Ends the boot-time collection by switching to periodic collection after the post event 235 // duration. 236 virtual android::base::Result<void> onBootFinished() = 0; 237 // Starts and ends the user switch collection depending on the user states received. 238 virtual android::base::Result<void> onUserStateChange( 239 userid_t userId, 240 const aidl::android::automotive::watchdog::internal::UserState& userState) = 0; 241 // Starts wake-up collection. Any running collection is stopped, except for custom collections. 242 virtual android::base::Result<void> onSuspendExit() = 0; 243 // Called on shutdown enter, suspend enter and hibernation enter. 244 virtual android::base::Result<void> onShutdownEnter() = 0; 245 246 /** 247 * Depending on the arguments, it either: 248 * 1. Starts a custom collection. 249 * 2. Or ends the current custom collection and dumps the collected data. 250 * Returns any error observed during the dump generation. 251 */ 252 virtual android::base::Result<void> onCustomCollection(int fd, const char** args, 253 uint32_t numArgs) = 0; 254 // Generates a dump from the system events and periodic collection events. 255 virtual android::base::Result<void> onDump(int fd) const = 0; 256 // Generates a proto dump from system events and periodic collection events. 257 virtual android::base::Result<void> onDumpProto( 258 android::util::ProtoOutputStream& outProto) const = 0; 259 // Dumps the help text. 
260 virtual bool dumpHelpText(int fd) const = 0; 261 }; 262 263 class WatchdogPerfService final : public WatchdogPerfServiceInterface { 264 public: WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface> & watchdogServiceHelper,const std::function<int64_t ()> & getElapsedTimeSinceBootMsFunc)265 WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper, 266 const std::function<int64_t()>& getElapsedTimeSinceBootMsFunc) : 267 kGetElapsedTimeSinceBootMillisFunc(std::move(getElapsedTimeSinceBootMsFunc)), 268 mPostSystemEventDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 269 std::chrono::seconds(sysprop::postSystemEventDuration().value_or( 270 kDefaultPostSystemEventDurationSec.count())))), 271 mWakeUpDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 272 std::chrono::seconds(sysprop::wakeUpEventDuration().value_or( 273 kDefaultWakeUpEventDurationSec.count())))), 274 mUserSwitchTimeoutNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 275 std::chrono::seconds(sysprop::userSwitchTimeout().value_or( 276 kDefaultUserSwitchTimeoutSec.count())))), 277 mHandlerLooper(android::sp<LooperWrapper>::make()), 278 mSystemState(NORMAL_MODE), 279 mBoottimeCollection({}), 280 mPeriodicCollection({}), 281 mUserSwitchCollection({}), 282 mCustomCollection({}), 283 mPeriodicMonitor({}), 284 mUnsentResourceStats({}), 285 mLastCollectionTimeMillis(0), 286 mBootCompletedTimeEpochSeconds(0), 287 mKernelStartTimeEpochSeconds(0), 288 mCurrCollectionEvent(EventType::INIT), 289 mUidStatsCollector(android::sp<UidStatsCollector>::make()), 290 mProcStatCollector(android::sp<ProcStatCollector>::make()), 291 mProcDiskStatsCollector(android::sp<ProcDiskStatsCollector>::make()), 292 mDataProcessors({}), 293 mWatchdogServiceHelper(watchdogServiceHelper) {} 294 295 android::base::Result<void> registerDataProcessor( 296 android::sp<DataProcessorInterface> processor) override; 297 298 android::base::Result<void> start() 
override; 299 300 void terminate() override; 301 302 void setSystemState(SystemState systemState) override; 303 304 void onCarWatchdogServiceRegistered() override; 305 306 android::base::Result<void> onBootFinished() override; 307 308 android::base::Result<void> onUserStateChange( 309 userid_t userId, 310 const aidl::android::automotive::watchdog::internal::UserState& userState) override; 311 312 android::base::Result<void> onSuspendExit() override; 313 314 android::base::Result<void> onShutdownEnter() override; 315 316 android::base::Result<void> onCustomCollection(int fd, const char** args, 317 uint32_t numArgs) override; 318 319 android::base::Result<void> onDump(int fd) const override; 320 android::base::Result<void> onDumpProto( 321 android::util::ProtoOutputStream& outProto) const override; 322 323 bool dumpHelpText(int fd) const override; 324 325 private: 326 struct EventMetadata { 327 // Collection or monitor event. 328 EventType eventType = EventType::LAST_EVENT; 329 // Interval between subsequent events. 330 std::chrono::nanoseconds pollingIntervalNs = 0ns; 331 // Used to calculate the uptime for next event. 332 nsecs_t lastPollElapsedRealTimeNs = 0; 333 // Filter the results only to the specified packages. 334 std::unordered_set<std::string> filterPackages; 335 336 std::string toString() const; 337 }; 338 339 struct UserSwitchEventMetadata : WatchdogPerfService::EventMetadata { 340 // User id of user being switched from. 341 userid_t from = 0; 342 // User id of user being switched to. 343 userid_t to = 0; 344 }; 345 346 // Dumps the collectors' status when they are disabled. 347 android::base::Result<void> dumpCollectorsStatusLocked(int fd) const; 348 349 /** 350 * Starts a custom collection on the looper handler, temporarily stops the periodic collection 351 * (won't discard the collected data), and returns immediately. Returns any error observed 352 * during this process. 353 * The custom collection happens once every |interval| seconds. 
When the |maxDuration| is 354 * reached, the looper receives a message to end the collection, discards the collected data, 355 * and starts the periodic collection. This is needed to ensure the custom collection doesn't 356 * run forever when a subsequent |endCustomCollection| call is not received. 357 * When |kFilterPackagesFlag| value specified, the results are filtered only to the specified 358 * package names. 359 */ 360 android::base::Result<void> startCustomCollection( 361 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration, 362 const std::unordered_set<std::string>& filterPackages); 363 364 /** 365 * Ends the current custom collection, generates a dump, sends a looper message to start the 366 * periodic collection, and returns immediately. Returns an error when there is no custom 367 * collection running or when a dump couldn't be generated from the custom collection. 368 */ 369 android::base::Result<void> endCustomCollection(int fd); 370 371 // Start a user switch collection. 372 android::base::Result<void> startUserSwitchCollection(); 373 374 // Switch to periodic collection and periodic monitor. 375 void switchToPeriodicLocked(bool startNow); 376 377 // Handles the messages received by the looper. 378 void handleMessage(const Message& message) override; 379 380 // Processes the collection events received by |handleMessage|. 381 android::base::Result<void> processCollectionEvent(EventMetadata* metadata); 382 383 // Collects/processes the performance data for the current collection event. 384 android::base::Result<void> collectLocked(EventMetadata* metadata); 385 386 // Processes the monitor events received by |handleMessage|. 387 android::base::Result<void> processMonitorEvent(EventMetadata* metadata); 388 389 // Sends the unsent resource stats. 390 android::base::Result<void> sendResourceStats(); 391 392 // Notifies all registered data processors that either boot-time or wake-up collection will 393 // start. 
Individual implementations of data processors may clear stats collected during 394 // previous system startup events. 395 android::base::Result<void> notifySystemStartUpLocked(); 396 397 // Caches resource stats that have not been sent to CarWatchdogService. 398 void cacheUnsentResourceStatsLocked( 399 aidl::android::automotive::watchdog::internal::ResourceStats resourceStats); 400 401 /** 402 * Returns the metadata for the current collection based on |mCurrCollectionEvent|. Returns 403 * nullptr on invalid collection event. 404 */ 405 EventMetadata* getCurrentCollectionMetadataLocked(); 406 407 std::function<int64_t()> kGetElapsedTimeSinceBootMillisFunc; 408 409 // Duration to extend a system event collection after the final signal is received. 410 std::chrono::nanoseconds mPostSystemEventDurationNs; 411 412 // Duration of the wake-up collection event. 413 std::chrono::nanoseconds mWakeUpDurationNs; 414 415 // Timeout duration for user switch collection in case final signal isn't received. 416 std::chrono::nanoseconds mUserSwitchTimeoutNs; 417 418 // Thread on which the actual collection happens. 419 std::thread mCollectionThread; 420 421 // Makes sure only one collection is running at any given time. 422 mutable Mutex mMutex; 423 424 // Handler looper to execute different collection events on the collection thread. 425 android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex); 426 427 // Current system state. 428 SystemState mSystemState GUARDED_BY(mMutex); 429 430 // Info for the |EventType::BOOT_TIME_COLLECTION| collection event. 431 EventMetadata mBoottimeCollection GUARDED_BY(mMutex); 432 433 // Info for the |EventType::PERIODIC_COLLECTION| collection event. 434 EventMetadata mPeriodicCollection GUARDED_BY(mMutex); 435 436 // Info for the |EventType::USER_SWITCH_COLLECTION| collection event. 437 UserSwitchEventMetadata mUserSwitchCollection GUARDED_BY(mMutex); 438 439 // Info for the |EventType::WAKE_UP_COLLECTION| collection event. 
440 EventMetadata mWakeUpCollection GUARDED_BY(mMutex); 441 442 // Info for the |EventType::CUSTOM_COLLECTION| collection event. The info is cleared at the end 443 // of every custom collection. 444 EventMetadata mCustomCollection GUARDED_BY(mMutex); 445 446 // Info for the |EventType::PERIODIC_MONITOR| monitor event. 447 EventMetadata mPeriodicMonitor GUARDED_BY(mMutex); 448 449 // Cache of resource stats that have not been sent to CarWatchdogService. 450 std::vector<std::tuple<nsecs_t, aidl::android::automotive::watchdog::internal::ResourceStats>> 451 mUnsentResourceStats GUARDED_BY(mMutex); 452 453 // Tracks the latest collection time since boot in millis. 454 int64_t mLastCollectionTimeMillis GUARDED_BY(mMutex); 455 456 // Time of receiving boot complete signal. 457 time_t mBootCompletedTimeEpochSeconds GUARDED_BY(mMutex); 458 459 // Boot start time collected from /proc/stat. 460 time_t mKernelStartTimeEpochSeconds GUARDED_BY(mMutex); 461 462 // Tracks either the WatchdogPerfService's state or current collection event. Updated on 463 // |start|, |onBootFinished|, |onUserStateChange|, |startCustomCollection|, 464 // |endCustomCollection|, and |terminate|. 465 EventType mCurrCollectionEvent GUARDED_BY(mMutex); 466 467 // Collector for UID process and I/O stats. 468 android::sp<UidStatsCollectorInterface> mUidStatsCollector GUARDED_BY(mMutex); 469 470 // Collector/parser for `/proc/stat`. 471 android::sp<ProcStatCollectorInterface> mProcStatCollector GUARDED_BY(mMutex); 472 473 // Collector/parser for `/proc/diskstats` file. 474 android::sp<ProcDiskStatsCollectorInterface> mProcDiskStatsCollector GUARDED_BY(mMutex); 475 476 // Data processors for the collected performance data. 477 std::vector<android::sp<DataProcessorInterface>> mDataProcessors GUARDED_BY(mMutex); 478 479 // Helper to communicate with the CarWatchdogService. 480 android::sp<WatchdogServiceHelperInterface> mWatchdogServiceHelper GUARDED_BY(mMutex); 481 482 // For unit tests. 
483 friend class internal::WatchdogPerfServicePeer; 484 FRIEND_TEST(WatchdogPerfServiceTest, TestServiceStartAndTerminate); 485 }; 486 487 } // namespace watchdog 488 } // namespace automotive 489 } // namespace android 490