1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "LooperWrapper.h"
20 #include "ProcDiskStatsCollector.h"
21 #include "ProcStatCollector.h"
22 #include "UidStatsCollector.h"
23 #include "WatchdogServiceHelper.h"
24 
25 #include <WatchdogProperties.sysprop.h>
26 #include <aidl/android/automotive/watchdog/internal/PackageIoOveruseStats.h>
27 #include <aidl/android/automotive/watchdog/internal/ResourceStats.h>
28 #include <aidl/android/automotive/watchdog/internal/UserState.h>
29 #include <android-base/chrono_utils.h>
30 #include <android-base/result.h>
31 #include <android/util/ProtoOutputStream.h>
32 #include <cutils/multiuser.h>
33 #include <gtest/gtest_prod.h>
34 #include <utils/Errors.h>
35 #include <utils/Looper.h>
36 #include <utils/Mutex.h>
37 #include <utils/RefBase.h>
38 #include <utils/String16.h>
39 #include <utils/StrongPointer.h>
40 #include <utils/Vector.h>
41 
42 #include <time.h>
43 
44 #include <string>
45 #include <thread>  // NOLINT(build/c++11)
46 #include <unordered_set>
47 
48 namespace android {
49 namespace automotive {
50 namespace watchdog {
51 
// Test-only peer class. It is forward declared here so that |WatchdogPerfService| can name it in
// a friend declaration without this header depending on any test code.
namespace internal {
class WatchdogPerfServicePeer;
}  // namespace internal
58 
// Fallback durations used by |WatchdogPerfService| when the corresponding system property is not
// set. The durations are spelled with explicit std::chrono types (rather than the `30s`/`10min`
// literals) so that this header does not rely on another header injecting
// std::chrono_literals into scope.
constexpr std::chrono::seconds kDefaultPostSystemEventDurationSec{30};
constexpr std::chrono::seconds kDefaultWakeUpEventDurationSec{30};
constexpr std::chrono::seconds kDefaultUserSwitchTimeoutSec{30};
// NOTE(review): presumably the maximum age of cached, unsent resource stats - confirm against the
// implementation file.
constexpr std::chrono::nanoseconds kPrevUnsentResourceStatsMaxDurationNs =
        std::chrono::minutes{10};
// Command line flags. NOTE(review): presumably the arguments accepted by
// |WatchdogPerfServiceInterface::onCustomCollection| - confirm against the implementation file.
constexpr const char* kStartCustomCollectionFlag = "--start_perf";
constexpr const char* kEndCustomCollectionFlag = "--stop_perf";
constexpr const char* kIntervalFlag = "--interval";
constexpr const char* kMaxDurationFlag = "--max_duration";
constexpr const char* kFilterPackagesFlag = "--filter_packages";
68 
// System state handed to |WatchdogPerfServiceInterface::setSystemState| and forwarded to the data
// processors on periodic and custom collections.
enum SystemState {
    // Normal mode.
    NORMAL_MODE = 0,
    // Garage mode.
    GARAGE_MODE = 1,
};
73 
// System clock time point with millisecond resolution. Used as the collection timestamp passed to
// the data processor callbacks.
using time_point_millis =
        std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>;
76 
77 /**
78  * DataProcessor defines methods that must be implemented in order to process the data collected
79  * by |WatchdogPerfService|.
80  */
81 class DataProcessorInterface : virtual public android::RefBase {
82 public:
83     struct CollectionIntervals {
84         std::chrono::milliseconds mBoottimeIntervalMillis = std::chrono::milliseconds(0);
85         std::chrono::milliseconds mPeriodicIntervalMillis = std::chrono::milliseconds(0);
86         std::chrono::milliseconds mUserSwitchIntervalMillis = std::chrono::milliseconds(0);
87         std::chrono::milliseconds mWakeUpIntervalMillis = std::chrono::milliseconds(0);
88         std::chrono::milliseconds mCustomIntervalMillis = std::chrono::milliseconds(0);
89         bool operator==(const CollectionIntervals& other) const {
90             return mBoottimeIntervalMillis == other.mBoottimeIntervalMillis &&
91             mPeriodicIntervalMillis == other.mPeriodicIntervalMillis &&
92             mUserSwitchIntervalMillis == other.mUserSwitchIntervalMillis &&
93             mWakeUpIntervalMillis == other.mWakeUpIntervalMillis &&
94             mCustomIntervalMillis == other.mCustomIntervalMillis;
95         }
96     };
DataProcessorInterface()97     DataProcessorInterface() {}
~DataProcessorInterface()98     virtual ~DataProcessorInterface() {}
99     // Returns the name of the data processor.
100     virtual std::string name() const = 0;
101     // Callback to initialize the data processor.
102     virtual android::base::Result<void> init() = 0;
103     // Callback to terminate the data processor.
104     virtual void terminate() = 0;
105     // Callback to perform actions (such as clearing stats from previous system startup events)
106     // before starting boot-time or wake-up collections.
107     virtual android::base::Result<void> onSystemStartup() = 0;
108     // Callback to perform actions once CarWatchdogService is registered.
109     virtual void onCarWatchdogServiceRegistered() = 0;
110     // Callback to process the data collected during boot-time.
111     virtual android::base::Result<void> onBoottimeCollection(
112             time_point_millis time,
113             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
114             const android::wp<ProcStatCollectorInterface>& procStatCollector,
115             aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0;
116     // Callback to process the data collected during a wake-up event.
117     virtual android::base::Result<void> onWakeUpCollection(
118             time_point_millis time,
119             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
120             const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0;
121     // Callback to process the data collected periodically post boot complete.
122     virtual android::base::Result<void> onPeriodicCollection(
123             time_point_millis time, SystemState systemState,
124             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
125             const android::wp<ProcStatCollectorInterface>& procStatCollector,
126             aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0;
127     // Callback to process the data collected during user switch.
128     virtual android::base::Result<void> onUserSwitchCollection(
129             time_point_millis time, userid_t from, userid_t to,
130             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
131             const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0;
132 
133     /**
134      * Callback to process the data collected on custom collection and filter the results only to
135      * the specified |filterPackages|.
136      */
137     virtual android::base::Result<void> onCustomCollection(
138             time_point_millis time, SystemState systemState,
139             const std::unordered_set<std::string>& filterPackages,
140             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
141             const android::wp<ProcStatCollectorInterface>& procStatCollector,
142             aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0;
143     /**
144      * Callback to periodically monitor the collected data and trigger the given |alertHandler|
145      * on detecting resource overuse.
146      */
147     virtual android::base::Result<void> onPeriodicMonitor(
148             time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector,
149             const std::function<void()>& alertHandler) = 0;
150     // Callback to dump system event data and periodically collected data.
151     virtual android::base::Result<void> onDump(int fd) const = 0;
152     // Callback to dump system event data and periodically collected data in proto format.
153     virtual android::base::Result<void> onDumpProto(
154             const CollectionIntervals& collectionIntervals,
155             android::util::ProtoOutputStream& outProto) const = 0;
156     /**
157      * Callback to dump the custom collected data. When fd == -1, clear the custom collection cache.
158      */
159     virtual android::base::Result<void> onCustomCollectionDump(int fd) = 0;
160 };
161 
// Identifies either the state of |WatchdogPerfService| or the collection/monitor event that is
// being handled. |LAST_EVENT| must remain the final enumerator because |SwitchMessage| values are
// numbered starting right after it.
enum EventType {
    // --- WatchdogPerfService's state ---
    INIT = 0,
    TERMINATED,

    // --- Collection events ---
    BOOT_TIME_COLLECTION,
    PERIODIC_COLLECTION,
    USER_SWITCH_COLLECTION,
    WAKE_UP_COLLECTION,
    CUSTOM_COLLECTION,

    // --- Monitor event ---
    PERIODIC_MONITOR,

    // Sentinel marking the end of the event range.
    LAST_EVENT,
};
179 
180 enum SwitchMessage {
181     /**
182      * On receiving this message, collect the last boot-time record and start periodic collection
183      * and monitor.
184      */
185     END_BOOTTIME_COLLECTION = EventType::LAST_EVENT + 1,
186 
187     /**
188      * On receiving this message, collect the last user switch record and start periodic collection
189      * and monitor.
190      */
191     END_USER_SWITCH_COLLECTION,
192 
193     /**
194      * On receiving this message, collect the last wake up record and start periodic collection and
195      * monitor.
196      */
197     END_WAKE_UP_COLLECTION,
198 
199     /**
200      * On receiving this message, ends custom collection, discard collected data and start periodic
201      * collection and monitor.
202      */
203     END_CUSTOM_COLLECTION,
204 
205     LAST_SWITCH_MSG,
206 };
207 
208 enum TaskMessage {
209     // On receiving this message, send the cached resource stats to CarWatchdogService.
210     SEND_RESOURCE_STATS = SwitchMessage::LAST_SWITCH_MSG + 1,
211 };
212 
213 /**
214  * WatchdogPerfServiceInterface collects performance data during boot-time, user switch, system wake
215  * up and periodically post system events. It exposes APIs that the main thread and binder service
216  * can call to start a collection, switch the collection type, and generate collection dumps.
217  */
218 class WatchdogPerfServiceInterface : virtual public MessageHandler {
219 public:
220     // Register a data processor to process the data collected by |WatchdogPerfService|.
221     virtual android::base::Result<void> registerDataProcessor(
222             android::sp<DataProcessorInterface> processor) = 0;
223     /**
224      * Starts the boot-time collection in the looper handler on a new thread and returns
225      * immediately. Must be called only once. Otherwise, returns an error.
226      */
227     virtual android::base::Result<void> start() = 0;
228     // Terminates the collection thread and returns.
229     virtual void terminate() = 0;
230     // Sets the system state.
231     virtual void setSystemState(SystemState systemState) = 0;
232     // Handles unsent resource stats.
233     virtual void onCarWatchdogServiceRegistered() = 0;
234     // Ends the boot-time collection by switching to periodic collection after the post event
235     // duration.
236     virtual android::base::Result<void> onBootFinished() = 0;
237     // Starts and ends the user switch collection depending on the user states received.
238     virtual android::base::Result<void> onUserStateChange(
239             userid_t userId,
240             const aidl::android::automotive::watchdog::internal::UserState& userState) = 0;
241     // Starts wake-up collection. Any running collection is stopped, except for custom collections.
242     virtual android::base::Result<void> onSuspendExit() = 0;
243     // Called on shutdown enter, suspend enter and hibernation enter.
244     virtual android::base::Result<void> onShutdownEnter() = 0;
245 
246     /**
247      * Depending on the arguments, it either:
248      * 1. Starts a custom collection.
249      * 2. Or ends the current custom collection and dumps the collected data.
250      * Returns any error observed during the dump generation.
251      */
252     virtual android::base::Result<void> onCustomCollection(int fd, const char** args,
253                                                            uint32_t numArgs) = 0;
254     // Generates a dump from the system events and periodic collection events.
255     virtual android::base::Result<void> onDump(int fd) const = 0;
256     // Generates a proto dump from system events and periodic collection events.
257     virtual android::base::Result<void> onDumpProto(
258             android::util::ProtoOutputStream& outProto) const = 0;
259     // Dumps the help text.
260     virtual bool dumpHelpText(int fd) const = 0;
261 };
262 
263 class WatchdogPerfService final : public WatchdogPerfServiceInterface {
264 public:
WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface> & watchdogServiceHelper,const std::function<int64_t ()> & getElapsedTimeSinceBootMsFunc)265     WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper,
266                         const std::function<int64_t()>& getElapsedTimeSinceBootMsFunc) :
267           kGetElapsedTimeSinceBootMillisFunc(std::move(getElapsedTimeSinceBootMsFunc)),
268           mPostSystemEventDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>(
269                   std::chrono::seconds(sysprop::postSystemEventDuration().value_or(
270                           kDefaultPostSystemEventDurationSec.count())))),
271           mWakeUpDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>(
272                   std::chrono::seconds(sysprop::wakeUpEventDuration().value_or(
273                           kDefaultWakeUpEventDurationSec.count())))),
274           mUserSwitchTimeoutNs(std::chrono::duration_cast<std::chrono::nanoseconds>(
275                   std::chrono::seconds(sysprop::userSwitchTimeout().value_or(
276                           kDefaultUserSwitchTimeoutSec.count())))),
277           mHandlerLooper(android::sp<LooperWrapper>::make()),
278           mSystemState(NORMAL_MODE),
279           mBoottimeCollection({}),
280           mPeriodicCollection({}),
281           mUserSwitchCollection({}),
282           mCustomCollection({}),
283           mPeriodicMonitor({}),
284           mUnsentResourceStats({}),
285           mLastCollectionTimeMillis(0),
286           mBootCompletedTimeEpochSeconds(0),
287           mKernelStartTimeEpochSeconds(0),
288           mCurrCollectionEvent(EventType::INIT),
289           mUidStatsCollector(android::sp<UidStatsCollector>::make()),
290           mProcStatCollector(android::sp<ProcStatCollector>::make()),
291           mProcDiskStatsCollector(android::sp<ProcDiskStatsCollector>::make()),
292           mDataProcessors({}),
293           mWatchdogServiceHelper(watchdogServiceHelper) {}
294 
295     android::base::Result<void> registerDataProcessor(
296             android::sp<DataProcessorInterface> processor) override;
297 
298     android::base::Result<void> start() override;
299 
300     void terminate() override;
301 
302     void setSystemState(SystemState systemState) override;
303 
304     void onCarWatchdogServiceRegistered() override;
305 
306     android::base::Result<void> onBootFinished() override;
307 
308     android::base::Result<void> onUserStateChange(
309             userid_t userId,
310             const aidl::android::automotive::watchdog::internal::UserState& userState) override;
311 
312     android::base::Result<void> onSuspendExit() override;
313 
314     android::base::Result<void> onShutdownEnter() override;
315 
316     android::base::Result<void> onCustomCollection(int fd, const char** args,
317                                                    uint32_t numArgs) override;
318 
319     android::base::Result<void> onDump(int fd) const override;
320     android::base::Result<void> onDumpProto(
321             android::util::ProtoOutputStream& outProto) const override;
322 
323     bool dumpHelpText(int fd) const override;
324 
325 private:
326     struct EventMetadata {
327         // Collection or monitor event.
328         EventType eventType = EventType::LAST_EVENT;
329         // Interval between subsequent events.
330         std::chrono::nanoseconds pollingIntervalNs = 0ns;
331         // Used to calculate the uptime for next event.
332         nsecs_t lastPollElapsedRealTimeNs = 0;
333         // Filter the results only to the specified packages.
334         std::unordered_set<std::string> filterPackages;
335 
336         std::string toString() const;
337     };
338 
339     struct UserSwitchEventMetadata : WatchdogPerfService::EventMetadata {
340         // User id of user being switched from.
341         userid_t from = 0;
342         // User id of user being switched to.
343         userid_t to = 0;
344     };
345 
346     // Dumps the collectors' status when they are disabled.
347     android::base::Result<void> dumpCollectorsStatusLocked(int fd) const;
348 
349     /**
350      * Starts a custom collection on the looper handler, temporarily stops the periodic collection
351      * (won't discard the collected data), and returns immediately. Returns any error observed
352      * during this process.
353      * The custom collection happens once every |interval| seconds. When the |maxDuration| is
354      * reached, the looper receives a message to end the collection, discards the collected data,
355      * and starts the periodic collection. This is needed to ensure the custom collection doesn't
356      * run forever when a subsequent |endCustomCollection| call is not received.
357      * When |kFilterPackagesFlag| value specified, the results are filtered only to the specified
358      * package names.
359      */
360     android::base::Result<void> startCustomCollection(
361             std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
362             const std::unordered_set<std::string>& filterPackages);
363 
364     /**
365      * Ends the current custom collection, generates a dump, sends a looper message to start the
366      * periodic collection, and returns immediately. Returns an error when there is no custom
367      * collection running or when a dump couldn't be generated from the custom collection.
368      */
369     android::base::Result<void> endCustomCollection(int fd);
370 
371     // Start a user switch collection.
372     android::base::Result<void> startUserSwitchCollection();
373 
374     // Switch to periodic collection and periodic monitor.
375     void switchToPeriodicLocked(bool startNow);
376 
377     // Handles the messages received by the looper.
378     void handleMessage(const Message& message) override;
379 
380     // Processes the collection events received by |handleMessage|.
381     android::base::Result<void> processCollectionEvent(EventMetadata* metadata);
382 
383     // Collects/processes the performance data for the current collection event.
384     android::base::Result<void> collectLocked(EventMetadata* metadata);
385 
386     // Processes the monitor events received by |handleMessage|.
387     android::base::Result<void> processMonitorEvent(EventMetadata* metadata);
388 
389     // Sends the unsent resource stats.
390     android::base::Result<void> sendResourceStats();
391 
392     // Notifies all registered data processors that either boot-time or wake-up collection will
393     // start. Individual implementations of data processors may clear stats collected during
394     // previous system startup events.
395     android::base::Result<void> notifySystemStartUpLocked();
396 
397     // Caches resource stats that have not been sent to CarWatchdogService.
398     void cacheUnsentResourceStatsLocked(
399             aidl::android::automotive::watchdog::internal::ResourceStats resourceStats);
400 
401     /**
402      * Returns the metadata for the current collection based on |mCurrCollectionEvent|. Returns
403      * nullptr on invalid collection event.
404      */
405     EventMetadata* getCurrentCollectionMetadataLocked();
406 
407     std::function<int64_t()> kGetElapsedTimeSinceBootMillisFunc;
408 
409     // Duration to extend a system event collection after the final signal is received.
410     std::chrono::nanoseconds mPostSystemEventDurationNs;
411 
412     // Duration of the wake-up collection event.
413     std::chrono::nanoseconds mWakeUpDurationNs;
414 
415     // Timeout duration for user switch collection in case final signal isn't received.
416     std::chrono::nanoseconds mUserSwitchTimeoutNs;
417 
418     // Thread on which the actual collection happens.
419     std::thread mCollectionThread;
420 
421     // Makes sure only one collection is running at any given time.
422     mutable Mutex mMutex;
423 
424     // Handler looper to execute different collection events on the collection thread.
425     android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex);
426 
427     // Current system state.
428     SystemState mSystemState GUARDED_BY(mMutex);
429 
430     // Info for the |EventType::BOOT_TIME_COLLECTION| collection event.
431     EventMetadata mBoottimeCollection GUARDED_BY(mMutex);
432 
433     // Info for the |EventType::PERIODIC_COLLECTION| collection event.
434     EventMetadata mPeriodicCollection GUARDED_BY(mMutex);
435 
436     // Info for the |EventType::USER_SWITCH_COLLECTION| collection event.
437     UserSwitchEventMetadata mUserSwitchCollection GUARDED_BY(mMutex);
438 
439     // Info for the |EventType::WAKE_UP_COLLECTION| collection event.
440     EventMetadata mWakeUpCollection GUARDED_BY(mMutex);
441 
442     // Info for the |EventType::CUSTOM_COLLECTION| collection event. The info is cleared at the end
443     // of every custom collection.
444     EventMetadata mCustomCollection GUARDED_BY(mMutex);
445 
446     // Info for the |EventType::PERIODIC_MONITOR| monitor event.
447     EventMetadata mPeriodicMonitor GUARDED_BY(mMutex);
448 
449     // Cache of resource stats that have not been sent to CarWatchdogService.
450     std::vector<std::tuple<nsecs_t, aidl::android::automotive::watchdog::internal::ResourceStats>>
451             mUnsentResourceStats GUARDED_BY(mMutex);
452 
453     // Tracks the latest collection time since boot in millis.
454     int64_t mLastCollectionTimeMillis GUARDED_BY(mMutex);
455 
456     // Time of receiving boot complete signal.
457     time_t mBootCompletedTimeEpochSeconds GUARDED_BY(mMutex);
458 
459     // Boot start time collected from /proc/stat.
460     time_t mKernelStartTimeEpochSeconds GUARDED_BY(mMutex);
461 
462     // Tracks either the WatchdogPerfService's state or current collection event. Updated on
463     // |start|, |onBootFinished|, |onUserStateChange|, |startCustomCollection|,
464     // |endCustomCollection|, and |terminate|.
465     EventType mCurrCollectionEvent GUARDED_BY(mMutex);
466 
467     // Collector for UID process and I/O stats.
468     android::sp<UidStatsCollectorInterface> mUidStatsCollector GUARDED_BY(mMutex);
469 
470     // Collector/parser for `/proc/stat`.
471     android::sp<ProcStatCollectorInterface> mProcStatCollector GUARDED_BY(mMutex);
472 
473     // Collector/parser for `/proc/diskstats` file.
474     android::sp<ProcDiskStatsCollectorInterface> mProcDiskStatsCollector GUARDED_BY(mMutex);
475 
476     // Data processors for the collected performance data.
477     std::vector<android::sp<DataProcessorInterface>> mDataProcessors GUARDED_BY(mMutex);
478 
479     // Helper to communicate with the CarWatchdogService.
480     android::sp<WatchdogServiceHelperInterface> mWatchdogServiceHelper GUARDED_BY(mMutex);
481 
482     // For unit tests.
483     friend class internal::WatchdogPerfServicePeer;
484     FRIEND_TEST(WatchdogPerfServiceTest, TestServiceStartAndTerminate);
485 };
486 
487 }  // namespace watchdog
488 }  // namespace automotive
489 }  // namespace android
490