1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stats_event.h>
18 #include <stats_socket_loss_reporter.h>
19 #include <unistd.h>
20 
21 #include <vector>
22 
23 #include "stats_statsdsocketlog.h"
24 #include "utils.h"
25 
StatsSocketLossReporter()26 StatsSocketLossReporter::StatsSocketLossReporter() : mUid(getuid()) {
27 }
28 
~StatsSocketLossReporter()29 StatsSocketLossReporter::~StatsSocketLossReporter() {
30     // try to dump loss stats since there might be pending data which have been not sent earlier
31     // due to:
32     // - cool down timer was active
33     // - no input atoms to trigger loss info dump after cooldown timer expired
34     if (__builtin_available(android __ANDROID_API_T__, *)) {
35         dumpAtomsLossStats(true);
36     }
37 }
38 
getInstance()39 StatsSocketLossReporter& StatsSocketLossReporter::getInstance() {
40     static StatsSocketLossReporter instance;
41     return instance;
42 }
43 
noteDrop(int32_t error,int32_t atomId)44 void StatsSocketLossReporter::noteDrop(int32_t error, int32_t atomId) {
45     using namespace android::os::statsdsocket;
46 
47     const int64_t currentRealtimeTsNanos = get_elapsed_realtime_ns();
48 
49     // The intention is to skip self counting, however the timestamps still need to be updated
50     // to know when was last failed attempt to log atom.
51     // This is required for more accurate cool down timer work
52     if (mFirstTsNanos == 0) {
53         mFirstTsNanos.store(currentRealtimeTsNanos, std::memory_order_relaxed);
54     }
55     mLastTsNanos.store(currentRealtimeTsNanos, std::memory_order_relaxed);
56 
57     if (atomId == STATS_SOCKET_LOSS_REPORTED) {
58         // avoid self counting due to write to socket might fail during dumpAtomsLossStats()
59         // also due to mutex is not re-entrant and is already locked by dumpAtomsLossStats() API,
60         // return to avoid deadlock
61         // alternative is to consider std::recursive_mutex
62         return;
63     }
64 
65     std::unique_lock<std::mutex> lock(mMutex);
66 
67     // using unordered_map is more CPU efficient vs vectors, however will require some
68     // postprocessing before writing into the socket
69     const LossInfoKey key = std::make_pair(error, atomId);
70     auto counterIt = mLossInfo.find(key);
71     if (counterIt != mLossInfo.end()) {
72         ++counterIt->second;
73     } else if (mLossInfo.size() < kMaxAtomTagsCount) {
74         mLossInfo[key] = 1;
75     } else {
76         mOverflowCounter++;
77     }
78 }
79 
dumpAtomsLossStats(bool forceDump)80 void StatsSocketLossReporter::dumpAtomsLossStats(bool forceDump) {
81     using namespace android::os::statsdsocket;
82 
83     const int64_t currentRealtimeTsNanos = get_elapsed_realtime_ns();
84 
85     if (!forceDump && isCooldownTimerActive(currentRealtimeTsNanos)) {
86         // To avoid socket flooding with more STATS_SOCKET_LOSS_REPORTED atoms,
87         // which have high probability of write failures, the cooldown timer approach is applied:
88         // - start cooldown timer for 10us for every failed dump
89         // - before writing STATS_SOCKET_LOSS_REPORTED do check the timestamp to keep some delay
90         return;
91     }
92 
93     // intention to hold mutex here during the stats_write() to avoid data copy overhead
94     std::unique_lock<std::mutex> lock(mMutex);
95     if (mLossInfo.size() == 0) {
96         return;
97     }
98 
99     // populate temp vectors to be written into the socket
100     std::vector<int> errors(mLossInfo.size());
101     std::vector<int> tags(mLossInfo.size());
102     std::vector<int> counts(mLossInfo.size());
103 
104     auto lossInfoIt = mLossInfo.begin();
105     for (size_t i = 0; i < mLossInfo.size(); i++, lossInfoIt++) {
106         const LossInfoKey& key = lossInfoIt->first;
107         errors[i] = key.first;
108         tags[i] = key.second;
109         counts[i] = lossInfoIt->second;
110     }
111 
112     // below call might lead to socket loss event - intention is to avoid self counting
113     const int ret = stats_write(STATS_SOCKET_LOSS_REPORTED, mUid, mFirstTsNanos, mLastTsNanos,
114                                 mOverflowCounter, errors, tags, counts);
115     if (ret > 0) {
116         // Otherwise, in case of failure we preserve all socket loss information between dumps.
117         // When above write failed - the socket loss stats are not discarded
118         // and would be re-send during next attempt.
119         mOverflowCounter = 0;
120         mLossInfo.clear();
121 
122         mFirstTsNanos.store(0, std::memory_order_relaxed);
123         mLastTsNanos.store(0, std::memory_order_relaxed);
124     }
125     // since the delay before next attempt is significantly larger than this API call
126     // duration it is ok to have correctness of timestamp in a range of 10us
127     startCooldownTimer(currentRealtimeTsNanos);
128 }
129 
startCooldownTimer(int64_t elapsedRealtimeNanos)130 void StatsSocketLossReporter::startCooldownTimer(int64_t elapsedRealtimeNanos) {
131     mCooldownTimerFinishAtNanos = elapsedRealtimeNanos + kCoolDownTimerDurationNanos;
132 }
133 
isCooldownTimerActive(int64_t elapsedRealtimeNanos) const134 bool StatsSocketLossReporter::isCooldownTimerActive(int64_t elapsedRealtimeNanos) const {
135     return mCooldownTimerFinishAtNanos > elapsedRealtimeNanos;
136 }
137