//
//
// Copyright 2016 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//

#include "src/cpp/thread_manager/thread_manager.h"

#include <climits>

#include "absl/strings/str_format.h"

#include <grpc/support/log.h>

#include "src/core/lib/gprpp/crash.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/thd.h"
#include "src/core/lib/resource_quota/resource_quota.h"

namespace grpc {

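// ThreadManager runs the pool of "poller" threads used by the C++ sync
// server. Each worker executes MainWorkLoop(), alternating between polling
// for work (PollForWork) and executing it (DoWork), and keeps the number of
// concurrent pollers between min_pollers_ and max_pollers_ (a max_pollers of
// -1 means unlimited, i.e. INT_MAX). A worker that decides to exit cannot
// delete itself (its destructor joins the underlying thread), so it parks
// itself on completed_threads_ for later cleanup.
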
ThreadManager::WorkerThread::WorkerThread(ThreadManager* thd_mgr)
    : thd_mgr_(thd_mgr) {
  // Make thread creation exclusive with respect to its join happening in
  // ~WorkerThread().
  thd_ = grpc_core::Thread(
      "grpcpp_sync_server",
      [](void* th) { static_cast<ThreadManager::WorkerThread*>(th)->Run(); },
      this, &created_);
  if (!created_) {
    gpr_log(GPR_ERROR, "Could not create grpcpp_sync_server worker-thread");
  }
}
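
// Note: grpc_core::Thread is assumed here to create the underlying thread in
// a dormant state, so Run() does not begin executing until the owner calls
// Start() (see Initialize() and MainWorkLoop()); created_ only records
// whether the thread could be allocated at all.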

void ThreadManager::WorkerThread::Run() {
  thd_mgr_->MainWorkLoop();
  thd_mgr_->MarkAsCompleted(this);
}

ThreadManager::WorkerThread::~WorkerThread() {
  // Don't join until the thread is fully constructed.
  thd_.Join();
}

ThreadManager::ThreadManager(const char*, grpc_resource_quota* resource_quota,
                             int min_pollers, int max_pollers)
    : shutdown_(false),
      thread_quota_(
          grpc_core::ResourceQuota::FromC(resource_quota)->thread_quota()),
      num_pollers_(0),
      min_pollers_(min_pollers),
      max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers),
      num_threads_(0),
      max_active_threads_sofar_(0) {}

ThreadManager::~ThreadManager() {
  {
    grpc_core::MutexLock lock(&mu_);
    GPR_ASSERT(num_threads_ == 0);
  }

  CleanupCompletedThreads();
}

void ThreadManager::Wait() {
  grpc_core::MutexLock lock(&mu_);
  while (num_threads_ != 0) {
    shutdown_cv_.Wait(&mu_);
  }
}

void ThreadManager::Shutdown() {
  grpc_core::MutexLock lock(&mu_);
  shutdown_ = true;
}

bool ThreadManager::IsShutdown() {
  grpc_core::MutexLock lock(&mu_);
  return shutdown_;
}
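
// Illustrative sketch (hypothetical caller, not from this file): an owner of
// a concrete ThreadManager subclass would typically shut it down in two
// steps:
//
//   manager->Shutdown();  // set the flag; pollers exit as they observe it
//   manager->Wait();      // block until num_threads_ drops to zero
//
// Shutdown() merely sets shutdown_ under mu_; Wait() is the actual barrier,
// sleeping on shutdown_cv_ until the last exiting worker signals it.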

int ThreadManager::GetMaxActiveThreadsSoFar() {
  grpc_core::MutexLock list_lock(&list_mu_);
  return max_active_threads_sofar_;
}

void ThreadManager::MarkAsCompleted(WorkerThread* thd) {
  {
    grpc_core::MutexLock list_lock(&list_mu_);
    completed_threads_.push_back(thd);
  }

  {
    grpc_core::MutexLock lock(&mu_);
    num_threads_--;
    if (num_threads_ == 0) {
      shutdown_cv_.Signal();
    }
  }

  // Give a thread back to the resource quota.
  thread_quota_->Release(1);
}
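
// MarkAsCompleted() runs on the exiting worker's own thread, so the
// WorkerThread object cannot be deleted here: ~WorkerThread() joins the
// underlying thread, and a thread cannot join itself. Deletion is deferred to
// CleanupCompletedThreads(), which runs on a different thread (another
// exiting worker, or ~ThreadManager()).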

void ThreadManager::CleanupCompletedThreads() {
  std::list<WorkerThread*> completed_threads;
  {
    // Swap out the completed-threads list so that other threads can make
    // progress more quickly.
    grpc_core::MutexLock lock(&list_mu_);
    completed_threads.swap(completed_threads_);
  }
  for (auto thd : completed_threads) delete thd;
}
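
// The swap above is the usual drain-under-lock pattern: list_mu_ is held only
// long enough to take ownership of the pending list, so workers calling
// MarkAsCompleted() are never blocked behind the Join() calls performed by
// the WorkerThread destructors in the delete loop.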

void ThreadManager::Initialize() {
  if (!thread_quota_->Reserve(min_pollers_)) {
    grpc_core::Crash(absl::StrFormat(
        "No thread quota available to even create the minimum required "
        "polling threads (i.e. %d). Unable to start the thread manager",
        min_pollers_));
  }

  {
    grpc_core::MutexLock lock(&mu_);
    num_pollers_ = min_pollers_;
    num_threads_ = min_pollers_;
    max_active_threads_sofar_ = min_pollers_;
  }

  for (int i = 0; i < min_pollers_; i++) {
    WorkerThread* worker = new WorkerThread(this);
    GPR_ASSERT(worker->created());  // Must be able to create the minimum.
    worker->Start();
  }
}
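
// Thread-quota accounting: Initialize() reserves min_pollers_ threads up
// front, MainWorkLoop() reserves one more before spawning each additional
// poller, and MarkAsCompleted() releases one as each worker exits.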

void ThreadManager::MainWorkLoop() {
  while (true) {
    void* tag;
    bool ok;
    WorkStatus work_status = PollForWork(&tag, &ok);

    grpc_core::LockableAndReleasableMutexLock lock(&mu_);
    // Reduce the number of pollers by 1 and check what happened with the poll.
    num_pollers_--;
    bool done = false;
    switch (work_status) {
      case TIMEOUT:
        // If we timed out and we have more pollers than we need (or we are
        // shut down), finish this thread.
        if (shutdown_ || num_pollers_ > max_pollers_) done = true;
        break;
      case SHUTDOWN:
        // If the thread manager is shut down, finish this thread.
        done = true;
        break;
      case WORK_FOUND:
        // If we got work, there are now insufficient pollers, and there is
        // quota available to create a new thread, start a new poller thread.
        bool resource_exhausted = false;
        if (!shutdown_ && num_pollers_ < min_pollers_) {
          if (thread_quota_->Reserve(1)) {
            // We can allocate a new poller thread.
            num_pollers_++;
            num_threads_++;
            if (num_threads_ > max_active_threads_sofar_) {
              max_active_threads_sofar_ = num_threads_;
            }
            // Drop the lock before spawning the thread to avoid contention.
            lock.Release();
            WorkerThread* worker = new WorkerThread(this);
            if (worker->created()) {
              worker->Start();
            } else {
              // Take the lock again to undo the changes to the poller/thread
              // counters.
              grpc_core::MutexLock failure_lock(&mu_);
              num_pollers_--;
              num_threads_--;
              resource_exhausted = true;
              delete worker;
            }
          } else if (num_pollers_ > 0) {
            // There is still at least one thread polling, so we can go on
            // even though we are below the number of pollers that we would
            // like to have (min_pollers_).
            lock.Release();
          } else {
            // There are no pollers to spare and we couldn't allocate a new
            // thread, so resources are exhausted!
            lock.Release();
            resource_exhausted = true;
          }
        } else {
          // There are a sufficient number of pollers available, so we can do
          // the work and continue polling with our existing poller threads.
          lock.Release();
        }
        // The lock is always released at this point - do the application work,
        // or report resource exhaustion if there is new work but we couldn't
        // get a thread in which to do it.
        DoWork(tag, ok, !resource_exhausted);
        // Take the lock again to check postconditions.
        lock.Lock();
        // If we're shut down, we should finish at this point.
        if (shutdown_) done = true;
        break;
    }
    // If we decided to finish the thread, break out of the while loop.
    if (done) break;

    // Otherwise go back to polling as long as it doesn't exceed max_pollers_.
    //
    // **WARNING**:
    // There is a possibility of threads thrashing here (i.e. more thread
    // shutdowns and creations than in the ideal case). This happens if
    // max_pollers_ is small and the rate of incoming requests is also small.
    // In such scenarios we can possibly configure max_pollers_ to a higher
    // value and/or increase the cq timeout.
    //
    // However, not doing this check here and unconditionally incrementing
    // num_pollers_ (and hoping that the system will eventually settle down)
    // has far worse consequences, i.e. a huge number of threads getting
    // created to the point of thread exhaustion. For example: if the incoming
    // request rate is very high, all the polling threads will return very
    // quickly from PollForWork() with WORK_FOUND. They all briefly decrement
    // the num_pollers_ counter, thereby possibly - and briefly - making it go
    // below min_pollers_; this will most likely result in the creation of a
    // new poller, since num_pollers_ dipped below min_pollers_.
    //
    // Now, if we didn't do the max_pollers_ check here, all these threads
    // would go back to doing PollForWork() and the whole cycle would repeat
    // (with a new thread being added in each cycle). Once the total number of
    // threads in the system crosses a certain threshold (around ~1500), there
    // is heavy contention on mutexes (the mu_ here or the mutexes in gRPC
    // core like the pollset mutex) that makes DoWork() take longer to finish,
    // thereby causing new poller threads to be created even faster. This
    // results in a thread avalanche.
    if (num_pollers_ < max_pollers_) {
      num_pollers_++;
    } else {
      break;
    }
  }

  // This thread is exiting. Do some cleanup work, i.e. delete already
  // completed worker threads.
  CleanupCompletedThreads();

  // If we are here, either the ThreadManager is shutting down or it already
  // has enough threads.
}
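
// Illustrative sketch (hypothetical subclass; PollForWork()/DoWork() are
// declared in thread_manager.h, and their shapes below are inferred from the
// call sites above):
//
//   class MySyncRequestManager : public ThreadManager {
//     WorkStatus PollForWork(void** tag, bool* ok) override {
//       // e.g. block on a completion queue and return WORK_FOUND, TIMEOUT,
//       // or SHUTDOWN
//     }
//     void DoWork(void* tag, bool ok, bool resources) override {
//       // run the application handler associated with `tag`; `resources` is
//       // false when the manager could not spare a thread
//     }
//   };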

}  // namespace grpc