xref: /aosp_15_r20/external/grpc-grpc/src/cpp/ext/gcp/environment_autodetect.cc (revision cc02d7e222339f7a4f6ba5f422e6413f4bd931f2)
1 //
2 //
3 // Copyright 2023 gRPC authors.
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 //
18 
19 #include <grpc/support/port_platform.h>
20 
21 #include "src/cpp/ext/gcp/environment_autodetect.h"
22 
23 #include <memory>
24 #include <utility>
25 
26 #include "absl/container/flat_hash_map.h"
27 #include "absl/status/status.h"
28 #include "absl/status/statusor.h"
29 #include "absl/types/optional.h"
30 
31 #include <grpc/support/alloc.h>
32 #include <grpc/support/log.h>
33 #include <grpc/support/sync.h>
34 #include <grpcpp/impl/grpc_library.h>
35 
36 #include "src/core/ext/gcp/metadata_query.h"
37 #include "src/core/lib/debug/trace.h"
38 #include "src/core/lib/event_engine/default_event_engine.h"
39 #include "src/core/lib/gprpp/crash.h"
40 #include "src/core/lib/gprpp/env.h"
41 #include "src/core/lib/gprpp/load_file.h"
42 #include "src/core/lib/gprpp/orphanable.h"
43 #include "src/core/lib/gprpp/status_helper.h"
44 #include "src/core/lib/gprpp/time.h"
45 #include "src/core/lib/iomgr/closure.h"
46 #include "src/core/lib/iomgr/error.h"
47 #include "src/core/lib/iomgr/exec_ctx.h"
48 #include "src/core/lib/iomgr/iomgr_fwd.h"
49 #include "src/core/lib/iomgr/polling_entity.h"
50 #include "src/core/lib/iomgr/pollset.h"
51 #include "src/core/lib/slice/slice.h"
52 
53 namespace grpc {
54 namespace internal {
55 
56 namespace {
57 
58 grpc_core::TraceFlag grpc_environment_autodetect_trace(
59     false, "environment_autodetect");
60 
61 // This is not a definite method to get the namespace name for GKE, but it is
62 // the best we have.
GetNamespaceName()63 std::string GetNamespaceName() {
64   // Read the root file.
65   const char* filename =
66       "/var/run/secrets/kubernetes.io/serviceaccount/namespace";
67   auto namespace_name = grpc_core::LoadFile(filename, false);
68   if (!namespace_name.ok()) {
69     if (GRPC_TRACE_FLAG_ENABLED(grpc_environment_autodetect_trace)) {
70       gpr_log(GPR_DEBUG, "Reading file %s failed: %s", filename,
71               grpc_core::StatusToString(namespace_name.status()).c_str());
72     }
73     // Fallback on an environment variable
74     return grpc_core::GetEnv("NAMESPACE_NAME").value_or("");
75   }
76   return std::string(reinterpret_cast<const char*>((*namespace_name).begin()),
77                      (*namespace_name).length());
78 }
79 
80 // Get pod name for GKE
GetPodName()81 std::string GetPodName() {
82   auto pod_name = grpc_core::GetEnv("POD_NAME");
83   if (pod_name.has_value()) {
84     return pod_name.value();
85   }
86   return grpc_core::GetEnv("HOSTNAME").value_or("");
87 }
88 
89 // Get container name for GKE
GetContainerName()90 std::string GetContainerName() {
91   return grpc_core::GetEnv("HOSTNAME").value_or("");
92 }
93 
94 // Get function name for Cloud Functions
GetFunctionName()95 std::string GetFunctionName() {
96   auto k_service = grpc_core::GetEnv("K_SERVICE");
97   if (k_service.has_value()) {
98     return k_service.value();
99   }
100   return grpc_core::GetEnv("FUNCTION_NAME").value_or("");
101 }
102 
103 // Get revision name for Cloud run
GetRevisionName()104 std::string GetRevisionName() {
105   return grpc_core::GetEnv("K_REVISION").value_or("");
106 }
107 
108 // Get service name for Cloud run
GetServiceName()109 std::string GetServiceName() {
110   return grpc_core::GetEnv("K_SERVICE").value_or("");
111 }
112 
113 // Get configuration name for Cloud run
GetConfiguratioName()114 std::string GetConfiguratioName() {
115   return grpc_core::GetEnv("K_CONFIGURATION").value_or("");
116 }
117 
118 // Get module ID for App Engine
GetModuleId()119 std::string GetModuleId() {
120   return grpc_core::GetEnv("GAE_SERVICE").value_or("");
121 }
122 
123 // Get version ID for App Engine
GetVersionId()124 std::string GetVersionId() {
125   return grpc_core::GetEnv("GAE_VERSION").value_or("");
126 }
127 
128 // Fire and forget class
129 class EnvironmentAutoDetectHelper
130     : public grpc_core::InternallyRefCounted<EnvironmentAutoDetectHelper>,
131       private internal::GrpcLibrary {
132  public:
EnvironmentAutoDetectHelper(std::string project_id,absl::AnyInvocable<void (EnvironmentAutoDetect::ResourceType)> on_done,std::shared_ptr<grpc_event_engine::experimental::EventEngine> event_engine)133   EnvironmentAutoDetectHelper(
134       std::string project_id,
135       absl::AnyInvocable<void(EnvironmentAutoDetect::ResourceType)> on_done,
136       std::shared_ptr<grpc_event_engine::experimental::EventEngine>
137           event_engine)
138       : InternallyRefCounted(/*trace=*/nullptr, /*initial_refcount=*/2),
139         project_id_(std::move(project_id)),
140         on_done_(std::move(on_done)),
141         event_engine_(std::move(event_engine)) {
142     grpc_core::ExecCtx exec_ctx;
143     // TODO(yashykt): The pollset stuff should go away once the HTTP library is
144     // ported over to use EventEngine.
145     pollset_ = static_cast<grpc_pollset*>(gpr_zalloc(grpc_pollset_size()));
146     grpc_pollset_init(pollset_, &mu_poll_);
147     pollent_ = grpc_polling_entity_create_from_pollset(pollset_);
148     // TODO(yashykt): Note that using EventEngine::Run is not fork-safe. If we
149     // want to make this fork-safe, we might need some re-work here.
150     event_engine_->Run([this] { PollLoop(); });
151     AutoDetect();
152   }
153 
~EnvironmentAutoDetectHelper()154   ~EnvironmentAutoDetectHelper() override {
155     grpc_core::ExecCtx exec_ctx;
156     grpc_pollset_shutdown(
157         pollset_, GRPC_CLOSURE_CREATE(
158                       [](void* arg, absl::Status /* status */) {
159                         grpc_pollset_destroy(static_cast<grpc_pollset*>(arg));
160                         gpr_free(arg);
161                       },
162                       pollset_, nullptr));
163   }
164 
Orphan()165   void Orphan() override {
166     grpc_core::Crash("Illegal Orphan() call on EnvironmentAutoDetectHelper.");
167   }
168 
169  private:
170   struct Attribute {
171     std::string resource_attribute;
172     std::string metadata_server_atttribute;
173   };
174 
PollLoop()175   void PollLoop() {
176     grpc_core::ExecCtx exec_ctx;
177     bool done = false;
178     gpr_mu_lock(mu_poll_);
179     grpc_pollset_worker* worker = nullptr;
180     if (!GRPC_LOG_IF_ERROR(
181             "pollset_work",
182             grpc_pollset_work(grpc_polling_entity_pollset(&pollent_), &worker,
183                               grpc_core::Timestamp::InfPast()))) {
184       notify_poller_ = true;
185     }
186     done = notify_poller_;
187     gpr_mu_unlock(mu_poll_);
188     if (!done) {
189       event_engine_->RunAfter(grpc_core::Duration::Milliseconds(100),
190                               [this] { PollLoop(); });
191     } else {
192       Unref();
193     }
194   }
195 
AutoDetect()196   void AutoDetect() {
197     grpc_core::MutexLock lock(&mu_);
198     // GKE
199     resource_.labels.emplace("project_id", project_id_);
200     if (grpc_core::GetEnv("KUBERNETES_SERVICE_HOST").has_value()) {
201       resource_.resource_type = "k8s_container";
202       resource_.labels.emplace("namespace_name", GetNamespaceName());
203       resource_.labels.emplace("pod_name", GetPodName());
204       resource_.labels.emplace("container_name", GetContainerName());
205       attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kZoneAttribute,
206                                    "location");
207       attributes_to_fetch_.emplace(
208           grpc_core::MetadataQuery::kClusterNameAttribute, "cluster_name");
209     }
210     // Cloud Functions
211     else if (grpc_core::GetEnv("FUNCTION_NAME").has_value() ||
212              grpc_core::GetEnv("FUNCTION_TARGET").has_value()) {
213       resource_.resource_type = "cloud_function";
214       resource_.labels.emplace("function_name", GetFunctionName());
215       attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kRegionAttribute,
216                                    "region");
217     }
218     // Cloud Run
219     else if (grpc_core::GetEnv("K_CONFIGURATION").has_value()) {
220       resource_.resource_type = "cloud_run_revision";
221       resource_.labels.emplace("revision_name", GetRevisionName());
222       resource_.labels.emplace("service_name", GetServiceName());
223       resource_.labels.emplace("configuration_name", GetConfiguratioName());
224       attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kRegionAttribute,
225                                    "location");
226     }
227     // App Engine
228     else if (grpc_core::GetEnv("GAE_SERVICE").has_value()) {
229       resource_.resource_type = "gae_app";
230       resource_.labels.emplace("module_id", GetModuleId());
231       resource_.labels.emplace("version_id", GetVersionId());
232       attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kZoneAttribute,
233                                    "zone");
234     }
235     // Assume GCE
236     else {
237       assuming_gce_ = true;
238       resource_.resource_type = "gce_instance";
239       attributes_to_fetch_.emplace(
240           grpc_core::MetadataQuery::kInstanceIdAttribute, "instance_id");
241       attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kZoneAttribute,
242                                    "zone");
243     }
244     FetchMetadataServerAttributesAsynchronouslyLocked();
245   }
246 
FetchMetadataServerAttributesAsynchronouslyLocked()247   void FetchMetadataServerAttributesAsynchronouslyLocked()
248       ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
249     GPR_ASSERT(!attributes_to_fetch_.empty());
250     for (auto& element : attributes_to_fetch_) {
251       queries_.push_back(grpc_core::MakeOrphanable<grpc_core::MetadataQuery>(
252           element.first, &pollent_,
253           [this](std::string attribute, absl::StatusOr<std::string> result) {
254             if (GRPC_TRACE_FLAG_ENABLED(grpc_environment_autodetect_trace)) {
255               gpr_log(
256                   GPR_INFO,
257                   "Environment AutoDetect: Attribute: \"%s\" Result: \"%s\"",
258                   attribute.c_str(),
259                   result.ok()
260                       ? result.value().c_str()
261                       : grpc_core::StatusToString(result.status()).c_str());
262             }
263             absl::optional<EnvironmentAutoDetect::ResourceType> resource;
264             {
265               grpc_core::MutexLock lock(&mu_);
266               auto it = attributes_to_fetch_.find(attribute);
267               if (it != attributes_to_fetch_.end()) {
268                 if (result.ok()) {
269                   resource_.labels.emplace(std::move(it->second),
270                                            std::move(result).value());
271                 }
272                 // If fetching from the MetadataServer failed and we were
273                 // assuming a GCE environment, fallback to "global".
274                 else if (assuming_gce_) {
275                   if (GRPC_TRACE_FLAG_ENABLED(
276                           grpc_environment_autodetect_trace)) {
277                     gpr_log(GPR_INFO,
278                             "Environment Autodetect: Falling back to global "
279                             "resource type");
280                   }
281                   assuming_gce_ = false;
282                   resource_.resource_type = "global";
283                 }
284                 attributes_to_fetch_.erase(it);
285               } else {
286                 // This should not happen
287                 gpr_log(GPR_ERROR,
288                         "An unexpected attribute was seen from the "
289                         "MetadataServer: %s",
290                         attribute.c_str());
291               }
292               if (attributes_to_fetch_.empty()) {
293                 resource = std::move(resource_);
294               }
295             }
296             if (resource.has_value()) {
297               gpr_mu_lock(mu_poll_);
298               notify_poller_ = true;
299               gpr_mu_unlock(mu_poll_);
300               auto on_done = std::move(on_done_);
301               Unref();
302               on_done(std::move(resource).value());
303             }
304           },
305           grpc_core::Duration::Seconds(10)));
306     }
307   }
308 
309   const std::string project_id_;
310   grpc_pollset* pollset_ = nullptr;
311   grpc_polling_entity pollent_;
312   gpr_mu* mu_poll_ = nullptr;
313   absl::AnyInvocable<void(EnvironmentAutoDetect::ResourceType)> on_done_;
314   std::shared_ptr<grpc_event_engine::experimental::EventEngine> event_engine_;
315   grpc_core::Mutex mu_;
316   bool notify_poller_ = false;
317   absl::flat_hash_map<std::string /* metadata_server_attribute */,
318                       std::string /* resource_attribute */>
319       attributes_to_fetch_ ABSL_GUARDED_BY(mu_);
320   std::vector<grpc_core::OrphanablePtr<grpc_core::MetadataQuery>> queries_
321       ABSL_GUARDED_BY(mu_);
322   EnvironmentAutoDetect::ResourceType resource_ ABSL_GUARDED_BY(mu_);
323   // This would be true if we are assuming the resource to be GCE. In this case,
324   // there is a chance that it will fail and we should instead just use
325   // "global".
326   bool assuming_gce_ ABSL_GUARDED_BY(mu_) = false;
327 };
328 
329 EnvironmentAutoDetect* g_autodetect = nullptr;
330 
331 }  // namespace
332 
Create(std::string project_id)333 void EnvironmentAutoDetect::Create(std::string project_id) {
334   GPR_ASSERT(g_autodetect == nullptr && !project_id.empty());
335   g_autodetect = new EnvironmentAutoDetect(project_id);
336 }
337 
Get()338 EnvironmentAutoDetect& EnvironmentAutoDetect::Get() { return *g_autodetect; }
339 
EnvironmentAutoDetect(std::string project_id)340 EnvironmentAutoDetect::EnvironmentAutoDetect(std::string project_id)
341     : project_id_(std::move(project_id)) {
342   GPR_ASSERT(!project_id_.empty());
343 }
344 
NotifyOnDone(absl::AnyInvocable<void ()> callback)345 void EnvironmentAutoDetect::NotifyOnDone(absl::AnyInvocable<void()> callback) {
346   std::shared_ptr<grpc_event_engine::experimental::EventEngine> event_engine;
347   {
348     grpc_core::MutexLock lock(&mu_);
349     // Environment has already been detected
350     if (resource_ != nullptr) {
351       // Execute on the event engine to avoid deadlocks.
352       return event_engine_->Run(std::move(callback));
353     }
354     callbacks_.push_back(std::move(callback));
355     // Use the event_engine_ pointer as a signal to judge whether we've started
356     // detecting the environment.
357     if (event_engine_ == nullptr) {
358       event_engine_ = grpc_event_engine::experimental::GetDefaultEventEngine();
359       event_engine = event_engine_;
360     }
361   }
362   if (event_engine) {
363     new EnvironmentAutoDetectHelper(
364         project_id_,
365         [this](EnvironmentAutoDetect::ResourceType resource) {
366           std::vector<absl::AnyInvocable<void()>> callbacks;
367           {
368             grpc_core::MutexLock lock(&mu_);
369             resource_ = std::make_unique<EnvironmentAutoDetect::ResourceType>(
370                 std::move(resource));
371             callbacks = std::move(callbacks_);
372           }
373           for (auto& callback : callbacks) {
374             callback();
375           }
376         },
377         std::move(event_engine));
378   }
379 }
380 
381 }  // namespace internal
382 }  // namespace grpc
383