1 //
2 //
3 // Copyright 2023 gRPC authors.
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 //
18 
19 #include <grpc/support/port_platform.h>
20 
21 #include "src/cpp/ext/gcp/environment_autodetect.h"
22 
23 #include <algorithm>
24 #include <memory>
25 #include <utility>
26 
27 #include "absl/container/flat_hash_map.h"
28 #include "absl/meta/type_traits.h"
29 #include "absl/status/status.h"
30 #include "absl/status/statusor.h"
31 #include "absl/types/optional.h"
32 
33 #include <grpc/support/alloc.h>
34 #include <grpc/support/log.h>
35 #include <grpc/support/sync.h>
36 #include <grpcpp/impl/grpc_library.h>
37 
38 #include "src/core/ext/gcp/metadata_query.h"
39 #include "src/core/lib/debug/trace.h"
40 #include "src/core/lib/event_engine/default_event_engine.h"
41 #include "src/core/lib/gprpp/crash.h"
42 #include "src/core/lib/gprpp/env.h"
43 #include "src/core/lib/gprpp/load_file.h"
44 #include "src/core/lib/gprpp/orphanable.h"
45 #include "src/core/lib/gprpp/status_helper.h"
46 #include "src/core/lib/gprpp/time.h"
47 #include "src/core/lib/iomgr/closure.h"
48 #include "src/core/lib/iomgr/error.h"
49 #include "src/core/lib/iomgr/exec_ctx.h"
50 #include "src/core/lib/iomgr/iomgr_fwd.h"
51 #include "src/core/lib/iomgr/polling_entity.h"
52 #include "src/core/lib/iomgr/pollset.h"
53 #include "src/core/lib/slice/slice.h"
54 
55 namespace grpc {
56 namespace internal {
57 
58 namespace {
59 
// Trace flag named "environment_autodetect", constructed with `false` as its
// first argument. The logging in this file that reports file-read failures,
// metadata server results and the fallback to the "global" resource type is
// only emitted when this flag is enabled.
grpc_core::TraceFlag grpc_environment_autodetect_trace(
    false, "environment_autodetect");
62 
63 // This is not a definite method to get the namespace name for GKE, but it is
64 // the best we have.
GetNamespaceName()65 std::string GetNamespaceName() {
66   // Read the root file.
67   const char* filename =
68       "/var/run/secrets/kubernetes.io/serviceaccount/namespace";
69   auto namespace_name = grpc_core::LoadFile(filename, false);
70   if (!namespace_name.ok()) {
71     if (GRPC_TRACE_FLAG_ENABLED(grpc_environment_autodetect_trace)) {
72       gpr_log(GPR_DEBUG, "Reading file %s failed: %s", filename,
73               grpc_core::StatusToString(namespace_name.status()).c_str());
74     }
75     // Fallback on an environment variable
76     return grpc_core::GetEnv("NAMESPACE_NAME").value_or("");
77   }
78   return std::string(reinterpret_cast<const char*>((*namespace_name).begin()),
79                      (*namespace_name).length());
80 }
81 
82 // Get pod name for GKE
GetPodName()83 std::string GetPodName() {
84   auto pod_name = grpc_core::GetEnv("POD_NAME");
85   if (pod_name.has_value()) {
86     return pod_name.value();
87   }
88   return grpc_core::GetEnv("HOSTNAME").value_or("");
89 }
90 
91 // Get container name for GKE
GetContainerName()92 std::string GetContainerName() {
93   return grpc_core::GetEnv("HOSTNAME").value_or("");
94 }
95 
96 // Get function name for Cloud Functions
GetFunctionName()97 std::string GetFunctionName() {
98   auto k_service = grpc_core::GetEnv("K_SERVICE");
99   if (k_service.has_value()) {
100     return k_service.value();
101   }
102   return grpc_core::GetEnv("FUNCTION_NAME").value_or("");
103 }
104 
105 // Get revision name for Cloud run
GetRevisionName()106 std::string GetRevisionName() {
107   return grpc_core::GetEnv("K_REVISION").value_or("");
108 }
109 
110 // Get service name for Cloud run
GetServiceName()111 std::string GetServiceName() {
112   return grpc_core::GetEnv("K_SERVICE").value_or("");
113 }
114 
115 // Get configuration name for Cloud run
GetConfiguratioName()116 std::string GetConfiguratioName() {
117   return grpc_core::GetEnv("K_CONFIGURATION").value_or("");
118 }
119 
120 // Get module ID for App Engine
GetModuleId()121 std::string GetModuleId() {
122   return grpc_core::GetEnv("GAE_SERVICE").value_or("");
123 }
124 
125 // Get version ID for App Engine
GetVersionId()126 std::string GetVersionId() {
127   return grpc_core::GetEnv("GAE_VERSION").value_or("");
128 }
129 
// Fire and forget class.
//
// Works out which monitored resource type describes the current environment
// (k8s_container, cloud_function, cloud_run_revision, gae_app, gce_instance or
// global) and collects its labels. Labels that can be derived locally come from
// environment variables (and, for the GKE namespace, a file); the remaining
// ones are requested from the metadata server through
// grpc_core::MetadataQuery. Once a result has been received for every
// requested attribute, `on_done` is invoked with the assembled resource.
//
// The object starts with a ref count of 2. One ref is released by PollLoop()
// when it stops polling, the other by the metadata callback that completes the
// detection. Presumably the object is destroyed when the last ref is released
// (InternallyRefCounted is defined outside this file - confirm).
class EnvironmentAutoDetectHelper
    : public grpc_core::InternallyRefCounted<EnvironmentAutoDetectHelper>,
      private internal::GrpcLibrary {
 public:
  // Sets up a pollset for the metadata queries, schedules PollLoop() on
  // `event_engine` and then runs AutoDetect() on the calling thread.
  //
  // project_id: stored and reported as the "project_id" label.
  // on_done: invoked with the detected resource once every metadata attribute
  //     has been answered.
  // event_engine: used to schedule and re-schedule PollLoop().
  EnvironmentAutoDetectHelper(
      std::string project_id,
      absl::AnyInvocable<void(EnvironmentAutoDetect::ResourceType)> on_done,
      std::shared_ptr<grpc_event_engine::experimental::EventEngine>
          event_engine)
      : InternallyRefCounted(/*trace=*/nullptr, /*initial_refcount=*/2),
        project_id_(std::move(project_id)),
        on_done_(std::move(on_done)),
        event_engine_(std::move(event_engine)) {
    grpc_core::ExecCtx exec_ctx;
    // TODO(yashykt): The pollset stuff should go away once the HTTP library is
    // ported over to use EventEngine.
    pollset_ = static_cast<grpc_pollset*>(gpr_zalloc(grpc_pollset_size()));
    // grpc_pollset_init also hands back the mutex that is stored in mu_poll_.
    grpc_pollset_init(pollset_, &mu_poll_);
    pollent_ = grpc_polling_entity_create_from_pollset(pollset_);
    // TODO(yashykt): Note that using EventEngine::Run is not fork-safe. If we
    // want to make this fork-safe, we might need some re-work here.
    event_engine_->Run([this] { PollLoop(); });
    AutoDetect();
  }

  // Shuts the pollset down. The closure passed to grpc_pollset_shutdown
  // destroys the pollset and frees the memory allocated in the constructor.
  ~EnvironmentAutoDetectHelper() override {
    grpc_core::ExecCtx exec_ctx;
    grpc_pollset_shutdown(
        pollset_, GRPC_CLOSURE_CREATE(
                      [](void* arg, absl::Status /* status */) {
                        grpc_pollset_destroy(static_cast<grpc_pollset*>(arg));
                        gpr_free(arg);
                      },
                      pollset_, nullptr));
  }

  // Orphaning this object is not supported; calling this crashes the process.
  void Orphan() override {
    grpc_core::Crash("Illegal Orphan() call on EnvironmentAutoDetectHelper.");
  }

 private:
  // NOTE(review): this struct does not appear to be referenced anywhere else
  // in this class - confirm whether it is still needed.
  struct Attribute {
    std::string resource_attribute;
    std::string metadata_server_atttribute;
  };

  // Performs one round of work on the pollset and then either re-schedules
  // itself on the event engine 100ms later or, once `notify_poller_` is set,
  // releases the ref held on behalf of the poll loop.
  void PollLoop() {
    grpc_core::ExecCtx exec_ctx;
    bool done = false;
    gpr_mu_lock(mu_poll_);
    grpc_pollset_worker* worker = nullptr;
    // The deadline passed is Timestamp::InfPast(); presumably this makes the
    // call return without blocking - confirm against grpc_pollset_work.
    // If polling reports an error, stop the loop.
    if (!GRPC_LOG_IF_ERROR(
            "pollset_work",
            grpc_pollset_work(grpc_polling_entity_pollset(&pollent_), &worker,
                              grpc_core::Timestamp::InfPast()))) {
      notify_poller_ = true;
    }
    // Snapshot the flag while mu_poll_ is held; it is also set by the metadata
    // callback when detection completes.
    done = notify_poller_;
    gpr_mu_unlock(mu_poll_);
    if (!done) {
      event_engine_->RunAfter(grpc_core::Duration::Milliseconds(100),
                              [this] { PollLoop(); });
    } else {
      Unref();
    }
  }

  // Picks the resource type from the environment variables that are present,
  // fills in the labels that are available locally, records which metadata
  // server attributes are still needed (and the label each one maps to), and
  // starts fetching them. The checks are ordered: GKE, Cloud Functions,
  // Cloud Run, App Engine, and finally GCE as the default assumption.
  void AutoDetect() {
    grpc_core::MutexLock lock(&mu_);
    // The project ID label is added regardless of the resource type.
    resource_.labels.emplace("project_id", project_id_);
    // GKE
    if (grpc_core::GetEnv("KUBERNETES_SERVICE_HOST").has_value()) {
      resource_.resource_type = "k8s_container";
      resource_.labels.emplace("namespace_name", GetNamespaceName());
      resource_.labels.emplace("pod_name", GetPodName());
      resource_.labels.emplace("container_name", GetContainerName());
      attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kZoneAttribute,
                                   "location");
      attributes_to_fetch_.emplace(
          grpc_core::MetadataQuery::kClusterNameAttribute, "cluster_name");
    }
    // Cloud Functions
    else if (grpc_core::GetEnv("FUNCTION_NAME").has_value() ||
             grpc_core::GetEnv("FUNCTION_TARGET").has_value()) {
      resource_.resource_type = "cloud_function";
      resource_.labels.emplace("function_name", GetFunctionName());
      attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kRegionAttribute,
                                   "region");
    }
    // Cloud Run
    else if (grpc_core::GetEnv("K_CONFIGURATION").has_value()) {
      resource_.resource_type = "cloud_run_revision";
      resource_.labels.emplace("revision_name", GetRevisionName());
      resource_.labels.emplace("service_name", GetServiceName());
      resource_.labels.emplace("configuration_name", GetConfiguratioName());
      attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kRegionAttribute,
                                   "location");
    }
    // App Engine
    else if (grpc_core::GetEnv("GAE_SERVICE").has_value()) {
      resource_.resource_type = "gae_app";
      resource_.labels.emplace("module_id", GetModuleId());
      resource_.labels.emplace("version_id", GetVersionId());
      attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kZoneAttribute,
                                   "zone");
    }
    // Assume GCE
    else {
      // Remember that this is only an assumption, so that a failed metadata
      // fetch can downgrade the resource type to "global".
      assuming_gce_ = true;
      resource_.resource_type = "gce_instance";
      attributes_to_fetch_.emplace(
          grpc_core::MetadataQuery::kInstanceIdAttribute, "instance_id");
      attributes_to_fetch_.emplace(grpc_core::MetadataQuery::kZoneAttribute,
                                   "zone");
    }
    FetchMetadataServerAttributesAsynchronouslyLocked();
  }

  // Creates one MetadataQuery per entry in `attributes_to_fetch_`, each with a
  // 10 second duration argument (presumably the query timeout - confirm
  // against MetadataQuery) and polled through `pollent_`. Every query reports
  // back through the callback below; the callback that empties
  // `attributes_to_fetch_` completes the detection.
  void FetchMetadataServerAttributesAsynchronouslyLocked()
      ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
    GPR_ASSERT(!attributes_to_fetch_.empty());
    for (auto& element : attributes_to_fetch_) {
      queries_.push_back(grpc_core::MakeOrphanable<grpc_core::MetadataQuery>(
          element.first, &pollent_,
          [this](std::string attribute, absl::StatusOr<std::string> result) {
            if (GRPC_TRACE_FLAG_ENABLED(grpc_environment_autodetect_trace)) {
              gpr_log(
                  GPR_INFO,
                  "Environment AutoDetect: Attribute: \"%s\" Result: \"%s\"",
                  attribute.c_str(),
                  result.ok()
                      ? result.value().c_str()
                      : grpc_core::StatusToString(result.status()).c_str());
            }
            // Only set if this callback handled the last outstanding
            // attribute.
            absl::optional<EnvironmentAutoDetect::ResourceType> resource;
            {
              grpc_core::MutexLock lock(&mu_);
              auto it = attributes_to_fetch_.find(attribute);
              if (it != attributes_to_fetch_.end()) {
                if (result.ok()) {
                  // The label name is moved out of the map entry, which is
                  // erased right below.
                  resource_.labels.emplace(std::move(it->second),
                                           std::move(result).value());
                }
                // If fetching from the MetadataServer failed and we were
                // assuming a GCE environment, fallback to "global".
                else if (assuming_gce_) {
                  if (GRPC_TRACE_FLAG_ENABLED(
                          grpc_environment_autodetect_trace)) {
                    gpr_log(GPR_INFO,
                            "Environment Autodetect: Falling back to global "
                            "resource type");
                  }
                  assuming_gce_ = false;
                  resource_.resource_type = "global";
                }
                // A failure in any other environment simply leaves the label
                // unset. Either way this attribute is no longer outstanding.
                attributes_to_fetch_.erase(it);
              } else {
                // This should not happen
                gpr_log(GPR_ERROR,
                        "An unexpected attribute was seen from the "
                        "MetadataServer: %s",
                        attribute.c_str());
              }
              if (attributes_to_fetch_.empty()) {
                resource = std::move(resource_);
              }
            }
            if (resource.has_value()) {
              // Tell PollLoop() to stop re-scheduling itself.
              gpr_mu_lock(mu_poll_);
              notify_poller_ = true;
              gpr_mu_unlock(mu_poll_);
              // Move the callback into a local before releasing the ref;
              // presumably Unref() may drop the last reference, after which
              // members must not be touched.
              auto on_done = std::move(on_done_);
              Unref();
              on_done(std::move(resource).value());
            }
          },
          grpc_core::Duration::Seconds(10)));
    }
  }

  // Value reported as the "project_id" label.
  const std::string project_id_;
  // Pollset allocated in the constructor and released by the closure that the
  // destructor passes to grpc_pollset_shutdown.
  grpc_pollset* pollset_ = nullptr;
  // Polling entity wrapping `pollset_`; handed to every MetadataQuery.
  grpc_polling_entity pollent_;
  // Mutex obtained from grpc_pollset_init. It is held around
  // grpc_pollset_work and around accesses to `notify_poller_`.
  gpr_mu* mu_poll_ = nullptr;
  // Completion callback; invoked once with the detected resource.
  absl::AnyInvocable<void(EnvironmentAutoDetect::ResourceType)> on_done_;
  // Event engine on which PollLoop() is scheduled.
  std::shared_ptr<grpc_event_engine::experimental::EventEngine> event_engine_;
  grpc_core::Mutex mu_;
  // Set to true (under `mu_poll_`) when PollLoop() should stop, either because
  // detection finished or because polling reported an error.
  bool notify_poller_ = false;
  // Metadata server attributes that have not been answered yet, mapped to the
  // resource label each one populates.
  absl::flat_hash_map<std::string /* metadata_server_attribute */,
                      std::string /* resource_attribute */>
      attributes_to_fetch_ ABSL_GUARDED_BY(mu_);
  // Queries created by FetchMetadataServerAttributesAsynchronouslyLocked();
  // they are held here for as long as this object exists.
  std::vector<grpc_core::OrphanablePtr<grpc_core::MetadataQuery>> queries_
      ABSL_GUARDED_BY(mu_);
  // Resource being assembled; moved out when the last attribute is answered.
  EnvironmentAutoDetect::ResourceType resource_ ABSL_GUARDED_BY(mu_);
  // This would be true if we are assuming the resource to be GCE. In this case,
  // there is a chance that it will fail and we should instead just use
  // "global".
  bool assuming_gce_ ABSL_GUARDED_BY(mu_) = false;
};
330 
// Process-wide EnvironmentAutoDetect instance. It is null until
// EnvironmentAutoDetect::Create() assigns it, and it is dereferenced by
// EnvironmentAutoDetect::Get().
EnvironmentAutoDetect* g_autodetect = nullptr;
332 
333 }  // namespace
334 
Create(std::string project_id)335 void EnvironmentAutoDetect::Create(std::string project_id) {
336   GPR_ASSERT(g_autodetect == nullptr && !project_id.empty());
337   g_autodetect = new EnvironmentAutoDetect(project_id);
338 }
339 
Get()340 EnvironmentAutoDetect& EnvironmentAutoDetect::Get() { return *g_autodetect; }
341 
// Stores `project_id` for use when detection is started by NotifyOnDone().
// Asserts that the stored project ID is not empty.
EnvironmentAutoDetect::EnvironmentAutoDetect(std::string project_id)
    : project_id_(std::move(project_id)) {
  GPR_ASSERT(!project_id_.empty());
}
346 
NotifyOnDone(absl::AnyInvocable<void ()> callback)347 void EnvironmentAutoDetect::NotifyOnDone(absl::AnyInvocable<void()> callback) {
348   std::shared_ptr<grpc_event_engine::experimental::EventEngine> event_engine;
349   {
350     grpc_core::MutexLock lock(&mu_);
351     // Environment has already been detected
352     if (resource_ != nullptr) {
353       // Execute on the event engine to avoid deadlocks.
354       return event_engine_->Run(std::move(callback));
355     }
356     callbacks_.push_back(std::move(callback));
357     // Use the event_engine_ pointer as a signal to judge whether we've started
358     // detecting the environment.
359     if (event_engine_ == nullptr) {
360       event_engine_ = grpc_event_engine::experimental::GetDefaultEventEngine();
361       event_engine = event_engine_;
362     }
363   }
364   if (event_engine) {
365     new EnvironmentAutoDetectHelper(
366         project_id_,
367         [this](EnvironmentAutoDetect::ResourceType resource) {
368           std::vector<absl::AnyInvocable<void()>> callbacks;
369           {
370             grpc_core::MutexLock lock(&mu_);
371             resource_ = std::make_unique<EnvironmentAutoDetect::ResourceType>(
372                 std::move(resource));
373             callbacks = std::move(callbacks_);
374           }
375           for (auto& callback : callbacks) {
376             callback();
377           }
378         },
379         std::move(event_engine));
380   }
381 }
382 
383 }  // namespace internal
384 }  // namespace grpc
385