//
//
// Copyright 2015-2016 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
#include <grpc/support/port_platform.h>

#include "src/core/lib/surface/completion_queue.h"

#include <inttypes.h>
#include <stdio.h>

#include <algorithm>
#include <atomic>
#include <initializer_list>
#include <new>
#include <string>
#include <utility>
#include <vector>

#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"

#include <grpc/grpc.h>
#include <grpc/support/alloc.h>
#include <grpc/support/atm.h>
#include <grpc/support/log.h>
#include <grpc/support/sync.h>
#include <grpc/support/time.h>

#include "src/core/lib/debug/stats.h"
#include "src/core/lib/debug/stats_data.h"
#include "src/core/lib/gpr/spinlock.h"
#include "src/core/lib/gprpp/atomic_utils.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/ref_counted.h"
#include "src/core/lib/gprpp/status_helper.h"
#include "src/core/lib/gprpp/time.h"
#include "src/core/lib/iomgr/closure.h"
#include "src/core/lib/iomgr/exec_ctx.h"
#include "src/core/lib/iomgr/executor.h"
#include "src/core/lib/iomgr/iomgr.h"
#include "src/core/lib/iomgr/pollset.h"
#include "src/core/lib/surface/api_trace.h"
#include "src/core/lib/surface/event_string.h"
grpc_core::TraceFlag grpc_trace_operation_failures(false, "op_failure");
grpc_core::DebugOnlyTraceFlag grpc_trace_pending_tags(false, "pending_tags");
grpc_core::DebugOnlyTraceFlag grpc_trace_cq_refcount(false, "cq_refcount");

namespace {

// Specifies a cq thread local cache.
// The first event that occurs on a thread with a cq cache will go into that
// cache, and will only be returned on the thread that initialized the cache.
// NOTE: Only one event will ever be cached.
thread_local grpc_cq_completion* g_cached_event;
thread_local grpc_completion_queue* g_cached_cq;
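
// Illustrative sketch (added commentary, not part of this file's logic): a
// caller opts a thread into the cache and later flushes it, roughly like
// this. The exact call sites are up to the embedding code.
//
//   grpc_completion_queue_thread_local_cache_init(cq);
//   // ... start an op whose completion may land in the cache ...
//   void* tag;
//   int ok;
//   if (grpc_completion_queue_thread_local_cache_flush(cq, &tag, &ok)) {
//     // The cached completion was consumed on this thread.
//   }
//
// Only the thread that called _cache_init() can observe the cached event.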

struct plucker {
  grpc_pollset_worker** worker;
  void* tag;
};
struct cq_poller_vtable {
  bool can_get_pollset;
  bool can_listen;
  size_t (*size)(void);
  void (*init)(grpc_pollset* pollset, gpr_mu** mu);
  grpc_error_handle (*kick)(grpc_pollset* pollset,
                            grpc_pollset_worker* specific_worker);
  grpc_error_handle (*work)(grpc_pollset* pollset, grpc_pollset_worker** worker,
                            grpc_core::Timestamp deadline);
  void (*shutdown)(grpc_pollset* pollset, grpc_closure* closure);
  void (*destroy)(grpc_pollset* pollset);
};
typedef struct non_polling_worker {
  gpr_cv cv;
  bool kicked;
  struct non_polling_worker* next;
  struct non_polling_worker* prev;
} non_polling_worker;

struct non_polling_poller {
  gpr_mu mu;
  bool kicked_without_poller;
  non_polling_worker* root;
  grpc_closure* shutdown;
};
size_t non_polling_poller_size(void) { return sizeof(non_polling_poller); }

void non_polling_poller_init(grpc_pollset* pollset, gpr_mu** mu) {
  non_polling_poller* npp = reinterpret_cast<non_polling_poller*>(pollset);
  gpr_mu_init(&npp->mu);
  *mu = &npp->mu;
}

void non_polling_poller_destroy(grpc_pollset* pollset) {
  non_polling_poller* npp = reinterpret_cast<non_polling_poller*>(pollset);
  gpr_mu_destroy(&npp->mu);
}

grpc_error_handle non_polling_poller_work(grpc_pollset* pollset,
                                          grpc_pollset_worker** worker,
                                          grpc_core::Timestamp deadline) {
  non_polling_poller* npp = reinterpret_cast<non_polling_poller*>(pollset);
  if (npp->shutdown) return absl::OkStatus();
  if (npp->kicked_without_poller) {
    npp->kicked_without_poller = false;
    return absl::OkStatus();
  }
  non_polling_worker w;
  gpr_cv_init(&w.cv);
  if (worker != nullptr) *worker = reinterpret_cast<grpc_pollset_worker*>(&w);
  if (npp->root == nullptr) {
    npp->root = w.next = w.prev = &w;
  } else {
    w.next = npp->root;
    w.prev = w.next->prev;
    w.next->prev = w.prev->next = &w;
  }
  w.kicked = false;
  gpr_timespec deadline_ts = deadline.as_timespec(GPR_CLOCK_MONOTONIC);
  while (!npp->shutdown && !w.kicked &&
         !gpr_cv_wait(&w.cv, &npp->mu, deadline_ts)) {
  }
  grpc_core::ExecCtx::Get()->InvalidateNow();
  if (&w == npp->root) {
    npp->root = w.next;
    if (&w == npp->root) {
      if (npp->shutdown) {
        grpc_core::ExecCtx::Run(DEBUG_LOCATION, npp->shutdown,
                                absl::OkStatus());
      }
      npp->root = nullptr;
    }
  }
  w.next->prev = w.prev;
  w.prev->next = w.next;
  gpr_cv_destroy(&w.cv);
  if (worker != nullptr) *worker = nullptr;
  return absl::OkStatus();
}

grpc_error_handle non_polling_poller_kick(
    grpc_pollset* pollset, grpc_pollset_worker* specific_worker) {
  non_polling_poller* p = reinterpret_cast<non_polling_poller*>(pollset);
  if (specific_worker == nullptr) {
    specific_worker = reinterpret_cast<grpc_pollset_worker*>(p->root);
  }
  if (specific_worker != nullptr) {
    non_polling_worker* w =
        reinterpret_cast<non_polling_worker*>(specific_worker);
    if (!w->kicked) {
      w->kicked = true;
      gpr_cv_signal(&w->cv);
    }
  } else {
    p->kicked_without_poller = true;
  }
  return absl::OkStatus();
}

void non_polling_poller_shutdown(grpc_pollset* pollset, grpc_closure* closure) {
  non_polling_poller* p = reinterpret_cast<non_polling_poller*>(pollset);
  GPR_ASSERT(closure != nullptr);
  p->shutdown = closure;
  if (p->root == nullptr) {
    grpc_core::ExecCtx::Run(DEBUG_LOCATION, closure, absl::OkStatus());
  } else {
    non_polling_worker* w = p->root;
    do {
      gpr_cv_signal(&w->cv);
      w = w->next;
    } while (w != p->root);
  }
}

const cq_poller_vtable g_poller_vtable_by_poller_type[] = {
    // GRPC_CQ_DEFAULT_POLLING
    {true, true, grpc_pollset_size, grpc_pollset_init, grpc_pollset_kick,
     grpc_pollset_work, grpc_pollset_shutdown, grpc_pollset_destroy},
    // GRPC_CQ_NON_LISTENING
    {true, false, grpc_pollset_size, grpc_pollset_init, grpc_pollset_kick,
     grpc_pollset_work, grpc_pollset_shutdown, grpc_pollset_destroy},
    // GRPC_CQ_NON_POLLING
    {false, false, non_polling_poller_size, non_polling_poller_init,
     non_polling_poller_kick, non_polling_poller_work,
     non_polling_poller_shutdown, non_polling_poller_destroy},
};

}  // namespace

struct cq_vtable {
  grpc_cq_completion_type cq_completion_type;
  size_t data_size;
  void (*init)(void* data, grpc_completion_queue_functor* shutdown_callback);
  void (*shutdown)(grpc_completion_queue* cq);
  void (*destroy)(void* data);
  bool (*begin_op)(grpc_completion_queue* cq, void* tag);
  void (*end_op)(grpc_completion_queue* cq, void* tag, grpc_error_handle error,
                 void (*done)(void* done_arg, grpc_cq_completion* storage),
                 void* done_arg, grpc_cq_completion* storage, bool internal);
  grpc_event (*next)(grpc_completion_queue* cq, gpr_timespec deadline,
                     void* reserved);
  grpc_event (*pluck)(grpc_completion_queue* cq, void* tag,
                      gpr_timespec deadline, void* reserved);
};

namespace {

// Queue that holds the cq_completion_events. Internally uses
// MultiProducerSingleConsumerQueue (a lockfree multi-producer single-consumer
// queue). It uses a queue_lock to support multiple consumers.
// Only used in completion queues whose completion_type is GRPC_CQ_NEXT.
class CqEventQueue {
 public:
  CqEventQueue() = default;
  ~CqEventQueue() = default;

  // Note: The counter is not incremented/decremented atomically with push/pop.
  // The count is only eventually consistent.
  intptr_t num_items() const {
    return num_queue_items_.load(std::memory_order_relaxed);
  }

  bool Push(grpc_cq_completion* c);
  grpc_cq_completion* Pop();

 private:
  // Spinlock to serialize consumers, i.e. Pop() operations.
  gpr_spinlock queue_lock_ = GPR_SPINLOCK_INITIALIZER;

  grpc_core::MultiProducerSingleConsumerQueue queue_;

  // A lazy counter of the number of items in the queue. This is NOT atomically
  // incremented/decremented along with push/pop operations and hence is only
  // eventually consistent.
  std::atomic<intptr_t> num_queue_items_{0};
};
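
// Added commentary: Push() returns true when the lazy counter saw the queue
// transition from empty to non-empty, and cq_end_op_for_next (below) uses
// that signal to decide whether the pollset needs a kick. A minimal sketch of
// the pattern, assuming a CqEventQueue `q` and completion `c`:
//
//   bool is_first = q.Push(c);
//   if (is_first) {
//     // Wake a poller so the newly queued event is noticed promptly.
//   }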

struct cq_next_data {
  ~cq_next_data() {
    GPR_ASSERT(queue.num_items() == 0);
#ifndef NDEBUG
    if (pending_events.load(std::memory_order_acquire) != 0) {
      gpr_log(GPR_ERROR, "Destroying CQ without draining it fully.");
    }
#endif
  }

  /// Completed events for completion-queues of type GRPC_CQ_NEXT
  CqEventQueue queue;

  /// Counter of how many things have ever been queued on this completion
  /// queue; useful for avoiding locks to check the queue
  std::atomic<intptr_t> things_queued_ever{0};

  /// Number of outstanding events (+1 if not shut down)
  /// Initial count is dropped by grpc_completion_queue_shutdown
  std::atomic<intptr_t> pending_events{1};

  /// false initially, true once shutdown has been initiated
  bool shutdown_called = false;
};

struct cq_pluck_data {
  cq_pluck_data() {
    completed_tail = &completed_head;
    completed_head.next = reinterpret_cast<uintptr_t>(completed_tail);
  }

  ~cq_pluck_data() {
    GPR_ASSERT(completed_head.next ==
               reinterpret_cast<uintptr_t>(&completed_head));
#ifndef NDEBUG
    if (pending_events.load(std::memory_order_acquire) != 0) {
      gpr_log(GPR_ERROR, "Destroying CQ without draining it fully.");
    }
#endif
  }

  /// Completed events for completion-queues of type GRPC_CQ_PLUCK
  grpc_cq_completion completed_head;
  grpc_cq_completion* completed_tail;

  /// Number of pending events (+1 if we're not shutdown).
  /// Initial count is dropped by grpc_completion_queue_shutdown.
  std::atomic<intptr_t> pending_events{1};

  /// Counter of how many things have ever been queued on this completion
  /// queue; useful for avoiding locks to check the queue
  std::atomic<intptr_t> things_queued_ever{0};

  /// false initially, true once shutdown has completed
  // TODO(sreek): This is not needed since (shutdown == 1) if and only if
  // (pending_events == 0). So consider removing this in the future and using
  // pending_events instead.
  std::atomic<bool> shutdown{false};

  /// false initially, true once shutdown has been initiated
  bool shutdown_called = false;

  int num_pluckers = 0;
  plucker pluckers[GRPC_MAX_COMPLETION_QUEUE_PLUCKERS];
};

struct cq_callback_data {
  explicit cq_callback_data(grpc_completion_queue_functor* shutdown_callback)
      : shutdown_callback(shutdown_callback) {}

  ~cq_callback_data() {
#ifndef NDEBUG
    if (pending_events.load(std::memory_order_acquire) != 0) {
      gpr_log(GPR_ERROR, "Destroying CQ without draining it fully.");
    }
#endif
  }

  /// No actual completed events queue, unlike other types

  /// Number of pending events (+1 if we're not shutdown).
  /// Initial count is dropped by grpc_completion_queue_shutdown.
  std::atomic<intptr_t> pending_events{1};

  /// false initially, true once shutdown has been initiated
  bool shutdown_called = false;

  /// A callback that gets invoked when the CQ completes shutdown
  grpc_completion_queue_functor* shutdown_callback;
};

}  // namespace

// Completion queue structure
struct grpc_completion_queue {
  /// Once owning_refs drops to zero, we will destroy the cq
  grpc_core::RefCount owning_refs;
  /// Add padding to avoid false sharing
  char padding_1[GPR_CACHELINE_SIZE];
  gpr_mu* mu;

  char padding_2[GPR_CACHELINE_SIZE];
  const cq_vtable* vtable;

  char padding_3[GPR_CACHELINE_SIZE];
  const cq_poller_vtable* poller_vtable;

#ifndef NDEBUG
  void** outstanding_tags;
  size_t outstanding_tag_count;
  size_t outstanding_tag_capacity;
#endif

  grpc_closure pollset_shutdown_done;
  int num_polls;
};

// Forward declarations
static void cq_finish_shutdown_next(grpc_completion_queue* cq);
static void cq_finish_shutdown_pluck(grpc_completion_queue* cq);
static void cq_finish_shutdown_callback(grpc_completion_queue* cq);
static void cq_shutdown_next(grpc_completion_queue* cq);
static void cq_shutdown_pluck(grpc_completion_queue* cq);
static void cq_shutdown_callback(grpc_completion_queue* cq);

static bool cq_begin_op_for_next(grpc_completion_queue* cq, void* tag);
static bool cq_begin_op_for_pluck(grpc_completion_queue* cq, void* tag);
static bool cq_begin_op_for_callback(grpc_completion_queue* cq, void* tag);

// A cq_end_op function is called when an operation on a given CQ with
// a given tag has completed. The storage argument is a reference to the
// space reserved for this completion as it is placed into the corresponding
// queue. The done argument is a callback that will be invoked when it is
// safe to free up that storage. The storage MUST NOT be freed until the
// done callback is invoked.
static void cq_end_op_for_next(
    grpc_completion_queue* cq, void* tag, grpc_error_handle error,
    void (*done)(void* done_arg, grpc_cq_completion* storage), void* done_arg,
    grpc_cq_completion* storage, bool internal);

static void cq_end_op_for_pluck(
    grpc_completion_queue* cq, void* tag, grpc_error_handle error,
    void (*done)(void* done_arg, grpc_cq_completion* storage), void* done_arg,
    grpc_cq_completion* storage, bool internal);

static void cq_end_op_for_callback(
    grpc_completion_queue* cq, void* tag, grpc_error_handle error,
    void (*done)(void* done_arg, grpc_cq_completion* storage), void* done_arg,
    grpc_cq_completion* storage, bool internal);

static grpc_event cq_next(grpc_completion_queue* cq, gpr_timespec deadline,
                          void* reserved);

static grpc_event cq_pluck(grpc_completion_queue* cq, void* tag,
                           gpr_timespec deadline, void* reserved);

// Note that cq_init_next and cq_init_pluck do not use the shutdown_callback
static void cq_init_next(void* data,
                         grpc_completion_queue_functor* shutdown_callback);
static void cq_init_pluck(void* data,
                          grpc_completion_queue_functor* shutdown_callback);
static void cq_init_callback(void* data,
                             grpc_completion_queue_functor* shutdown_callback);
static void cq_destroy_next(void* data);
static void cq_destroy_pluck(void* data);
static void cq_destroy_callback(void* data);

// Completion queue vtables based on the completion-type
static const cq_vtable g_cq_vtable[] = {
    // GRPC_CQ_NEXT
    {GRPC_CQ_NEXT, sizeof(cq_next_data), cq_init_next, cq_shutdown_next,
     cq_destroy_next, cq_begin_op_for_next, cq_end_op_for_next, cq_next,
     nullptr},
    // GRPC_CQ_PLUCK
    {GRPC_CQ_PLUCK, sizeof(cq_pluck_data), cq_init_pluck, cq_shutdown_pluck,
     cq_destroy_pluck, cq_begin_op_for_pluck, cq_end_op_for_pluck, nullptr,
     cq_pluck},
    // GRPC_CQ_CALLBACK
    {GRPC_CQ_CALLBACK, sizeof(cq_callback_data), cq_init_callback,
     cq_shutdown_callback, cq_destroy_callback, cq_begin_op_for_callback,
     cq_end_op_for_callback, nullptr, nullptr},
};

#define DATA_FROM_CQ(cq) ((void*)((cq) + 1))
#define POLLSET_FROM_CQ(cq) \
  ((grpc_pollset*)((cq)->vtable->data_size + (char*)DATA_FROM_CQ(cq)))
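
// Added commentary: a completion queue is allocated as a single contiguous
// block (see grpc_completion_queue_create_internal below), so the two macros
// above are plain pointer arithmetic over that layout:
//
//   [ grpc_completion_queue ][ per-type data (vtable->data_size) ][ pollset ]
//   ^ cq                     ^ DATA_FROM_CQ(cq)                   ^ POLLSET_FROM_CQ(cq)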

grpc_core::TraceFlag grpc_cq_pluck_trace(false, "queue_pluck");

#define GRPC_SURFACE_TRACE_RETURNED_EVENT(cq, event)     \
  do {                                                   \
    if (GRPC_TRACE_FLAG_ENABLED(grpc_api_trace) &&       \
        (GRPC_TRACE_FLAG_ENABLED(grpc_cq_pluck_trace) || \
         (event)->type != GRPC_QUEUE_TIMEOUT)) {         \
      gpr_log(GPR_INFO, "RETURN_EVENT[%p]: %s", cq,      \
              grpc_event_string(event).c_str());         \
    }                                                    \
  } while (0)

static void on_pollset_shutdown_done(void* arg, grpc_error_handle error);

void grpc_completion_queue_thread_local_cache_init(grpc_completion_queue* cq) {
  if (g_cached_cq == nullptr) {
    g_cached_event = nullptr;
    g_cached_cq = cq;
  }
}

int grpc_completion_queue_thread_local_cache_flush(grpc_completion_queue* cq,
                                                   void** tag, int* ok) {
  grpc_cq_completion* storage = g_cached_event;
  int ret = 0;
  if (storage != nullptr && g_cached_cq == cq) {
    *tag = storage->tag;
    grpc_core::ExecCtx exec_ctx;
    *ok = (storage->next & uintptr_t{1}) == 1;
    storage->done(storage->done_arg, storage);
    ret = 1;
    cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);
    if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
      GRPC_CQ_INTERNAL_REF(cq, "shutting_down");
      gpr_mu_lock(cq->mu);
      cq_finish_shutdown_next(cq);
      gpr_mu_unlock(cq->mu);
      GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down");
    }
  }
  g_cached_event = nullptr;
  g_cached_cq = nullptr;

  return ret;
}

bool CqEventQueue::Push(grpc_cq_completion* c) {
  queue_.Push(
      reinterpret_cast<grpc_core::MultiProducerSingleConsumerQueue::Node*>(c));
  return num_queue_items_.fetch_add(1, std::memory_order_relaxed) == 0;
}

grpc_cq_completion* CqEventQueue::Pop() {
  grpc_cq_completion* c = nullptr;

  if (gpr_spinlock_trylock(&queue_lock_)) {
    bool is_empty = false;
    c = reinterpret_cast<grpc_cq_completion*>(queue_.PopAndCheckEnd(&is_empty));
    gpr_spinlock_unlock(&queue_lock_);
  }

  if (c) {
    num_queue_items_.fetch_sub(1, std::memory_order_relaxed);
  }

  return c;
}

grpc_completion_queue* grpc_completion_queue_create_internal(
    grpc_cq_completion_type completion_type, grpc_cq_polling_type polling_type,
    grpc_completion_queue_functor* shutdown_callback) {
  grpc_completion_queue* cq;

  GRPC_API_TRACE(
      "grpc_completion_queue_create_internal(completion_type=%d, "
      "polling_type=%d)",
      2, (completion_type, polling_type));

  switch (completion_type) {
    case GRPC_CQ_NEXT:
      grpc_core::global_stats().IncrementCqNextCreates();
      break;
    case GRPC_CQ_PLUCK:
      grpc_core::global_stats().IncrementCqPluckCreates();
      break;
    case GRPC_CQ_CALLBACK:
      grpc_core::global_stats().IncrementCqCallbackCreates();
      break;
  }

  const cq_vtable* vtable = &g_cq_vtable[completion_type];
  const cq_poller_vtable* poller_vtable =
      &g_poller_vtable_by_poller_type[polling_type];

  grpc_core::ExecCtx exec_ctx;

  cq = static_cast<grpc_completion_queue*>(
      gpr_zalloc(sizeof(grpc_completion_queue) + vtable->data_size +
                 poller_vtable->size()));

  cq->vtable = vtable;
  cq->poller_vtable = poller_vtable;

  // One for destroy(), one for pollset_shutdown
  new (&cq->owning_refs) grpc_core::RefCount(
      2, grpc_trace_cq_refcount.enabled() ? "completion_queue" : nullptr);

  poller_vtable->init(POLLSET_FROM_CQ(cq), &cq->mu);
  vtable->init(DATA_FROM_CQ(cq), shutdown_callback);

  GRPC_CLOSURE_INIT(&cq->pollset_shutdown_done, on_pollset_shutdown_done, cq,
                    grpc_schedule_on_exec_ctx);
  return cq;
}
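
// Illustrative sketch (added commentary): the public creation wrappers, e.g.
// grpc_completion_queue_create_for_next(), funnel into the function above,
// and a typical next-style consumer then drives and tears down the queue
// roughly like this:
//
//   grpc_completion_queue* cq = grpc_completion_queue_create_for_next(nullptr);
//   // ... associate cq with calls/server, start operations, consume events ...
//   grpc_completion_queue_shutdown(cq);
//   grpc_event ev;
//   do {  // drain until shutdown is observed
//     ev = grpc_completion_queue_next(
//         cq, gpr_inf_future(GPR_CLOCK_REALTIME), nullptr);
//   } while (ev.type != GRPC_QUEUE_SHUTDOWN);
//   grpc_completion_queue_destroy(cq);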

static void cq_init_next(void* data,
                         grpc_completion_queue_functor* /*shutdown_callback*/) {
  new (data) cq_next_data();
}

static void cq_destroy_next(void* data) {
  cq_next_data* cqd = static_cast<cq_next_data*>(data);
  cqd->~cq_next_data();
}

static void cq_init_pluck(
    void* data, grpc_completion_queue_functor* /*shutdown_callback*/) {
  new (data) cq_pluck_data();
}

static void cq_destroy_pluck(void* data) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*>(data);
  cqd->~cq_pluck_data();
}

static void cq_init_callback(void* data,
                             grpc_completion_queue_functor* shutdown_callback) {
  new (data) cq_callback_data(shutdown_callback);
}

static void cq_destroy_callback(void* data) {
  cq_callback_data* cqd = static_cast<cq_callback_data*>(data);
  cqd->~cq_callback_data();
}

grpc_cq_completion_type grpc_get_cq_completion_type(grpc_completion_queue* cq) {
  return cq->vtable->cq_completion_type;
}

int grpc_get_cq_poll_num(grpc_completion_queue* cq) {
  int cur_num_polls;
  gpr_mu_lock(cq->mu);
  cur_num_polls = cq->num_polls;
  gpr_mu_unlock(cq->mu);
  return cur_num_polls;
}

#ifndef NDEBUG
void grpc_cq_internal_ref(grpc_completion_queue* cq, const char* reason,
                          const char* file, int line) {
  grpc_core::DebugLocation debug_location(file, line);
#else
void grpc_cq_internal_ref(grpc_completion_queue* cq) {
  grpc_core::DebugLocation debug_location;
  const char* reason = nullptr;
#endif
  cq->owning_refs.Ref(debug_location, reason);
}

static void on_pollset_shutdown_done(void* arg, grpc_error_handle /*error*/) {
  grpc_completion_queue* cq = static_cast<grpc_completion_queue*>(arg);
  GRPC_CQ_INTERNAL_UNREF(cq, "pollset_destroy");
}

#ifndef NDEBUG
void grpc_cq_internal_unref(grpc_completion_queue* cq, const char* reason,
                            const char* file, int line) {
  grpc_core::DebugLocation debug_location(file, line);
#else
void grpc_cq_internal_unref(grpc_completion_queue* cq) {
  grpc_core::DebugLocation debug_location;
  const char* reason = nullptr;
#endif
  if (GPR_UNLIKELY(cq->owning_refs.Unref(debug_location, reason))) {
    cq->vtable->destroy(DATA_FROM_CQ(cq));
    cq->poller_vtable->destroy(POLLSET_FROM_CQ(cq));
#ifndef NDEBUG
    gpr_free(cq->outstanding_tags);
#endif
    gpr_free(cq);
  }
}

#ifndef NDEBUG
static void cq_check_tag(grpc_completion_queue* cq, void* tag, bool lock_cq) {
  int found = 0;
  if (lock_cq) {
    gpr_mu_lock(cq->mu);
  }

  for (int i = 0; i < static_cast<int>(cq->outstanding_tag_count); i++) {
    if (cq->outstanding_tags[i] == tag) {
      cq->outstanding_tag_count--;
      std::swap(cq->outstanding_tags[i],
                cq->outstanding_tags[cq->outstanding_tag_count]);
      found = 1;
      break;
    }
  }

  if (lock_cq) {
    gpr_mu_unlock(cq->mu);
  }

  GPR_ASSERT(found);
}
#else
static void cq_check_tag(grpc_completion_queue* /*cq*/, void* /*tag*/,
                         bool /*lock_cq*/) {}
#endif

static bool cq_begin_op_for_next(grpc_completion_queue* cq, void* /*tag*/) {
  cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);
  return grpc_core::IncrementIfNonzero(&cqd->pending_events);
}

static bool cq_begin_op_for_pluck(grpc_completion_queue* cq, void* /*tag*/) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);
  return grpc_core::IncrementIfNonzero(&cqd->pending_events);
}

static bool cq_begin_op_for_callback(grpc_completion_queue* cq, void* /*tag*/) {
  cq_callback_data* cqd = static_cast<cq_callback_data*> DATA_FROM_CQ(cq);
  return grpc_core::IncrementIfNonzero(&cqd->pending_events);
}

bool grpc_cq_begin_op(grpc_completion_queue* cq, void* tag) {
#ifndef NDEBUG
  gpr_mu_lock(cq->mu);
  if (cq->outstanding_tag_count == cq->outstanding_tag_capacity) {
    cq->outstanding_tag_capacity =
        std::max(size_t(4), 2 * cq->outstanding_tag_capacity);
    cq->outstanding_tags = static_cast<void**>(gpr_realloc(
        cq->outstanding_tags,
        sizeof(*cq->outstanding_tags) * cq->outstanding_tag_capacity));
  }
  cq->outstanding_tags[cq->outstanding_tag_count++] = tag;
  gpr_mu_unlock(cq->mu);
#endif
  return cq->vtable->begin_op(cq, tag);
}
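
// Added commentary: grpc_cq_begin_op and grpc_cq_end_op always come in pairs.
// A minimal sketch of an internal producer, using hypothetical names
// (`my_done`, `my_done_arg`, `my_completion_storage`) purely for
// illustration:
//
//   if (grpc_cq_begin_op(cq, tag)) {
//     // ... perform the operation ...
//     grpc_cq_end_op(cq, tag, absl::OkStatus(), my_done, my_done_arg,
//                    &my_completion_storage, /*internal=*/false);
//   }
//
// Per the contract documented above cq_end_op_for_next, the storage passed to
// grpc_cq_end_op must stay alive until `my_done` is invoked.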

// Queue a GRPC_OP_COMPLETED operation to a completion queue (with a
// completion type of GRPC_CQ_NEXT)
static void cq_end_op_for_next(
    grpc_completion_queue* cq, void* tag, grpc_error_handle error,
    void (*done)(void* done_arg, grpc_cq_completion* storage), void* done_arg,
    grpc_cq_completion* storage, bool /*internal*/) {
  if (GRPC_TRACE_FLAG_ENABLED(grpc_api_trace) ||
      (GRPC_TRACE_FLAG_ENABLED(grpc_trace_operation_failures) && !error.ok())) {
    std::string errmsg = grpc_core::StatusToString(error);
    GRPC_API_TRACE(
        "cq_end_op_for_next(cq=%p, tag=%p, error=%s, "
        "done=%p, done_arg=%p, storage=%p)",
        6, (cq, tag, errmsg.c_str(), done, done_arg, storage));
    if (GRPC_TRACE_FLAG_ENABLED(grpc_trace_operation_failures) && !error.ok()) {
      gpr_log(GPR_INFO, "Operation failed: tag=%p, error=%s", tag,
              errmsg.c_str());
    }
  }
  cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);
  int is_success = (error.ok());

  storage->tag = tag;
  storage->done = done;
  storage->done_arg = done_arg;
  storage->next = static_cast<uintptr_t>(is_success);

  cq_check_tag(cq, tag, true);  // Used in debug builds only

  if (g_cached_cq == cq && g_cached_event == nullptr) {
    g_cached_event = storage;
  } else {
    // Add the completion to the queue
    bool is_first = cqd->queue.Push(storage);
    cqd->things_queued_ever.fetch_add(1, std::memory_order_relaxed);
    // Since we do not hold the cq lock here, it is important to do an
    // 'acquire' load here (instead of a 'no_barrier' load) to match the
    // release store (done via pending_events.fetch_sub(1, ACQ_REL)) in
    // cq_shutdown_next.
    if (cqd->pending_events.load(std::memory_order_acquire) != 1) {
      // Only kick if this is the first item queued
      if (is_first) {
        gpr_mu_lock(cq->mu);
        grpc_error_handle kick_error =
            cq->poller_vtable->kick(POLLSET_FROM_CQ(cq), nullptr);
        gpr_mu_unlock(cq->mu);

        if (!kick_error.ok()) {
          gpr_log(GPR_ERROR, "Kick failed: %s",
                  grpc_core::StatusToString(kick_error).c_str());
        }
      }
      if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
        GRPC_CQ_INTERNAL_REF(cq, "shutting_down");
        gpr_mu_lock(cq->mu);
        cq_finish_shutdown_next(cq);
        gpr_mu_unlock(cq->mu);
        GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down");
      }
    } else {
      GRPC_CQ_INTERNAL_REF(cq, "shutting_down");
      cqd->pending_events.store(0, std::memory_order_release);
      gpr_mu_lock(cq->mu);
      cq_finish_shutdown_next(cq);
      gpr_mu_unlock(cq->mu);
      GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down");
    }
  }
}

// Queue a GRPC_OP_COMPLETED operation to a completion queue (with a
// completion type of GRPC_CQ_PLUCK)
static void cq_end_op_for_pluck(
    grpc_completion_queue* cq, void* tag, grpc_error_handle error,
    void (*done)(void* done_arg, grpc_cq_completion* storage), void* done_arg,
    grpc_cq_completion* storage, bool /*internal*/) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);
  int is_success = (error.ok());

  if (GRPC_TRACE_FLAG_ENABLED(grpc_api_trace) ||
      (GRPC_TRACE_FLAG_ENABLED(grpc_trace_operation_failures) && !error.ok())) {
    std::string errmsg = grpc_core::StatusToString(error);
    GRPC_API_TRACE(
        "cq_end_op_for_pluck(cq=%p, tag=%p, error=%s, "
        "done=%p, done_arg=%p, storage=%p)",
        6, (cq, tag, errmsg.c_str(), done, done_arg, storage));
    if (GRPC_TRACE_FLAG_ENABLED(grpc_trace_operation_failures) && !error.ok()) {
      gpr_log(GPR_ERROR, "Operation failed: tag=%p, error=%s", tag,
              errmsg.c_str());
    }
  }

  storage->tag = tag;
  storage->done = done;
  storage->done_arg = done_arg;
  storage->next = reinterpret_cast<uintptr_t>(&cqd->completed_head) |
                  static_cast<uintptr_t>(is_success);

  gpr_mu_lock(cq->mu);
  cq_check_tag(cq, tag, false);  // Used in debug builds only

  // Add to the list of completions
  cqd->things_queued_ever.fetch_add(1, std::memory_order_relaxed);
  cqd->completed_tail->next =
      reinterpret_cast<uintptr_t>(storage) | (1u & cqd->completed_tail->next);
  cqd->completed_tail = storage;

  if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    cq_finish_shutdown_pluck(cq);
    gpr_mu_unlock(cq->mu);
  } else {
    grpc_pollset_worker* pluck_worker = nullptr;
    for (int i = 0; i < cqd->num_pluckers; i++) {
      if (cqd->pluckers[i].tag == tag) {
        pluck_worker = *cqd->pluckers[i].worker;
        break;
      }
    }

    grpc_error_handle kick_error =
        cq->poller_vtable->kick(POLLSET_FROM_CQ(cq), pluck_worker);
    gpr_mu_unlock(cq->mu);
    if (!kick_error.ok()) {
      gpr_log(GPR_ERROR, "Kick failed: %s",
              grpc_core::StatusToString(kick_error).c_str());
    }
  }
}

static void functor_callback(void* arg, grpc_error_handle error) {
  auto* functor = static_cast<grpc_completion_queue_functor*>(arg);
  functor->functor_run(functor, error.ok());
}

// Complete an event on a completion queue of type GRPC_CQ_CALLBACK
static void cq_end_op_for_callback(
    grpc_completion_queue* cq, void* tag, grpc_error_handle error,
    void (*done)(void* done_arg, grpc_cq_completion* storage), void* done_arg,
    grpc_cq_completion* storage, bool internal) {
  cq_callback_data* cqd = static_cast<cq_callback_data*> DATA_FROM_CQ(cq);

  if (GRPC_TRACE_FLAG_ENABLED(grpc_api_trace) ||
      (GRPC_TRACE_FLAG_ENABLED(grpc_trace_operation_failures) && !error.ok())) {
    std::string errmsg = grpc_core::StatusToString(error);
    GRPC_API_TRACE(
        "cq_end_op_for_callback(cq=%p, tag=%p, error=%s, "
        "done=%p, done_arg=%p, storage=%p)",
        6, (cq, tag, errmsg.c_str(), done, done_arg, storage));
    if (GRPC_TRACE_FLAG_ENABLED(grpc_trace_operation_failures) && !error.ok()) {
      gpr_log(GPR_ERROR, "Operation failed: tag=%p, error=%s", tag,
              errmsg.c_str());
    }
  }

  // The callback-based CQ isn't really a queue at all and thus has no need
  // for reserved storage. Invoke the done callback right away to release it.
  done(done_arg, storage);

  cq_check_tag(cq, tag, true);  // Used in debug builds only

  if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    cq_finish_shutdown_callback(cq);
  }

  // If possible, schedule the callback onto an existing thread-local
  // ApplicationCallbackExecCtx, which is a work queue. This is possible for:
  // 1. The callback is internally-generated and there is an ACEC available
  // 2. The callback is marked inlineable and there is an ACEC available
  // 3. We are already running in a background poller thread (which always has
  //    an ACEC available at the base of the stack).
  auto* functor = static_cast<grpc_completion_queue_functor*>(tag);
  if (((internal || functor->inlineable) &&
       grpc_core::ApplicationCallbackExecCtx::Available()) ||
      grpc_iomgr_is_any_background_poller_thread()) {
    grpc_core::ApplicationCallbackExecCtx::Enqueue(functor, (error.ok()));
    return;
  }

  // Schedule the callback on a closure if not internal or triggered
  // from a background poller thread.
  grpc_core::Executor::Run(
      GRPC_CLOSURE_CREATE(functor_callback, functor, nullptr), error);
}
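
// Illustrative sketch (added commentary): a GRPC_CQ_CALLBACK queue delivers
// events by invoking grpc_completion_queue_functor objects supplied as tags.
// A hypothetical user-defined functor, with names chosen only for
// illustration, might look like:
//
//   struct MyTag : grpc_completion_queue_functor {
//     MyTag() {
//       functor_run = &MyTag::Run;
//       inlineable = false;  // force the executor/ACEC scheduling path above
//     }
//     static void Run(grpc_completion_queue_functor* f, int ok) {
//       auto* self = static_cast<MyTag*>(f);
//       // react to the completion; `ok` mirrors error.ok() from cq_end_op
//       (void)self;
//     }
//   };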

void grpc_cq_end_op(grpc_completion_queue* cq, void* tag,
                    grpc_error_handle error,
                    void (*done)(void* done_arg, grpc_cq_completion* storage),
                    void* done_arg, grpc_cq_completion* storage,
                    bool internal) {
  cq->vtable->end_op(cq, tag, error, done, done_arg, storage, internal);
}

struct cq_is_finished_arg {
  gpr_atm last_seen_things_queued_ever;
  grpc_completion_queue* cq;
  grpc_core::Timestamp deadline;
  grpc_cq_completion* stolen_completion;
  void* tag;  // for pluck
  bool first_loop;
};
class ExecCtxNext : public grpc_core::ExecCtx {
 public:
  explicit ExecCtxNext(void* arg)
      : ExecCtx(0), check_ready_to_finish_arg_(arg) {}

  bool CheckReadyToFinish() override {
    cq_is_finished_arg* a =
        static_cast<cq_is_finished_arg*>(check_ready_to_finish_arg_);
    grpc_completion_queue* cq = a->cq;
    cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);
    GPR_ASSERT(a->stolen_completion == nullptr);

    intptr_t current_last_seen_things_queued_ever =
        cqd->things_queued_ever.load(std::memory_order_relaxed);

    if (current_last_seen_things_queued_ever !=
        a->last_seen_things_queued_ever) {
      a->last_seen_things_queued_ever =
          cqd->things_queued_ever.load(std::memory_order_relaxed);

      // Pop a cq_completion from the queue. Returns NULL if the queue is
      // empty (and might return NULL in some cases even if the queue is not
      // empty; that is OK and doesn't affect correctness, though it might
      // affect the tail latencies a bit).
      a->stolen_completion = cqd->queue.Pop();
      if (a->stolen_completion != nullptr) {
        return true;
      }
    }
    return !a->first_loop && a->deadline < grpc_core::Timestamp::Now();
  }

 private:
  void* check_ready_to_finish_arg_;
};

#ifndef NDEBUG
static void dump_pending_tags(grpc_completion_queue* cq) {
  if (!GRPC_TRACE_FLAG_ENABLED(grpc_trace_pending_tags)) return;
  std::vector<std::string> parts;
  parts.push_back("PENDING TAGS:");
  gpr_mu_lock(cq->mu);
  for (size_t i = 0; i < cq->outstanding_tag_count; i++) {
    parts.push_back(absl::StrFormat(" %p", cq->outstanding_tags[i]));
  }
  gpr_mu_unlock(cq->mu);
  gpr_log(GPR_DEBUG, "%s", absl::StrJoin(parts, "").c_str());
}
#else
static void dump_pending_tags(grpc_completion_queue* /*cq*/) {}
#endif

static grpc_event cq_next(grpc_completion_queue* cq, gpr_timespec deadline,
                          void* reserved) {
  grpc_event ret;
  cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);

  GRPC_API_TRACE(
      "grpc_completion_queue_next("
      "cq=%p, "
      "deadline=gpr_timespec { tv_sec: %" PRId64
      ", tv_nsec: %d, clock_type: %d }, "
      "reserved=%p)",
      5,
      (cq, deadline.tv_sec, deadline.tv_nsec, (int)deadline.clock_type,
       reserved));
  GPR_ASSERT(!reserved);

  dump_pending_tags(cq);

  GRPC_CQ_INTERNAL_REF(cq, "next");

  grpc_core::Timestamp deadline_millis =
      grpc_core::Timestamp::FromTimespecRoundUp(deadline);
  cq_is_finished_arg is_finished_arg = {
      cqd->things_queued_ever.load(std::memory_order_relaxed),
      cq,
      deadline_millis,
      nullptr,
      nullptr,
      true};
  ExecCtxNext exec_ctx(&is_finished_arg);
  for (;;) {
    grpc_core::Timestamp iteration_deadline = deadline_millis;

    if (is_finished_arg.stolen_completion != nullptr) {
      grpc_cq_completion* c = is_finished_arg.stolen_completion;
      is_finished_arg.stolen_completion = nullptr;
      ret.type = GRPC_OP_COMPLETE;
      ret.success = c->next & 1u;
      ret.tag = c->tag;
      c->done(c->done_arg, c);
      break;
    }

    grpc_cq_completion* c = cqd->queue.Pop();

    if (c != nullptr) {
      ret.type = GRPC_OP_COMPLETE;
      ret.success = c->next & 1u;
      ret.tag = c->tag;
      c->done(c->done_arg, c);
      break;
    } else {
      // If c == NULL it means either the queue is empty OR it is in a
      // transient inconsistent state. If it is the latter, we should do a
      // 0-timeout poll so that the thread comes back quickly from poll to
      // make a second attempt at popping. Not doing this can potentially
      // deadlock this thread forever (if the deadline is infinity).
      if (cqd->queue.num_items() > 0) {
        iteration_deadline = grpc_core::Timestamp::ProcessEpoch();
      }
    }

    if (cqd->pending_events.load(std::memory_order_acquire) == 0) {
      // Before returning, check if the queue has any items left over (since
      // MultiProducerSingleConsumerQueue::Pop() can sometimes return NULL
      // even if the queue is not empty). If so, keep retrying but do not
      // return GRPC_QUEUE_SHUTDOWN.
      if (cqd->queue.num_items() > 0) {
        // Go to the beginning of the loop. No point doing a poll because
        // (cq->shutdown == true) is only possible when there is no pending
        // work (i.e. cq->pending_events == 0) and any outstanding completion
        // events should have already been queued on this cq.
        continue;
      }

      ret.type = GRPC_QUEUE_SHUTDOWN;
      ret.success = 0;
      break;
    }

    if (!is_finished_arg.first_loop &&
        grpc_core::Timestamp::Now() >= deadline_millis) {
      ret.type = GRPC_QUEUE_TIMEOUT;
      ret.success = 0;
      dump_pending_tags(cq);
      break;
    }

    // The main polling work happens in grpc_pollset_work
    gpr_mu_lock(cq->mu);
    cq->num_polls++;
    grpc_error_handle err = cq->poller_vtable->work(
        POLLSET_FROM_CQ(cq), nullptr, iteration_deadline);
    gpr_mu_unlock(cq->mu);

    if (!err.ok()) {
      gpr_log(GPR_ERROR, "Completion queue next failed: %s",
              grpc_core::StatusToString(err).c_str());
      if (err == absl::CancelledError()) {
        ret.type = GRPC_QUEUE_SHUTDOWN;
      } else {
        ret.type = GRPC_QUEUE_TIMEOUT;
      }
      ret.success = 0;
      dump_pending_tags(cq);
      break;
    }
    is_finished_arg.first_loop = false;
  }

  if (cqd->queue.num_items() > 0 &&
      cqd->pending_events.load(std::memory_order_acquire) > 0) {
    gpr_mu_lock(cq->mu);
    (void)cq->poller_vtable->kick(POLLSET_FROM_CQ(cq), nullptr);
    gpr_mu_unlock(cq->mu);
  }

  GRPC_SURFACE_TRACE_RETURNED_EVENT(cq, &ret);
  GRPC_CQ_INTERNAL_UNREF(cq, "next");

  GPR_ASSERT(is_finished_arg.stolen_completion == nullptr);

  return ret;
}

// Finishes the completion queue shutdown. This means that there are no more
// completion events / tags expected from the completion queue
// - Must be called under completion queue lock
// - Must be called only once in completion queue's lifetime
// - grpc_completion_queue_shutdown() MUST have been called before calling
//   this function
static void cq_finish_shutdown_next(grpc_completion_queue* cq) {
  cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);

  GPR_ASSERT(cqd->shutdown_called);
  GPR_ASSERT(cqd->pending_events.load(std::memory_order_relaxed) == 0);

  cq->poller_vtable->shutdown(POLLSET_FROM_CQ(cq), &cq->pollset_shutdown_done);
}

static void cq_shutdown_next(grpc_completion_queue* cq) {
  cq_next_data* cqd = static_cast<cq_next_data*> DATA_FROM_CQ(cq);

  // Need an extra ref for cq here because:
  // We call cq_finish_shutdown_next() below, that would call pollset shutdown.
  // Pollset shutdown decrements the cq ref count which can potentially destroy
  // the cq (if that happens to be the last ref).
  // Creating an extra ref here prevents the cq from getting destroyed while
  // this function is still active
  GRPC_CQ_INTERNAL_REF(cq, "shutting_down");
  gpr_mu_lock(cq->mu);
  if (cqd->shutdown_called) {
    gpr_mu_unlock(cq->mu);
    GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down");
    return;
  }
  cqd->shutdown_called = true;
  // Doing acq/release fetch_sub here to match with
  // cq_begin_op_for_next and cq_end_op_for_next functions which read/write
  // on this counter without necessarily holding a lock on cq
  if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    cq_finish_shutdown_next(cq);
  }
  gpr_mu_unlock(cq->mu);
  GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down");
}

grpc_event grpc_completion_queue_next(grpc_completion_queue* cq,
                                      gpr_timespec deadline, void* reserved) {
  return cq->vtable->next(cq, deadline, reserved);
}

static int add_plucker(grpc_completion_queue* cq, void* tag,
                       grpc_pollset_worker** worker) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);
  if (cqd->num_pluckers == GRPC_MAX_COMPLETION_QUEUE_PLUCKERS) {
    return 0;
  }
  cqd->pluckers[cqd->num_pluckers].tag = tag;
  cqd->pluckers[cqd->num_pluckers].worker = worker;
  cqd->num_pluckers++;
  return 1;
}

static void del_plucker(grpc_completion_queue* cq, void* tag,
                        grpc_pollset_worker** worker) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);
  for (int i = 0; i < cqd->num_pluckers; i++) {
    if (cqd->pluckers[i].tag == tag && cqd->pluckers[i].worker == worker) {
      cqd->num_pluckers--;
      std::swap(cqd->pluckers[i], cqd->pluckers[cqd->num_pluckers]);
      return;
    }
  }
  GPR_UNREACHABLE_CODE(return);
}

class ExecCtxPluck : public grpc_core::ExecCtx {
 public:
  explicit ExecCtxPluck(void* arg)
      : ExecCtx(0), check_ready_to_finish_arg_(arg) {}

  bool CheckReadyToFinish() override {
    cq_is_finished_arg* a =
        static_cast<cq_is_finished_arg*>(check_ready_to_finish_arg_);
    grpc_completion_queue* cq = a->cq;
    cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);

    GPR_ASSERT(a->stolen_completion == nullptr);
    gpr_atm current_last_seen_things_queued_ever =
        cqd->things_queued_ever.load(std::memory_order_relaxed);
    if (current_last_seen_things_queued_ever !=
        a->last_seen_things_queued_ever) {
      gpr_mu_lock(cq->mu);
      a->last_seen_things_queued_ever =
          cqd->things_queued_ever.load(std::memory_order_relaxed);
      grpc_cq_completion* c;
      grpc_cq_completion* prev = &cqd->completed_head;
      while ((c = reinterpret_cast<grpc_cq_completion*>(
                  prev->next & ~uintptr_t{1})) != &cqd->completed_head) {
        if (c->tag == a->tag) {
          prev->next = (prev->next & uintptr_t{1}) | (c->next & ~uintptr_t{1});
          if (c == cqd->completed_tail) {
            cqd->completed_tail = prev;
          }
          gpr_mu_unlock(cq->mu);
          a->stolen_completion = c;
          return true;
        }
        prev = c;
      }
      gpr_mu_unlock(cq->mu);
    }
    return !a->first_loop && a->deadline < grpc_core::Timestamp::Now();
  }

 private:
  void* check_ready_to_finish_arg_;
};

static grpc_event cq_pluck(grpc_completion_queue* cq, void* tag,
                           gpr_timespec deadline, void* reserved) {
  grpc_event ret;
  grpc_cq_completion* c;
  grpc_cq_completion* prev;
  grpc_pollset_worker* worker = nullptr;
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);

  if (GRPC_TRACE_FLAG_ENABLED(grpc_cq_pluck_trace)) {
    GRPC_API_TRACE(
        "grpc_completion_queue_pluck("
        "cq=%p, tag=%p, "
        "deadline=gpr_timespec { tv_sec: %" PRId64
        ", tv_nsec: %d, clock_type: %d }, "
        "reserved=%p)",
        6,
        (cq, tag, deadline.tv_sec, deadline.tv_nsec, (int)deadline.clock_type,
         reserved));
  }
  GPR_ASSERT(!reserved);

  dump_pending_tags(cq);

  GRPC_CQ_INTERNAL_REF(cq, "pluck");
  gpr_mu_lock(cq->mu);
  grpc_core::Timestamp deadline_millis =
      grpc_core::Timestamp::FromTimespecRoundUp(deadline);
  cq_is_finished_arg is_finished_arg = {
      cqd->things_queued_ever.load(std::memory_order_relaxed),
      cq,
      deadline_millis,
      nullptr,
      tag,
      true};
  ExecCtxPluck exec_ctx(&is_finished_arg);
  for (;;) {
    if (is_finished_arg.stolen_completion != nullptr) {
      gpr_mu_unlock(cq->mu);
      c = is_finished_arg.stolen_completion;
      is_finished_arg.stolen_completion = nullptr;
      ret.type = GRPC_OP_COMPLETE;
      ret.success = c->next & 1u;
      ret.tag = c->tag;
      c->done(c->done_arg, c);
      break;
    }
    prev = &cqd->completed_head;
    while ((c = reinterpret_cast<grpc_cq_completion*>(
                prev->next & ~uintptr_t{1})) != &cqd->completed_head) {
      if (GPR_LIKELY(c->tag == tag)) {
        prev->next = (prev->next & uintptr_t{1}) | (c->next & ~uintptr_t{1});
        if (c == cqd->completed_tail) {
          cqd->completed_tail = prev;
        }
        gpr_mu_unlock(cq->mu);
        ret.type = GRPC_OP_COMPLETE;
        ret.success = c->next & 1u;
        ret.tag = c->tag;
        c->done(c->done_arg, c);
        goto done;
      }
      prev = c;
    }
    if (cqd->shutdown.load(std::memory_order_relaxed)) {
      gpr_mu_unlock(cq->mu);
      ret.type = GRPC_QUEUE_SHUTDOWN;
      ret.success = 0;
      break;
    }
    if (!add_plucker(cq, tag, &worker)) {
      gpr_log(GPR_DEBUG,
              "Too many outstanding grpc_completion_queue_pluck calls: maximum "
              "is %d",
              GRPC_MAX_COMPLETION_QUEUE_PLUCKERS);
      gpr_mu_unlock(cq->mu);
      // TODO(ctiller): should we use a different result here
      ret.type = GRPC_QUEUE_TIMEOUT;
      ret.success = 0;
      dump_pending_tags(cq);
      break;
    }
    if (!is_finished_arg.first_loop &&
        grpc_core::Timestamp::Now() >= deadline_millis) {
      del_plucker(cq, tag, &worker);
      gpr_mu_unlock(cq->mu);
      ret.type = GRPC_QUEUE_TIMEOUT;
      ret.success = 0;
      dump_pending_tags(cq);
      break;
    }
    cq->num_polls++;
    grpc_error_handle err =
        cq->poller_vtable->work(POLLSET_FROM_CQ(cq), &worker, deadline_millis);
    if (!err.ok()) {
      del_plucker(cq, tag, &worker);
      gpr_mu_unlock(cq->mu);
      gpr_log(GPR_ERROR, "Completion queue pluck failed: %s",
              grpc_core::StatusToString(err).c_str());
      ret.type = GRPC_QUEUE_TIMEOUT;
      ret.success = 0;
      dump_pending_tags(cq);
      break;
    }
    is_finished_arg.first_loop = false;
    del_plucker(cq, tag, &worker);
  }
done:
  GRPC_SURFACE_TRACE_RETURNED_EVENT(cq, &ret);
  GRPC_CQ_INTERNAL_UNREF(cq, "pluck");

  GPR_ASSERT(is_finished_arg.stolen_completion == nullptr);

  return ret;
}

grpc_event grpc_completion_queue_pluck(grpc_completion_queue* cq, void* tag,
                                       gpr_timespec deadline, void* reserved) {
  return cq->vtable->pluck(cq, tag, deadline, reserved);
}

static void cq_finish_shutdown_pluck(grpc_completion_queue* cq) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);

  GPR_ASSERT(cqd->shutdown_called);
  GPR_ASSERT(!cqd->shutdown.load(std::memory_order_relaxed));
  cqd->shutdown.store(true, std::memory_order_relaxed);

  cq->poller_vtable->shutdown(POLLSET_FROM_CQ(cq), &cq->pollset_shutdown_done);
}

// NOTE: This function is almost exactly identical to cq_shutdown_next() but
// merging them is a bit tricky and probably not worth it
static void cq_shutdown_pluck(grpc_completion_queue* cq) {
  cq_pluck_data* cqd = static_cast<cq_pluck_data*> DATA_FROM_CQ(cq);

  // Need an extra ref for cq here because:
  // We call cq_finish_shutdown_pluck() below, that would call pollset shutdown.
  // Pollset shutdown decrements the cq ref count which can potentially destroy
  // the cq (if that happens to be the last ref).
  // Creating an extra ref here prevents the cq from getting destroyed while
  // this function is still active
  GRPC_CQ_INTERNAL_REF(cq, "shutting_down (pluck cq)");
  gpr_mu_lock(cq->mu);
  if (cqd->shutdown_called) {
    gpr_mu_unlock(cq->mu);
    GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down (pluck cq)");
    return;
  }
  cqd->shutdown_called = true;
  if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    cq_finish_shutdown_pluck(cq);
  }
  gpr_mu_unlock(cq->mu);
  GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down (pluck cq)");
}

static void cq_finish_shutdown_callback(grpc_completion_queue* cq) {
  cq_callback_data* cqd = static_cast<cq_callback_data*> DATA_FROM_CQ(cq);
  auto* callback = cqd->shutdown_callback;

  GPR_ASSERT(cqd->shutdown_called);

  cq->poller_vtable->shutdown(POLLSET_FROM_CQ(cq), &cq->pollset_shutdown_done);
  if (grpc_iomgr_is_any_background_poller_thread()) {
    grpc_core::ApplicationCallbackExecCtx::Enqueue(callback, true);
    return;
  }

  // Schedule the callback on a closure if not internal or triggered
  // from a background poller thread.
  grpc_core::Executor::Run(
      GRPC_CLOSURE_CREATE(functor_callback, callback, nullptr),
      absl::OkStatus());
}

static void cq_shutdown_callback(grpc_completion_queue* cq) {
  cq_callback_data* cqd = static_cast<cq_callback_data*> DATA_FROM_CQ(cq);

  // Need an extra ref for cq here because:
  // We call cq_finish_shutdown_callback() below, which calls pollset shutdown.
  // Pollset shutdown decrements the cq ref count which can potentially destroy
  // the cq (if that happens to be the last ref).
  // Creating an extra ref here prevents the cq from getting destroyed while
  // this function is still active
  GRPC_CQ_INTERNAL_REF(cq, "shutting_down (callback cq)");
  gpr_mu_lock(cq->mu);
  if (cqd->shutdown_called) {
    gpr_mu_unlock(cq->mu);
    GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down (callback cq)");
    return;
  }
  cqd->shutdown_called = true;
  if (cqd->pending_events.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    gpr_mu_unlock(cq->mu);
    cq_finish_shutdown_callback(cq);
  } else {
    gpr_mu_unlock(cq->mu);
  }
  GRPC_CQ_INTERNAL_UNREF(cq, "shutting_down (callback cq)");
}

// Shutdown simply drops a ref that we reserved at creation time; if we drop
// to zero here, then enter shutdown mode and wake up any waiters
void grpc_completion_queue_shutdown(grpc_completion_queue* cq) {
  grpc_core::ApplicationCallbackExecCtx callback_exec_ctx;
  grpc_core::ExecCtx exec_ctx;
  GRPC_API_TRACE("grpc_completion_queue_shutdown(cq=%p)", 1, (cq));
  cq->vtable->shutdown(cq);
}
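
// Added commentary: how shutdown is observed depends on the queue type. A
// next/pluck consumer keeps calling next()/pluck() until it sees
// GRPC_QUEUE_SHUTDOWN, whereas a GRPC_CQ_CALLBACK queue reports completion of
// shutdown through the functor supplied at creation time (see
// cq_finish_shutdown_callback above). A hedged sketch of the next-style case:
//
//   grpc_completion_queue_shutdown(cq);
//   while (grpc_completion_queue_next(cq, gpr_inf_future(GPR_CLOCK_REALTIME),
//                                     nullptr)
//              .type != GRPC_QUEUE_SHUTDOWN) {
//   }
//
// For a callback queue, the shutdown functor's functor_run fires once
// shutdown has fully completed; no draining loop is needed.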

void grpc_completion_queue_destroy(grpc_completion_queue* cq) {
  GRPC_API_TRACE("grpc_completion_queue_destroy(cq=%p)", 1, (cq));
  grpc_completion_queue_shutdown(cq);

  grpc_core::ExecCtx exec_ctx;
  GRPC_CQ_INTERNAL_UNREF(cq, "destroy");
}

grpc_pollset* grpc_cq_pollset(grpc_completion_queue* cq) {
  return cq->poller_vtable->can_get_pollset ? POLLSET_FROM_CQ(cq) : nullptr;
}

bool grpc_cq_can_listen(grpc_completion_queue* cq) {
  return cq->poller_vtable->can_listen;
}