//
// Copyright 2022 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include <grpc/support/port_platform.h>

#include "src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <utility>
#include <vector>

#include "absl/functional/any_invocable.h"

#include <grpc/support/log.h>

namespace grpc_core {

namespace {

constexpr uint16_t kMaxWeight = std::numeric_limits<uint16_t>::max();
// Assuming the mean of all known weights is M, StaticStrideScheduler will cap
// from above all known weights that are bigger than M*kMaxRatio (to
// M*kMaxRatio).
//
// This is done to limit the number of rounds for picks.
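//
// For example, with 100 channels of weight 1 and one channel of weight 1000,
// the mean is 1100 / 101 ~= 10.9, so the outlier is capped to roughly 109
// instead of receiving ~91% of all picks.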
constexpr double kMaxRatio = 10;

// Assuming the mean of all known weights is M, StaticStrideScheduler will cap
// from below all known weights to M*kMinRatio.
//
// This is done as a performance optimization for edge cases when channels with
// large weights are non-accepting (and thus WeightedRoundRobin will retry
// picking them over and over again), and there are also channels with
// near-zero weights that are possibly accepting. In this case, without
// kMinRatio, WeightedRoundRobin could have to perform thousands of picks
// before it hits a single channel with near-zero weight. This was part of
// what happened in b/276292666.
//
// The current value of 0.01 was chosen without any experimenting. It should
// ensure that WeightedRoundRobin doesn't do much more than on the order of
// 100 picks of non-accepting channels with high weights in such corner cases.
// But it also makes WeightedRoundRobin send slightly more requests to
// potentially very bad tasks (those that would have near-zero weights) than
// zero. This is not necessarily a downside, though. Perhaps this is not a
// problem at all and we should increase this value (to 0.05 or 0.1) to save
// CPU cycles.
//
// Note that this class treats weights that are exactly equal to zero as
// unknown and thus needing to be replaced with M. This behavior itself makes
// sense (fresh channels without feedback information will get an average flow
// of requests). However, it follows from this that this class will replace
// weight = 0 with M, but weight = epsilon with M*kMinRatio, and this step
// function is logically faulty. A demonstration of this is that the function
// that computes weights in WeightedRoundRobin
// (http://google3/production/rpc/stubs/core/loadbalancing/weightedroundrobin.cc;l=324-325;rcl=514986476)
// will cap some epsilon values to zero. There should be a clear distinction
// between "task is new, weight is unknown" and "task is unhealthy, weight is
// very low". A better solution would be to not mix "unknown" and "weight" into
// a single value but represent weights as std::optional<float> or, if memory
// usage is a concern, use NaN as the indicator of unknown weight.
constexpr double kMinRatio = 0.01;

}  // namespace

absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make(
    absl::Span<const float> float_weights,
    absl::AnyInvocable<uint32_t()> next_sequence_func) {
  if (float_weights.empty()) return absl::nullopt;
  if (float_weights.size() == 1) return absl::nullopt;

  // TODO(b/190488683): should we normalize negative weights to 0?

  const size_t n = float_weights.size();
  size_t num_zero_weight_channels = 0;
  double sum = 0;
  float unscaled_max = 0;
  for (const float weight : float_weights) {
    sum += weight;
    unscaled_max = std::max(unscaled_max, weight);
    if (weight == 0) {
      ++num_zero_weight_channels;
    }
  }

  if (num_zero_weight_channels == n) return absl::nullopt;

  // Mean of non-zero weights before scaling to `kMaxWeight`.
  const double unscaled_mean =
      sum / static_cast<double>(n - num_zero_weight_channels);
  const double ratio = unscaled_max / unscaled_mean;

  // Adjust max value such that ratio does not exceed kMaxRatio. This should
  // ensure that we on average do at most kMaxRatio rounds for picks.
  if (ratio > kMaxRatio) {
    unscaled_max = kMaxRatio * unscaled_mean;
  }

  // Scale weights such that the largest is equal to `kMaxWeight`. This should
  // be accurate enough once we convert to an integer. Quantisation errors
  // won't be measurable on borg.
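  //
  // For example, if the largest (possibly capped) weight is 100.0, then
  // scaling_factor = 65535 / 100.0 = 655.35, and a weight of 50.0 maps to
  // lround(32767.5) = 32768.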
  // TODO(b/190488683): it may be more stable over updates if we try to keep
  // `scaling_factor` consistent, and only change it when we can't accurately
  // represent the new weights.
  const double scaling_factor = kMaxWeight / unscaled_max;

  // Note that since we cap the weights to stay within kMaxRatio, `mean` might
  // not match the actual mean of the values that end up in the scheduler.
  const uint16_t mean = std::lround(scaling_factor * unscaled_mean);

  // We compute weight_lower_bound and cap it to 1 from below so that in the
  // worst case we represent tiny weights as 1 but not as 0 (which would cause
  // an infinite loop as in b/276292666). This capping to 1 is probably only
  // useful in case someone misconfigures kMinRatio to be very small.
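  //
  // For example, if the largest weight was capped at 10x the mean, the scaled
  // mean is lround(65535 / 10.0) = 6554 and weight_lower_bound becomes
  // max(1, lround(65.54)) = 66.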
  //
  // NOMUTANTS -- We have tests for this expression, but they are not precise
  // enough to catch errors of plus/minus one, which is exactly what mutation
  // testing introduces.
  const uint16_t weight_lower_bound =
      std::max(static_cast<uint16_t>(1),
               static_cast<uint16_t>(std::lround(mean * kMinRatio)));

  std::vector<uint16_t> weights;
  weights.reserve(n);
  for (size_t i = 0; i < n; ++i) {
    if (float_weights[i] == 0) {  // Weight is unknown.
      weights.push_back(mean);
    } else {
      const double float_weight_capped_from_above =
          std::min(float_weights[i], unscaled_max);
      const uint16_t weight =
          std::lround(float_weight_capped_from_above * scaling_factor);
      weights.push_back(std::max(weight, weight_lower_bound));
    }
  }

  GPR_ASSERT(weights.size() == float_weights.size());
  return StaticStrideScheduler{std::move(weights),
                               std::move(next_sequence_func)};
}
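
// A minimal usage sketch (hypothetical caller; within gRPC the scheduler is
// actually driven by the WeightedRoundRobin LB policy):
//
//   std::atomic<uint32_t> sequence{0};
//   absl::optional<StaticStrideScheduler> scheduler =
//       StaticStrideScheduler::Make(
//           {1.0f, 3.0f, 0.0f},  // A zero weight means "unknown".
//           [&] { return sequence.fetch_add(1, std::memory_order_relaxed); });
//   if (scheduler.has_value()) {
//     const size_t backend_index = scheduler->Pick();
//   }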

StaticStrideScheduler::StaticStrideScheduler(
    std::vector<uint16_t> weights,
    absl::AnyInvocable<uint32_t()> next_sequence_func)
    : next_sequence_func_(std::move(next_sequence_func)),
      weights_(std::move(weights)) {
  GPR_ASSERT(next_sequence_func_ != nullptr);
}

size_t StaticStrideScheduler::Pick() const {
  while (true) {
    const uint32_t sequence = next_sequence_func_();

    // The sequence number is split in two: the lower %n gives the index of the
    // backend, and the rest gives the number of times we've iterated through
    // all backends. `generation` is used to deterministically decide whether
    // we pick or skip the backend on this iteration, in proportion to the
    // backend's weight.
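    //
    // For example, with 3 backends, sequence 7 maps to backend_index 1 and
    // generation 2 (i.e. the third full pass over the backend list).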
    const uint64_t backend_index = sequence % weights_.size();
    const uint64_t generation = sequence / weights_.size();
    const uint64_t weight = weights_[backend_index];

    // We pick a backend `weight` times per `kMaxWeight` generations. The
    // multiply and modulus ~evenly spread out the picks for a given backend
    // between different generations. The offset by `backend_index` helps to
    // reduce the chance of multiple consecutive non-picks: if we have two
    // consecutive backends that each have, say, 80% of the max weight, with
    // no offset we would see 1/5 of generations skip both.
    // TODO(b/190488683): add test for offset efficacy.
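    //
    // For example, a backend whose weight equals kMaxWeight is never skipped
    // (the skip condition `mod < kMaxWeight - weight` can never hold), while
    // a backend with weight kMaxWeight / 2 is picked in roughly every other
    // generation.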
    const uint16_t kOffset = kMaxWeight / 2;
    const uint16_t mod =
        (weight * generation + backend_index * kOffset) % kMaxWeight;

    if (mod < kMaxWeight - weight) {
      // Probability of skipping = 1 - mean(weights) / max(weights).
      // For a typical large-scale service using RR, max task utilization will
      // be ~100% when mean utilization is ~80%. So ~20% of picks will be
      // skipped.
      continue;
    }
    return backend_index;
  }
}

}  // namespace grpc_core