1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h"
12
13 #include <stdlib.h>
14
15 #include <algorithm>
16 #include <cmath>
17 #include <cstddef>
18 #include <numeric>
19
20 #include "modules/audio_processing/agc2/rnn_vad/common.h"
21 #include "modules/audio_processing/agc2/rnn_vad/vector_math.h"
22 #include "rtc_base/checks.h"
23 #include "rtc_base/numerics/safe_compare.h"
24 #include "rtc_base/numerics/safe_conversions.h"
25 #include "rtc_base/system/arch.h"
26
27 namespace webrtc {
28 namespace rnn_vad {
29 namespace {
30
ComputeAutoCorrelation(int inverted_lag,rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,const VectorMath & vector_math)31 float ComputeAutoCorrelation(
32 int inverted_lag,
33 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
34 const VectorMath& vector_math) {
35 RTC_DCHECK_LT(inverted_lag, kBufSize24kHz);
36 RTC_DCHECK_LT(inverted_lag, kRefineNumLags24kHz);
37 static_assert(kMaxPitch24kHz < kBufSize24kHz, "");
38 return vector_math.DotProduct(
39 pitch_buffer.subview(/*offset=*/kMaxPitch24kHz),
40 pitch_buffer.subview(inverted_lag, kFrameSize20ms24kHz));
41 }
42
43 // Given an auto-correlation coefficient `curr_auto_correlation` and its
44 // neighboring values `prev_auto_correlation` and `next_auto_correlation`
45 // computes a pseudo-interpolation offset to be applied to the pitch period
46 // associated to `curr`. The output is a lag in {-1, 0, +1}.
47 // TODO(bugs.webrtc.org/9076): Consider removing this method.
48 // `GetPitchPseudoInterpolationOffset()` it is relevant only if the spectral
49 // analysis works at a sample rate that is twice as that of the pitch buffer;
50 // In particular, it is not relevant for the estimated pitch period feature fed
51 // into the RNN.
GetPitchPseudoInterpolationOffset(float prev_auto_correlation,float curr_auto_correlation,float next_auto_correlation)52 int GetPitchPseudoInterpolationOffset(float prev_auto_correlation,
53 float curr_auto_correlation,
54 float next_auto_correlation) {
55 if ((next_auto_correlation - prev_auto_correlation) >
56 0.7f * (curr_auto_correlation - prev_auto_correlation)) {
57 return 1; // `next_auto_correlation` is the largest auto-correlation
58 // coefficient.
59 } else if ((prev_auto_correlation - next_auto_correlation) >
60 0.7f * (curr_auto_correlation - next_auto_correlation)) {
61 return -1; // `prev_auto_correlation` is the largest auto-correlation
62 // coefficient.
63 }
64 return 0;
65 }
66
67 // Refines a pitch period `lag` encoded as lag with pseudo-interpolation. The
68 // output sample rate is twice as that of `lag`.
PitchPseudoInterpolationLagPitchBuf(int lag,rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,const VectorMath & vector_math)69 int PitchPseudoInterpolationLagPitchBuf(
70 int lag,
71 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
72 const VectorMath& vector_math) {
73 int offset = 0;
74 // Cannot apply pseudo-interpolation at the boundaries.
75 if (lag > 0 && lag < kMaxPitch24kHz) {
76 const int inverted_lag = kMaxPitch24kHz - lag;
77 offset = GetPitchPseudoInterpolationOffset(
78 ComputeAutoCorrelation(inverted_lag + 1, pitch_buffer, vector_math),
79 ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math),
80 ComputeAutoCorrelation(inverted_lag - 1, pitch_buffer, vector_math));
81 }
82 return 2 * lag + offset;
83 }
84
85 // Integer multipliers used in ComputeExtendedPitchPeriod48kHz() when
86 // looking for sub-harmonics.
87 // The values have been chosen to serve the following algorithm. Given the
88 // initial pitch period T, we examine whether one of its harmonics is the true
89 // fundamental frequency. We consider T/k with k in {2, ..., 15}. For each of
90 // these harmonics, in addition to the pitch strength of itself, we choose one
91 // multiple of its pitch period, n*T/k, to validate it (by averaging their pitch
92 // strengths). The multiplier n is chosen so that n*T/k is used only one time
93 // over all k. When for example k = 4, we should also expect a peak at 3*T/4.
94 // When k = 8 instead we don't want to look at 2*T/8, since we have already
95 // checked T/4 before. Instead, we look at T*3/8.
96 // The array can be generate in Python as follows:
97 // from fractions import Fraction
98 // # Smallest positive integer not in X.
99 // def mex(X):
100 // for i in range(1, int(max(X)+2)):
101 // if i not in X:
102 // return i
103 // # Visited multiples of the period.
104 // S = {1}
105 // for n in range(2, 16):
106 // sn = mex({n * i for i in S} | {1})
107 // S = S | {Fraction(1, n), Fraction(sn, n)}
108 // print(sn, end=', ')
109 constexpr std::array<int, 14> kSubHarmonicMultipliers = {
110 {3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}};
111
112 struct Range {
113 int min;
114 int max;
115 };
116
117 // Number of analyzed pitches to the left(right) of a pitch candidate.
118 constexpr int kPitchNeighborhoodRadius = 2;
119
120 // Creates a pitch period interval centered in `inverted_lag` with hard-coded
121 // radius. Clipping is applied so that the interval is always valid for a 24 kHz
122 // pitch buffer.
CreateInvertedLagRange(int inverted_lag)123 Range CreateInvertedLagRange(int inverted_lag) {
124 return {std::max(inverted_lag - kPitchNeighborhoodRadius, 0),
125 std::min(inverted_lag + kPitchNeighborhoodRadius,
126 kInitialNumLags24kHz - 1)};
127 }
128
129 constexpr int kNumPitchCandidates = 2; // Best and second best.
130 // Maximum number of analyzed pitch periods.
131 constexpr int kMaxPitchPeriods24kHz =
132 kNumPitchCandidates * (2 * kPitchNeighborhoodRadius + 1);
133
134 // Collection of inverted lags.
135 class InvertedLagsIndex {
136 public:
InvertedLagsIndex()137 InvertedLagsIndex() : num_entries_(0) {}
138 // Adds an inverted lag to the index. Cannot add more than
139 // `kMaxPitchPeriods24kHz` values.
Append(int inverted_lag)140 void Append(int inverted_lag) {
141 RTC_DCHECK_LT(num_entries_, kMaxPitchPeriods24kHz);
142 inverted_lags_[num_entries_++] = inverted_lag;
143 }
data() const144 const int* data() const { return inverted_lags_.data(); }
size() const145 int size() const { return num_entries_; }
146
147 private:
148 std::array<int, kMaxPitchPeriods24kHz> inverted_lags_;
149 int num_entries_;
150 };
151
152 // Computes the auto correlation coefficients for the inverted lags in the
153 // closed interval `inverted_lags`. Updates `inverted_lags_index` by appending
154 // the inverted lags for the computed auto correlation values.
ComputeAutoCorrelation(Range inverted_lags,rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,rtc::ArrayView<float,kInitialNumLags24kHz> auto_correlation,InvertedLagsIndex & inverted_lags_index,const VectorMath & vector_math)155 void ComputeAutoCorrelation(
156 Range inverted_lags,
157 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
158 rtc::ArrayView<float, kInitialNumLags24kHz> auto_correlation,
159 InvertedLagsIndex& inverted_lags_index,
160 const VectorMath& vector_math) {
161 // Check valid range.
162 RTC_DCHECK_LE(inverted_lags.min, inverted_lags.max);
163 // Trick to avoid zero initialization of `auto_correlation`.
164 // Needed by the pseudo-interpolation.
165 if (inverted_lags.min > 0) {
166 auto_correlation[inverted_lags.min - 1] = 0.f;
167 }
168 if (inverted_lags.max < kInitialNumLags24kHz - 1) {
169 auto_correlation[inverted_lags.max + 1] = 0.f;
170 }
171 // Check valid `inverted_lag` indexes.
172 RTC_DCHECK_GE(inverted_lags.min, 0);
173 RTC_DCHECK_LT(inverted_lags.max, kInitialNumLags24kHz);
174 for (int inverted_lag = inverted_lags.min; inverted_lag <= inverted_lags.max;
175 ++inverted_lag) {
176 auto_correlation[inverted_lag] =
177 ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math);
178 inverted_lags_index.Append(inverted_lag);
179 }
180 }
181
182 // Searches the strongest pitch period at 24 kHz and returns its inverted lag at
183 // 48 kHz.
ComputePitchPeriod48kHz(rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,rtc::ArrayView<const int> inverted_lags,rtc::ArrayView<const float,kInitialNumLags24kHz> auto_correlation,rtc::ArrayView<const float,kRefineNumLags24kHz> y_energy,const VectorMath & vector_math)184 int ComputePitchPeriod48kHz(
185 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
186 rtc::ArrayView<const int> inverted_lags,
187 rtc::ArrayView<const float, kInitialNumLags24kHz> auto_correlation,
188 rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy,
189 const VectorMath& vector_math) {
190 static_assert(kMaxPitch24kHz > kInitialNumLags24kHz, "");
191 static_assert(kMaxPitch24kHz < kBufSize24kHz, "");
192 int best_inverted_lag = 0; // Pitch period.
193 float best_numerator = -1.f; // Pitch strength numerator.
194 float best_denominator = 0.f; // Pitch strength denominator.
195 for (int inverted_lag : inverted_lags) {
196 // A pitch candidate must have positive correlation.
197 if (auto_correlation[inverted_lag] > 0.f) {
198 // Auto-correlation energy normalized by frame energy.
199 const float numerator =
200 auto_correlation[inverted_lag] * auto_correlation[inverted_lag];
201 const float denominator = y_energy[inverted_lag];
202 // Compare numerator/denominator ratios without using divisions.
203 if (numerator * best_denominator > best_numerator * denominator) {
204 best_inverted_lag = inverted_lag;
205 best_numerator = numerator;
206 best_denominator = denominator;
207 }
208 }
209 }
210 // Pseudo-interpolation to transform `best_inverted_lag` (24 kHz pitch) to a
211 // 48 kHz pitch period.
212 if (best_inverted_lag == 0 || best_inverted_lag >= kInitialNumLags24kHz - 1) {
213 // Cannot apply pseudo-interpolation at the boundaries.
214 return best_inverted_lag * 2;
215 }
216 int offset = GetPitchPseudoInterpolationOffset(
217 auto_correlation[best_inverted_lag + 1],
218 auto_correlation[best_inverted_lag],
219 auto_correlation[best_inverted_lag - 1]);
220 // TODO(bugs.webrtc.org/9076): When retraining, check if `offset` below should
221 // be subtracted since `inverted_lag` is an inverted lag but offset is a lag.
222 return 2 * best_inverted_lag + offset;
223 }
224
225 // Returns an alternative pitch period for `pitch_period` given a `multiplier`
226 // and a `divisor` of the period.
GetAlternativePitchPeriod(int pitch_period,int multiplier,int divisor)227 constexpr int GetAlternativePitchPeriod(int pitch_period,
228 int multiplier,
229 int divisor) {
230 RTC_DCHECK_GT(divisor, 0);
231 // Same as `round(multiplier * pitch_period / divisor)`.
232 return (2 * multiplier * pitch_period + divisor) / (2 * divisor);
233 }
234
235 // Returns true if the alternative pitch period is stronger than the initial one
236 // given the last estimated pitch and the value of `period_divisor` used to
237 // compute the alternative pitch period via `GetAlternativePitchPeriod()`.
IsAlternativePitchStrongerThanInitial(PitchInfo last,PitchInfo initial,PitchInfo alternative,int period_divisor)238 bool IsAlternativePitchStrongerThanInitial(PitchInfo last,
239 PitchInfo initial,
240 PitchInfo alternative,
241 int period_divisor) {
242 // Initial pitch period candidate thresholds for a sample rate of 24 kHz.
243 // Computed as [5*k*k for k in range(16)].
244 constexpr std::array<int, 14> kInitialPitchPeriodThresholds = {
245 {20, 45, 80, 125, 180, 245, 320, 405, 500, 605, 720, 845, 980, 1125}};
246 static_assert(
247 kInitialPitchPeriodThresholds.size() == kSubHarmonicMultipliers.size(),
248 "");
249 RTC_DCHECK_GE(last.period, 0);
250 RTC_DCHECK_GE(initial.period, 0);
251 RTC_DCHECK_GE(alternative.period, 0);
252 RTC_DCHECK_GE(period_divisor, 2);
253 // Compute a term that lowers the threshold when `alternative.period` is close
254 // to the last estimated period `last.period` - i.e., pitch tracking.
255 float lower_threshold_term = 0.f;
256 if (std::abs(alternative.period - last.period) <= 1) {
257 // The candidate pitch period is within 1 sample from the last one.
258 // Make the candidate at `alternative.period` very easy to be accepted.
259 lower_threshold_term = last.strength;
260 } else if (std::abs(alternative.period - last.period) == 2 &&
261 initial.period >
262 kInitialPitchPeriodThresholds[period_divisor - 2]) {
263 // The candidate pitch period is 2 samples far from the last one and the
264 // period `initial.period` (from which `alternative.period` has been
265 // derived) is greater than a threshold. Make `alternative.period` easy to
266 // be accepted.
267 lower_threshold_term = 0.5f * last.strength;
268 }
269 // Set the threshold based on the strength of the initial estimate
270 // `initial.period`. Also reduce the chance of false positives caused by a
271 // bias towards high frequencies (originating from short-term correlations).
272 float threshold =
273 std::max(0.3f, 0.7f * initial.strength - lower_threshold_term);
274 if (alternative.period < 3 * kMinPitch24kHz) {
275 // High frequency.
276 threshold = std::max(0.4f, 0.85f * initial.strength - lower_threshold_term);
277 } else if (alternative.period < 2 * kMinPitch24kHz) {
278 // Even higher frequency.
279 threshold = std::max(0.5f, 0.9f * initial.strength - lower_threshold_term);
280 }
281 return alternative.strength > threshold;
282 }
283
284 } // namespace
285
Decimate2x(rtc::ArrayView<const float,kBufSize24kHz> src,rtc::ArrayView<float,kBufSize12kHz> dst)286 void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
287 rtc::ArrayView<float, kBufSize12kHz> dst) {
288 // TODO(bugs.webrtc.org/9076): Consider adding anti-aliasing filter.
289 static_assert(2 * kBufSize12kHz == kBufSize24kHz, "");
290 for (int i = 0; i < kBufSize12kHz; ++i) {
291 dst[i] = src[2 * i];
292 }
293 }
294
ComputeSlidingFrameSquareEnergies24kHz(rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,rtc::ArrayView<float,kRefineNumLags24kHz> y_energy,AvailableCpuFeatures cpu_features)295 void ComputeSlidingFrameSquareEnergies24kHz(
296 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
297 rtc::ArrayView<float, kRefineNumLags24kHz> y_energy,
298 AvailableCpuFeatures cpu_features) {
299 VectorMath vector_math(cpu_features);
300 static_assert(kFrameSize20ms24kHz < kBufSize24kHz, "");
301 const auto frame_20ms_view = pitch_buffer.subview(0, kFrameSize20ms24kHz);
302 float yy = vector_math.DotProduct(frame_20ms_view, frame_20ms_view);
303 y_energy[0] = yy;
304 static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, "");
305 static_assert(kMaxPitch24kHz < kRefineNumLags24kHz, "");
306 for (int inverted_lag = 0; inverted_lag < kMaxPitch24kHz; ++inverted_lag) {
307 yy -= pitch_buffer[inverted_lag] * pitch_buffer[inverted_lag];
308 yy += pitch_buffer[inverted_lag + kFrameSize20ms24kHz] *
309 pitch_buffer[inverted_lag + kFrameSize20ms24kHz];
310 yy = std::max(1.f, yy);
311 y_energy[inverted_lag + 1] = yy;
312 }
313 }
314
ComputePitchPeriod12kHz(rtc::ArrayView<const float,kBufSize12kHz> pitch_buffer,rtc::ArrayView<const float,kNumLags12kHz> auto_correlation,AvailableCpuFeatures cpu_features)315 CandidatePitchPeriods ComputePitchPeriod12kHz(
316 rtc::ArrayView<const float, kBufSize12kHz> pitch_buffer,
317 rtc::ArrayView<const float, kNumLags12kHz> auto_correlation,
318 AvailableCpuFeatures cpu_features) {
319 static_assert(kMaxPitch12kHz > kNumLags12kHz, "");
320 static_assert(kMaxPitch12kHz < kBufSize12kHz, "");
321
322 // Stores a pitch candidate period and strength information.
323 struct PitchCandidate {
324 // Pitch period encoded as inverted lag.
325 int period_inverted_lag = 0;
326 // Pitch strength encoded as a ratio.
327 float strength_numerator = -1.f;
328 float strength_denominator = 0.f;
329 // Compare the strength of two pitch candidates.
330 bool HasStrongerPitchThan(const PitchCandidate& b) const {
331 // Comparing the numerator/denominator ratios without using divisions.
332 return strength_numerator * b.strength_denominator >
333 b.strength_numerator * strength_denominator;
334 }
335 };
336
337 VectorMath vector_math(cpu_features);
338 static_assert(kFrameSize20ms12kHz + 1 < kBufSize12kHz, "");
339 const auto frame_view = pitch_buffer.subview(0, kFrameSize20ms12kHz + 1);
340 float denominator = 1.f + vector_math.DotProduct(frame_view, frame_view);
341 // Search best and second best pitches by looking at the scaled
342 // auto-correlation.
343 PitchCandidate best;
344 PitchCandidate second_best;
345 second_best.period_inverted_lag = 1;
346 for (int inverted_lag = 0; inverted_lag < kNumLags12kHz; ++inverted_lag) {
347 // A pitch candidate must have positive correlation.
348 if (auto_correlation[inverted_lag] > 0.f) {
349 PitchCandidate candidate{
350 inverted_lag,
351 auto_correlation[inverted_lag] * auto_correlation[inverted_lag],
352 denominator};
353 if (candidate.HasStrongerPitchThan(second_best)) {
354 if (candidate.HasStrongerPitchThan(best)) {
355 second_best = best;
356 best = candidate;
357 } else {
358 second_best = candidate;
359 }
360 }
361 }
362 // Update `squared_energy_y` for the next inverted lag.
363 const float y_old = pitch_buffer[inverted_lag];
364 const float y_new = pitch_buffer[inverted_lag + kFrameSize20ms12kHz];
365 denominator -= y_old * y_old;
366 denominator += y_new * y_new;
367 denominator = std::max(0.f, denominator);
368 }
369 return {best.period_inverted_lag, second_best.period_inverted_lag};
370 }
371
ComputePitchPeriod48kHz(rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,rtc::ArrayView<const float,kRefineNumLags24kHz> y_energy,CandidatePitchPeriods pitch_candidates,AvailableCpuFeatures cpu_features)372 int ComputePitchPeriod48kHz(
373 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
374 rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy,
375 CandidatePitchPeriods pitch_candidates,
376 AvailableCpuFeatures cpu_features) {
377 // Compute the auto-correlation terms only for neighbors of the two pitch
378 // candidates (best and second best).
379 std::array<float, kInitialNumLags24kHz> auto_correlation;
380 InvertedLagsIndex inverted_lags_index;
381 // Create two inverted lag ranges so that `r1` precedes `r2`.
382 const bool swap_candidates =
383 pitch_candidates.best > pitch_candidates.second_best;
384 const Range r1 = CreateInvertedLagRange(
385 swap_candidates ? pitch_candidates.second_best : pitch_candidates.best);
386 const Range r2 = CreateInvertedLagRange(
387 swap_candidates ? pitch_candidates.best : pitch_candidates.second_best);
388 // Check valid ranges.
389 RTC_DCHECK_LE(r1.min, r1.max);
390 RTC_DCHECK_LE(r2.min, r2.max);
391 // Check `r1` precedes `r2`.
392 RTC_DCHECK_LE(r1.min, r2.min);
393 RTC_DCHECK_LE(r1.max, r2.max);
394 VectorMath vector_math(cpu_features);
395 if (r1.max + 1 >= r2.min) {
396 // Overlapping or adjacent ranges.
397 ComputeAutoCorrelation({r1.min, r2.max}, pitch_buffer, auto_correlation,
398 inverted_lags_index, vector_math);
399 } else {
400 // Disjoint ranges.
401 ComputeAutoCorrelation(r1, pitch_buffer, auto_correlation,
402 inverted_lags_index, vector_math);
403 ComputeAutoCorrelation(r2, pitch_buffer, auto_correlation,
404 inverted_lags_index, vector_math);
405 }
406 return ComputePitchPeriod48kHz(pitch_buffer, inverted_lags_index,
407 auto_correlation, y_energy, vector_math);
408 }
409
ComputeExtendedPitchPeriod48kHz(rtc::ArrayView<const float,kBufSize24kHz> pitch_buffer,rtc::ArrayView<const float,kRefineNumLags24kHz> y_energy,int initial_pitch_period_48kHz,PitchInfo last_pitch_48kHz,AvailableCpuFeatures cpu_features)410 PitchInfo ComputeExtendedPitchPeriod48kHz(
411 rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
412 rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy,
413 int initial_pitch_period_48kHz,
414 PitchInfo last_pitch_48kHz,
415 AvailableCpuFeatures cpu_features) {
416 RTC_DCHECK_LE(kMinPitch48kHz, initial_pitch_period_48kHz);
417 RTC_DCHECK_LE(initial_pitch_period_48kHz, kMaxPitch48kHz);
418
419 // Stores information for a refined pitch candidate.
420 struct RefinedPitchCandidate {
421 int period;
422 float strength;
423 // Additional strength data used for the final pitch estimation.
424 float xy; // Auto-correlation.
425 float y_energy; // Energy of the sliding frame `y`.
426 };
427
428 const float x_energy = y_energy[kMaxPitch24kHz];
429 const auto pitch_strength = [x_energy](float xy, float y_energy) {
430 RTC_DCHECK_GE(x_energy * y_energy, 0.f);
431 return xy / std::sqrt(1.f + x_energy * y_energy);
432 };
433 VectorMath vector_math(cpu_features);
434
435 // Initialize the best pitch candidate with `initial_pitch_period_48kHz`.
436 RefinedPitchCandidate best_pitch;
437 best_pitch.period =
438 std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1);
439 best_pitch.xy = ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period,
440 pitch_buffer, vector_math);
441 best_pitch.y_energy = y_energy[kMaxPitch24kHz - best_pitch.period];
442 best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy);
443 // Keep a copy of the initial pitch candidate.
444 const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength};
445 // 24 kHz version of the last estimated pitch.
446 const PitchInfo last_pitch{last_pitch_48kHz.period / 2,
447 last_pitch_48kHz.strength};
448
449 // Find `max_period_divisor` such that the result of
450 // `GetAlternativePitchPeriod(initial_pitch_period, 1, max_period_divisor)`
451 // equals `kMinPitch24kHz`.
452 const int max_period_divisor =
453 (2 * initial_pitch.period) / (2 * kMinPitch24kHz - 1);
454 for (int period_divisor = 2; period_divisor <= max_period_divisor;
455 ++period_divisor) {
456 PitchInfo alternative_pitch;
457 alternative_pitch.period = GetAlternativePitchPeriod(
458 initial_pitch.period, /*multiplier=*/1, period_divisor);
459 RTC_DCHECK_GE(alternative_pitch.period, kMinPitch24kHz);
460 // When looking at `alternative_pitch.period`, we also look at one of its
461 // sub-harmonics. `kSubHarmonicMultipliers` is used to know where to look.
462 // `period_divisor` == 2 is a special case since `dual_alternative_period`
463 // might be greater than the maximum pitch period.
464 int dual_alternative_period = GetAlternativePitchPeriod(
465 initial_pitch.period, kSubHarmonicMultipliers[period_divisor - 2],
466 period_divisor);
467 RTC_DCHECK_GT(dual_alternative_period, 0);
468 if (period_divisor == 2 && dual_alternative_period > kMaxPitch24kHz) {
469 dual_alternative_period = initial_pitch.period;
470 }
471 RTC_DCHECK_NE(alternative_pitch.period, dual_alternative_period)
472 << "The lower pitch period and the additional sub-harmonic must not "
473 "coincide.";
474 // Compute an auto-correlation score for the primary pitch candidate
475 // `alternative_pitch.period` by also looking at its possible sub-harmonic
476 // `dual_alternative_period`.
477 const float xy_primary_period = ComputeAutoCorrelation(
478 kMaxPitch24kHz - alternative_pitch.period, pitch_buffer, vector_math);
479 // TODO(webrtc:10480): Copy `xy_primary_period` if the secondary period is
480 // equal to the primary one.
481 const float xy_secondary_period = ComputeAutoCorrelation(
482 kMaxPitch24kHz - dual_alternative_period, pitch_buffer, vector_math);
483 const float xy = 0.5f * (xy_primary_period + xy_secondary_period);
484 const float yy =
485 0.5f * (y_energy[kMaxPitch24kHz - alternative_pitch.period] +
486 y_energy[kMaxPitch24kHz - dual_alternative_period]);
487 alternative_pitch.strength = pitch_strength(xy, yy);
488
489 // Maybe update best period.
490 if (IsAlternativePitchStrongerThanInitial(
491 last_pitch, initial_pitch, alternative_pitch, period_divisor)) {
492 best_pitch = {alternative_pitch.period, alternative_pitch.strength, xy,
493 yy};
494 }
495 }
496
497 // Final pitch strength and period.
498 best_pitch.xy = std::max(0.f, best_pitch.xy);
499 RTC_DCHECK_LE(0.f, best_pitch.y_energy);
500 float final_pitch_strength =
501 (best_pitch.y_energy <= best_pitch.xy)
502 ? 1.f
503 : best_pitch.xy / (best_pitch.y_energy + 1.f);
504 final_pitch_strength = std::min(best_pitch.strength, final_pitch_strength);
505 int final_pitch_period_48kHz = std::max(
506 kMinPitch48kHz, PitchPseudoInterpolationLagPitchBuf(
507 best_pitch.period, pitch_buffer, vector_math));
508
509 return {final_pitch_period_48kHz, final_pitch_strength};
510 }
511
512 } // namespace rnn_vad
513 } // namespace webrtc
514