xref: /aosp_15_r20/external/autotest/server/brillo/audio_utils.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Li# Copyright (c) 2016 The Chromium Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li
6*9c5db199SXin Li"""Server side audio utilities functions for Brillo."""
7*9c5db199SXin Li
8*9c5db199SXin Lifrom __future__ import absolute_import
9*9c5db199SXin Lifrom __future__ import division
10*9c5db199SXin Lifrom __future__ import print_function
11*9c5db199SXin Li
12*9c5db199SXin Liimport contextlib
13*9c5db199SXin Liimport logging
14*9c5db199SXin Liimport numpy
15*9c5db199SXin Liimport os
16*9c5db199SXin Liimport struct
17*9c5db199SXin Liimport subprocess
18*9c5db199SXin Liimport tempfile
19*9c5db199SXin Liimport wave
20*9c5db199SXin Li
21*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
22*9c5db199SXin Lifrom six.moves import map
23*9c5db199SXin Lifrom six.moves import range
24*9c5db199SXin Li
25*9c5db199SXin Li
26*9c5db199SXin Li_BITS_PER_BYTE=8
27*9c5db199SXin Li
28*9c5db199SXin Li# Thresholds used when comparing files.
29*9c5db199SXin Li#
30*9c5db199SXin Li# The frequency threshold used when comparing files. The frequency of the
31*9c5db199SXin Li# recorded audio has to be within _FREQUENCY_THRESHOLD percent of the frequency
32*9c5db199SXin Li# of the original audio.
33*9c5db199SXin Li_FREQUENCY_THRESHOLD = 0.01
34*9c5db199SXin Li# Noise threshold controls how much noise is allowed as a fraction of the
35*9c5db199SXin Li# magnitude of the peak frequency after taking an FFT. The power of all the
36*9c5db199SXin Li# other frequencies in the signal should be within _FFT_NOISE_THRESHOLD percent
37*9c5db199SXin Li# of the power of the main frequency.
38*9c5db199SXin Li_FFT_NOISE_THRESHOLD = 0.05
39*9c5db199SXin Li
40*9c5db199SXin Li# Command used to encode audio. If you want to test with something different,
41*9c5db199SXin Li# this should be changed.
42*9c5db199SXin Li_ENCODING_CMD = 'sox'
43*9c5db199SXin Li
44*9c5db199SXin Li
def extract_wav_frames(wave_file):
    """Extract all frames from a WAV file.

    @param wave_file: A Wave_read object representing a WAV file opened for
                      reading.

    @return: A flat list of signed sample values. Samples of a multi-channel
             file are interleaved, so channel c of a file with n channels is
             frames[c::n].

    @raise ValueError: The sample width is not 1, 2 or 4 bytes.
    """
    # struct type code for each supported sample width (in bytes).
    type_codes = {1: 'B', 2: 'h', 4: 'i'}
    sample_width = wave_file.getsampwidth()
    if sample_width not in type_codes:
        raise ValueError('Unsupported sample width')

    raw_data = wave_file.readframes(wave_file.getnframes())
    # Size the format from the data actually read rather than from the header
    # so that a file whose header overstates its length can still be decoded.
    num_samples = len(raw_data) // sample_width
    # '=' keeps the native byte order but uses standard sizes, so 'i' is
    # always 4 bytes. A single repeat count covers the samples of all
    # channels.
    fmt = '=%d%s' % (num_samples, type_codes[sample_width])
    frames = list(struct.unpack_from(fmt, raw_data))

    # Since 8-bit PCM is unsigned with an offset of 128, we subtract the offset
    # to make it signed since the rest of the code assumes signed numbers.
    if sample_width == 1:
        frames = [val - 128 for val in frames]

    return frames
72*9c5db199SXin Li
73*9c5db199SXin Li
def check_wav_file(filename, num_channels=None, sample_rate=None,
                   sample_width=None):
    """Checks a WAV file and returns its peak PCM values.

    @param filename: Input WAV file to analyze.
    @param num_channels: Number of channels to expect (None to not check).
    @param sample_rate: Sample rate to expect (None to not check).
    @param sample_width: Sample width to expect (None to not check).

    @return A list of the absolute maximum PCM values for each channel in the
            WAV file.

    @raise ValueError: Failed to process the WAV file or validate an attribute.
    """
    try:
        with contextlib.closing(wave.open(filename, 'rb')) as chk_file:
            # Read every attribute while the file is still open; nothing
            # below touches the file object after it has been closed.
            actual_channels = chk_file.getnchannels()
            actual_rate = chk_file.getframerate()
            actual_width = chk_file.getsampwidth()
            if num_channels is not None and actual_channels != num_channels:
                raise ValueError('Expected %d channels but got %d instead.' %
                                 (num_channels, actual_channels))
            if sample_rate is not None and actual_rate != sample_rate:
                raise ValueError('Expected sample rate %d but got %d instead.'
                                 % (sample_rate, actual_rate))
            if sample_width is not None and actual_width != sample_width:
                raise ValueError('Expected sample width %d but got %d instead.'
                                 % (sample_width, actual_width))
            frames = extract_wav_frames(chk_file)
    except (wave.Error, EOFError) as e:
        # An empty or truncated file makes the wave module raise EOFError
        # instead of wave.Error; report both the same way.
        raise ValueError('Error processing WAV file: %s' % e)

    peaks = []
    for channel in range(actual_channels):
        # Samples are interleaved, so every actual_channels-th value belongs
        # to the same channel.
        channel_frames = frames[channel::actual_channels]
        if not channel_frames:
            raise ValueError('WAV file contains no audio frames.')
        peaks.append(max(abs(val) for val in channel_frames))
    return peaks
111*9c5db199SXin Li
112*9c5db199SXin Li
def generate_sine_file(host, num_channels, sample_rate, sample_width,
                       duration_secs, sine_frequency, temp_dir,
                       file_format='wav'):
    """Generate a sine file and push it to the DUT.

    The sine wave is always generated as a WAV file first. When file_format
    is not 'wav', that file is then converted with _ENCODING_CMD and the
    converted file is the one sent to the DUT.

    @param host: An object representing the DUT.
    @param num_channels: Number of channels to use.
    @param sample_rate: Sample rate to use for sine wave generation.
    @param sample_width: Sample width (in bytes) to use for sine wave
                         generation.
    @param duration_secs: Duration in seconds to generate sine wave for.
    @param sine_frequency: Frequency to generate sine wave at.
    @param temp_dir: A temporary directory on the host.
    @param file_format: A string representing the encoding for the audio file.

    @return A tuple of the filename of the generated WAV file on the server
            and the filename of the (possibly re-encoded) file on the DUT.

    @raise subprocess.CalledProcessError: The generation or conversion command
                                          exited with a non-zero status.
    @raise OSError: The generation or conversion command could not be started.
    """
    def _make_temp_file():
        """Creates an empty temp file and returns its path."""
        fd, path = tempfile.mkstemp(
                prefix='sine-', suffix='.' + file_format, dir=temp_dir)
        # Only the path is needed; close the descriptor so it is not leaked.
        os.close(fd)
        return path

    # NOTE(review): the intermediate WAV file also gets the '.<file_format>'
    # suffix; '-t wav' below forces its contents to be WAV regardless.
    local_filename = _make_temp_file()
    # 8-bit PCM is unsigned, wider samples are signed.
    encoding = 'unsigned' if sample_width == 1 else 'signed'
    # Commands are passed as argument lists (no shell) so that file names
    # containing spaces or shell metacharacters are handled correctly.
    gen_file_cmd = ['sox', '-n', '-t', 'wav',
                    '-c', '%d' % num_channels,
                    '-e', encoding,
                    '-b', '%d' % (sample_width * _BITS_PER_BYTE),
                    '-r', '%d' % sample_rate,
                    local_filename,
                    'synth', '%d' % duration_secs,
                    'sine', '%d' % sine_frequency,
                    'vol', '0.9']
    logging.info('Command to generate sine wave: %s', ' '.join(gen_file_cmd))
    # Fail loudly instead of pushing an empty or partial file to the DUT.
    subprocess.check_call(gen_file_cmd)
    if file_format != 'wav':
        # Convert the file to the appropriate format.
        logging.info('Converting file to %s', file_format)
        local_encoded_filename = _make_temp_file()
        cvt_file_cmd = _ENCODING_CMD.split() + [local_filename,
                                                local_encoded_filename]
        logging.info('Command to convert file: %s', ' '.join(cvt_file_cmd))
        subprocess.check_call(cvt_file_cmd)
    else:
        local_encoded_filename = local_filename
    dut_tmp_dir = '/data'
    remote_filename = os.path.join(dut_tmp_dir, 'sine.' + file_format)
    logging.info('Send file to DUT.')
    # TODO(ralphnathan): Find a better place to put this file once the SELinux
    # issues are resolved.
    logging.info('remote_filename %s', remote_filename)
    host.send_file(local_encoded_filename, remote_filename)
    return local_filename, remote_filename
160*9c5db199SXin Li
161*9c5db199SXin Li
162*9c5db199SXin Lidef _is_outside_frequency_threshold(freq_reference, freq_rec):
163*9c5db199SXin Li    """Compares the frequency of the recorded audio with the reference audio.
164*9c5db199SXin Li
165*9c5db199SXin Li    This function checks to see if the frequencies corresponding to the peak
166*9c5db199SXin Li    FFT values are similiar meaning that the dominant frequency in the audio
167*9c5db199SXin Li    signal is the same for the recorded audio as that in the audio played.
168*9c5db199SXin Li
169*9c5db199SXin Li    @param req_reference: The dominant frequency in the reference audio file.
170*9c5db199SXin Li    @param freq_rec: The dominant frequency in the recorded audio file.
171*9c5db199SXin Li
172*9c5db199SXin Li    @return: True is freq_rec is with _FREQUENCY_THRESHOLD percent of
173*9c5db199SXin Li              freq_reference.
174*9c5db199SXin Li    """
175*9c5db199SXin Li    ratio = float(freq_rec) / freq_reference
176*9c5db199SXin Li    if ratio > 1 + _FREQUENCY_THRESHOLD or ratio < 1 - _FREQUENCY_THRESHOLD:
177*9c5db199SXin Li        return True
178*9c5db199SXin Li    return False
179*9c5db199SXin Li
180*9c5db199SXin Li
def _compare_frames(reference_file_frames, rec_file_frames, num_channels,
                    sample_rate):
    """Compares audio frames from the reference file and the recorded file.

    This method checks for two things:
      1. That the main frequency is the same in both the files. This is done
         using the FFT and observing the frequency corresponding to the
         peak.
      2. That there is no other dominant frequency in the recorded file.
         This is done by sweeping the frequency domain and checking that
         every FFT bin away from the main frequency has a magnitude of at
         most _FFT_NOISE_THRESHOLD times the magnitude of the peak.

    The key assumption here is that the reference audio file contains only
    one frequency.

    @param reference_file_frames: Audio frames from the reference file, with
                                  the channels interleaved.
    @param rec_file_frames: Audio frames from the recorded file, with the
                            channels interleaved.
    @param num_channels: Number of channels in the files.
    @param sample_rate: Sample rate of the files.

    @raise error.TestFail: The frequency of the recorded signal doesn't
                           match that of the reference signal.
    @raise error.TestFail: There is too much noise in the recorded signal.
    """
    for channel in range(num_channels):
        # De-interleave: every num_channels-th sample belongs to this channel.
        reference_data = reference_file_frames[channel::num_channels]
        rec_data = rec_file_frames[channel::num_channels]

        # Get fft and frequencies corresponding to the fft values.
        fft_reference = numpy.fft.rfft(reference_data)
        fft_rec = numpy.fft.rfft(rec_data)
        fft_freqs_reference = numpy.fft.rfftfreq(len(reference_data),
                                                 1.0 / sample_rate)
        fft_freqs_rec = numpy.fft.rfftfreq(len(rec_data), 1.0 / sample_rate)

        # Get frequency at highest peak.
        freq_reference = fft_freqs_reference[
                numpy.argmax(numpy.abs(fft_reference))]
        abs_fft_rec = numpy.abs(fft_rec)
        freq_rec = fft_freqs_rec[numpy.argmax(abs_fft_rec)]

        # Compare the two frequencies.
        logging.info('Golden frequency of channel %i is %f', channel,
                     freq_reference)
        logging.info('Recorded frequency of channel %i is  %f', channel,
                     freq_rec)
        if _is_outside_frequency_threshold(freq_reference, freq_rec):
            raise error.TestFail('The recorded audio frequency does not match '
                                 'that of the audio played.')

        # Check for noise in the frequency domain.
        noise_limit = _FFT_NOISE_THRESHOLD * numpy.max(abs_fft_rec)
        noise_detected = False
        for fft_index, fft_val in enumerate(abs_fft_rec):
            if fft_val <= noise_limit:
                continue
            # Bins close to the reference frequency are the signal itself;
            # only a strong bin away from it counts as noise. The test must
            # use this bin's own frequency, not the frequency of the overall
            # peak, which has already been validated above.
            bin_freq = fft_freqs_rec[fft_index]
            if _is_outside_frequency_threshold(freq_reference, bin_freq):
                logging.warning('Unexpected frequency peak detected at %f '
                                'Hz.', bin_freq)
                noise_detected = True

        if noise_detected:
            raise error.TestFail('Signal is noiser than expected.')
245*9c5db199SXin Li
246*9c5db199SXin Li
def compare_file(reference_audio_filename, test_audio_filename):
    """Compares the recorded audio file to the reference audio file.

    The channel count and the sample rate of the reference file are used to
    interpret both files. A warning is logged when the recorded file reports
    different values, since the comparison is then unlikely to be meaningful.

    @param reference_audio_filename: Reference audio file containing the
                                     reference signal.
    @param test_audio_filename: Audio file containing audio captured from
                                the test.

    @raise error.TestFail: Raised by _compare_frames when the recorded audio
                           does not match the reference audio.
    """
    with contextlib.closing(wave.open(reference_audio_filename,
                                      'rb')) as reference_file:
        with contextlib.closing(wave.open(test_audio_filename,
                                          'rb')) as rec_file:
            num_channels = reference_file.getnchannels()
            sample_rate = reference_file.getframerate()
            if (rec_file.getnchannels() != num_channels or
                    rec_file.getframerate() != sample_rate):
                logging.warning(
                        'Recorded file has %d channel(s) at %d Hz but the '
                        'reference file has %d channel(s) at %d Hz.',
                        rec_file.getnchannels(), rec_file.getframerate(),
                        num_channels, sample_rate)

            # Extract data from files.
            reference_file_frames = extract_wav_frames(reference_file)
            rec_file_frames = extract_wav_frames(rec_file)

    # Everything needed has been read, so the analysis runs after both files
    # have been closed.
    _compare_frames(reference_file_frames, rec_file_frames, num_channels,
                    sample_rate)
267