# Lint as: python2, python3
# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Server side audio utilities functions for Brillo."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import contextlib
import logging
import numpy
import os
import struct
import subprocess
import tempfile
import wave

from autotest_lib.client.common_lib import error
from six.moves import map
from six.moves import range


_BITS_PER_BYTE=8

# Thresholds used when comparing files.
#
# The frequency threshold used when comparing files. The frequency of the
# recorded audio has to be within a fraction _FREQUENCY_THRESHOLD (0.01, i.e.
# 1%) of the frequency of the original audio.
_FREQUENCY_THRESHOLD = 0.01
# Noise threshold controls how much noise is allowed as a fraction of the
# magnitude of the peak frequency after taking an FFT. The magnitude of all
# the other frequencies in the signal should stay below the fraction
# _FFT_NOISE_THRESHOLD (0.05, i.e. 5%) of the magnitude of the main frequency.
_FFT_NOISE_THRESHOLD = 0.05

# Command used to encode audio. If you want to test with something different,
# this should be changed.
_ENCODING_CMD = 'sox'

# struct type codes for the PCM sample widths (in bytes) that can be decoded.
_STRUCT_CODE_BY_SAMPLE_WIDTH = {1: 'B', 2: 'h', 4: 'i'}


def extract_wav_frames(wave_file):
    """Extract all frames from a WAV file.

    @param wave_file: A Wave_read object representing a WAV file opened for
                      reading.

    @return: A flat list of signed sample values, interleaved by channel
             (sample i belongs to channel i % number-of-channels).

    @raise ValueError: The sample width is not 1, 2 or 4 bytes.
    """
    sample_width = wave_file.getsampwidth()
    code = _STRUCT_CODE_BY_SAMPLE_WIDTH.get(sample_width)
    if code is None:
        raise ValueError('Unsupported sample width')

    num_frames = wave_file.getnframes()
    num_samples = num_frames * wave_file.getnchannels()
    # '=' keeps the native byte order but uses the standard type sizes, so 'i'
    # is always exactly 4 bytes. The sample count is spelled out in a single
    # format instead of relying on string repetition.
    fmt = '=%d%s' % (num_samples, code)
    frames = list(struct.unpack(fmt, wave_file.readframes(num_frames)))

    # Since 8-bit PCM is unsigned with an offset of 128, we subtract the offset
    # to make it signed since the rest of the code assumes signed numbers.
    if sample_width == 1:
        frames = [val - 128 for val in frames]

    return frames


def check_wav_file(filename, num_channels=None, sample_rate=None,
                   sample_width=None):
    """Checks a WAV file and returns its peak PCM values.

    @param filename: Input WAV file to analyze.
    @param num_channels: Number of channels to expect (None to not check).
    @param sample_rate: Sample rate to expect (None to not check).
    @param sample_width: Sample width to expect (None to not check).

    @return A list of the absolute maximum PCM values for each channel in the
            WAV file.

    @raise ValueError: Failed to process the WAV file or validate an attribute.
    """
    try:
        with contextlib.closing(wave.open(filename, 'r')) as chk_file:
            # Read every attribute while the file is still open.
            actual_channels = chk_file.getnchannels()
            actual_rate = chk_file.getframerate()
            actual_width = chk_file.getsampwidth()
            if num_channels is not None and actual_channels != num_channels:
                raise ValueError('Expected %d channels but got %d instead.' %
                                 (num_channels, actual_channels))
            if sample_rate is not None and actual_rate != sample_rate:
                raise ValueError('Expected sample rate %d but got %d instead.'
                                 % (sample_rate, actual_rate))
            if sample_width is not None and actual_width != sample_width:
                raise ValueError('Expected sample width %d but got %d instead.'
                                 % (sample_width, actual_width))
            frames = extract_wav_frames(chk_file)
    except (wave.Error, struct.error, EOFError) as e:
        # Truncated or empty files surface as struct.error / EOFError; report
        # them the same way as any other malformed WAV file.
        raise ValueError('Error processing WAV file: %s' % e)

    if not frames:
        raise ValueError('WAV file contains no audio frames.')

    return [max(abs(val) for val in frames[channel::actual_channels])
            for channel in range(actual_channels)]


def _make_temp_file(suffix, temp_dir):
    """Creates an empty 'sine-' temporary file and returns its path.

    @param suffix: Suffix (including the dot) for the file name.
    @param temp_dir: Directory in which to create the file.

    @return The path of the newly created file.
    """
    fd, path = tempfile.mkstemp(prefix='sine-', suffix=suffix, dir=temp_dir)
    # Only the name is needed; close the descriptor so it is not leaked.
    os.close(fd)
    return path


def _run_command(cmd, log_message):
    """Logs and runs an external command, reporting (not raising) failures.

    @param cmd: The command and its arguments as a list of strings.
    @param log_message: Logging format string with one %s for the command.
    """
    logging.info(log_message, ' '.join(cmd))
    try:
        returncode = subprocess.call(cmd)
    except OSError as e:
        logging.error('Failed to run %s: %s', cmd[0], e)
        return
    if returncode != 0:
        logging.error('%s exited with status %d.', cmd[0], returncode)


def generate_sine_file(host, num_channels, sample_rate, sample_width,
                       duration_secs, sine_frequency, temp_dir,
                       file_format='wav'):
    """Generate a sine file and push it to the DUT.

    @param host: An object representing the DUT.
    @param num_channels: Number of channels to use.
    @param sample_rate: Sample rate to use for sine wave generation.
    @param sample_width: Sample width to use for sine wave generation.
    @param duration_secs: Duration in seconds to generate sine wave for.
    @param sine_frequency: Frequency to generate sine wave at.
    @param temp_dir: A temporary directory on the host.
    @param file_format: A string representing the encoding for the audio file.

    @return A tuple of the filename on the server and the DUT. The server side
            file is always the WAV data, even when an encoded copy was sent.
    """
    local_filename = _make_temp_file('.' + file_format, temp_dir)
    # 8-bit PCM is unsigned, wider samples are signed.
    encoding = 'unsigned' if sample_width == 1 else 'signed'
    # Arguments are passed as a list (no shell) so paths containing spaces or
    # shell metacharacters are handled correctly.
    gen_file_cmd = ['sox', '-n', '-t', 'wav',
                    '-c', '%d' % num_channels,
                    '-e', encoding,
                    '-b', '%d' % (sample_width * _BITS_PER_BYTE),
                    '-r', '%d' % sample_rate,
                    local_filename,
                    'synth', '%d' % duration_secs,
                    'sine', '%d' % sine_frequency,
                    'vol', '0.9']
    _run_command(gen_file_cmd, 'Command to generate sine wave: %s')

    if file_format != 'wav':
        # Convert the file to the appropriate format.
        logging.info('Converting file to %s', file_format)
        local_encoded_filename = _make_temp_file('.' + file_format, temp_dir)
        cvt_file_cmd = _ENCODING_CMD.split() + [local_filename,
                                                local_encoded_filename]
        _run_command(cvt_file_cmd, 'Command to convert file: %s')
    else:
        local_encoded_filename = local_filename

    # TODO(ralphnathan): Find a better place to put this file once the SELinux
    # issues are resolved.
    dut_tmp_dir = '/data'
    remote_filename = os.path.join(dut_tmp_dir, 'sine.' + file_format)
    logging.info('Send file to DUT.')
    logging.info('remote_filename %s', remote_filename)
    host.send_file(local_encoded_filename, remote_filename)
    return local_filename, remote_filename


def _is_outside_frequency_threshold(freq_reference, freq_rec):
    """Compares the frequency of the recorded audio with the reference audio.

    This function checks to see if the frequencies corresponding to the peak
    FFT values are similar, meaning that the dominant frequency in the audio
    signal is the same for the recorded audio as that in the audio played.

    @param freq_reference: The dominant frequency in the reference audio file.
    @param freq_rec: The dominant frequency in the recorded audio file.

    @return: True if freq_rec deviates from freq_reference by more than the
             fraction _FREQUENCY_THRESHOLD of freq_reference, False otherwise.
    """
    # Same test as comparing freq_rec / freq_reference against
    # 1 +/- _FREQUENCY_THRESHOLD, but without dividing, so a reference
    # frequency of 0 Hz is handled as well.
    allowed_deviation = _FREQUENCY_THRESHOLD * abs(freq_reference)
    return abs(freq_rec - freq_reference) > allowed_deviation


def _compare_frames(reference_file_frames, rec_file_frames, num_channels,
                    sample_rate):
    """Compares audio frames from the reference file and the recorded file.

    This method checks for two things:
      1. That the main frequency is the same in both the files. This is done
         using the FFT and observing the frequency corresponding to the
         peak.
      2. That there is no other dominant frequency in the recorded file.
         This is done by sweeping the frequency domain and checking that the
         magnitude of every frequency away from the reference frequency is
         always less than the fraction _FFT_NOISE_THRESHOLD of the peak.

    The key assumption here is that the reference audio file contains only
    one frequency.

    @param reference_file_frames: Audio frames from the reference file.
    @param rec_file_frames: Audio frames from the recorded file.
    @param num_channels: Number of channels in the files.
    @param sample_rate: Sample rate of the files.

    @raise error.TestFail: The frequency of the recorded signal doesn't
                           match that of the reference signal.
    @raise error.TestFail: There is too much noise in the recorded signal.
    """
    sample_spacing = 1.0 / sample_rate
    for channel in range(num_channels):
        reference_data = reference_file_frames[channel::num_channels]
        rec_data = rec_file_frames[channel::num_channels]

        # Get fft magnitudes and the frequencies corresponding to them.
        abs_fft_reference = numpy.abs(numpy.fft.rfft(reference_data))
        abs_fft_rec = numpy.abs(numpy.fft.rfft(rec_data))
        fft_freqs_reference = numpy.fft.rfftfreq(len(reference_data),
                                                 sample_spacing)
        fft_freqs_rec = numpy.fft.rfftfreq(len(rec_data), sample_spacing)

        # Get frequency at highest peak.
        freq_reference = fft_freqs_reference[numpy.argmax(abs_fft_reference)]
        freq_rec = fft_freqs_rec[numpy.argmax(abs_fft_rec)]

        # Compare the two frequencies.
        logging.info('Golden frequency of channel %i is %f', channel,
                     freq_reference)
        logging.info('Recorded frequency of channel %i is %f', channel,
                     freq_rec)
        if _is_outside_frequency_threshold(freq_reference, freq_rec):
            raise error.TestFail('The recorded audio frequency does not match '
                                 'that of the audio played.')

        # Check for noise in the frequency domain: any bin away from the
        # reference frequency whose magnitude exceeds the limit is noise.
        noise_limit = _FFT_NOISE_THRESHOLD * numpy.max(abs_fft_rec)
        noise_detected = False
        for fft_freq, fft_val in zip(fft_freqs_rec, abs_fft_rec):
            # The bin's own frequency must be tested here; testing the peak
            # frequency again would make this check unreachable.
            if (fft_val > noise_limit and
                    _is_outside_frequency_threshold(freq_reference, fft_freq)):
                logging.warning('Unexpected frequency peak detected at %f '
                                'Hz.', fft_freq)
                noise_detected = True

        if noise_detected:
            raise error.TestFail('Signal is noiser than expected.')


def compare_file(reference_audio_filename, test_audio_filename):
    """Compares the recorded audio file to the reference audio file.

    The channel count and sample rate of the reference file are used for both
    files.

    @param reference_audio_filename : Reference audio file containing the
                                      reference signal.
    @param test_audio_filename: Audio file containing audio captured from
                                the test.

    @raise error.TestFail: The recorded audio does not match the reference
                           (wrong dominant frequency or too much noise).
    """
    with contextlib.closing(wave.open(reference_audio_filename,
                                      'rb')) as reference_file:
        with contextlib.closing(wave.open(test_audio_filename,
                                          'rb')) as rec_file:
            num_channels = reference_file.getnchannels()
            sample_rate = reference_file.getframerate()
            if (rec_file.getnchannels() != num_channels or
                    rec_file.getframerate() != sample_rate):
                logging.warning('Recorded file has %d channels at %d Hz but '
                                'the reference has %d channels at %d Hz; '
                                'comparing using the reference parameters.',
                                rec_file.getnchannels(),
                                rec_file.getframerate(),
                                num_channels, sample_rate)

            # Extract data from files.
            reference_file_frames = extract_wav_frames(reference_file)
            rec_file_frames = extract_wav_frames(rec_file)

    _compare_frames(reference_file_frames, rec_file_frames, num_channels,
                    sample_rate)