server/brillo/audio_utils.py

*9c5db199SXin Li# Lint as: python2, python3
*9c5db199SXin Li# Copyright (c) 2016 The Chromium Authors. All rights reserved.
*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
*9c5db199SXin Li# found in the LICENSE file.
*9c5db199SXin Li
*9c5db199SXin Li"""Server side audio utilities functions for Brillo."""
*9c5db199SXin Li
*9c5db199SXin Lifrom __future__ import absolute_import
*9c5db199SXin Lifrom __future__ import division
*9c5db199SXin Lifrom __future__ import print_function
*9c5db199SXin Li
*9c5db199SXin Liimport contextlib
*9c5db199SXin Liimport logging
*9c5db199SXin Liimport numpy
*9c5db199SXin Liimport os
*9c5db199SXin Liimport struct
*9c5db199SXin Liimport subprocess
*9c5db199SXin Liimport tempfile
*9c5db199SXin Liimport wave
*9c5db199SXin Li
*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
*9c5db199SXin Lifrom six.moves import map
*9c5db199SXin Lifrom six.moves import range
*9c5db199SXin Li
*9c5db199SXin Li
_BITS_PER_BYTE = 8

# Thresholds used when comparing files.
#
# Frequency threshold, expressed as a fraction (0.01 == 1%). The dominant
# frequency of the recorded audio has to be within this fraction of the
# dominant frequency of the original audio.
_FREQUENCY_THRESHOLD = 0.01
# Noise threshold, expressed as a fraction (0.05 == 5%) of the magnitude of
# the peak frequency after taking an FFT. The FFT magnitude of every other
# frequency in the signal should stay below this fraction of the magnitude of
# the main frequency.
_FFT_NOISE_THRESHOLD = 0.05

# Command used to encode audio. If you want to test with something different,
# this should be changed.
_ENCODING_CMD = 'sox'
*9c5db199SXin Li
*9c5db199SXin Li
def extract_wav_frames(wave_file):
    """Extract all frames from a WAV file.

    The samples are returned in the order they are stored in the file, so
    for a multi-channel file the values of the different channels are
    interleaved (callers pick a channel with frames[channel::num_channels]).

    @param wave_file: A Wave_read object representing a WAV file opened for
                      reading.

    @return: A list of signed integer samples, one entry per channel per
             frame.

    @raise ValueError: The sample width of the file is not 1, 2 or 4 bytes.
    """
    sample_width = wave_file.getsampwidth()
    if sample_width == 1:
        sample_code = 'B'  # Unsigned 8-bit sample.
    elif sample_width == 2:
        sample_code = 'h'  # Signed 16-bit sample.
    elif sample_width == 4:
        sample_code = 'i'  # Signed 32-bit sample.
    else:
        raise ValueError('Unsupported sample width')

    raw_data = wave_file.readframes(wave_file.getnframes())

    # Size the format from the bytes that were actually read rather than from
    # the frame count in the header: readframes() may return fewer frames
    # than requested, which would otherwise make struct.unpack() fail. A
    # trailing partial sample, if any, is ignored.
    num_samples = len(raw_data) // sample_width

    # '=' keeps the native byte order the original format string used, but
    # pins 'h' and 'i' to exactly 2 and 4 bytes with no alignment padding.
    frames = list(struct.unpack('=%d%s' % (num_samples, sample_code),
                                raw_data[:num_samples * sample_width]))

    # Since 8-bit PCM is unsigned with an offset of 128, we subtract the offset
    # to make it signed since the rest of the code assumes signed numbers.
    if sample_width == 1:
        frames = [val - 128 for val in frames]

    return frames
*9c5db199SXin Li
*9c5db199SXin Li
def check_wav_file(filename, num_channels=None, sample_rate=None,
                   sample_width=None):
    """Checks a WAV file and returns its peak PCM values.

    @param filename: Input WAV file to analyze.
    @param num_channels: Number of channels to expect (None to not check).
    @param sample_rate: Sample rate to expect (None to not check).
    @param sample_width: Sample width to expect (None to not check).

    @return A list of the absolute maximum PCM values for each channel in the
            WAV file.

    @raise ValueError: Failed to process the WAV file or validate an attribute.
    """
    chk_file = None
    try:
        chk_file = wave.open(filename, 'r')
        # Read the attributes once, while the file is still open, so that
        # nothing below has to query the object after it has been closed.
        actual_channels = chk_file.getnchannels()
        actual_rate = chk_file.getframerate()
        actual_width = chk_file.getsampwidth()
        if num_channels is not None and actual_channels != num_channels:
            raise ValueError('Expected %d channels but got %d instead.' %
                             (num_channels, actual_channels))
        if sample_rate is not None and actual_rate != sample_rate:
            raise ValueError('Expected sample rate %d but got %d instead.' %
                             (sample_rate, actual_rate))
        if sample_width is not None and actual_width != sample_width:
            raise ValueError('Expected sample width %d but got %d instead.' %
                             (sample_width, actual_width))
        frames = extract_wav_frames(chk_file)
    except (wave.Error, EOFError) as e:
        # wave raises EOFError for empty/truncated files; report it the same
        # way as any other malformed WAV file.
        raise ValueError('Error processing WAV file: %s' % e)
    finally:
        if chk_file is not None:
            chk_file.close()

    # Without at least one complete frame there is no peak to report; max()
    # would raise an unhelpful ValueError on the empty per-channel slice.
    if len(frames) < actual_channels:
        raise ValueError('WAV file contains no audio frames.')

    # Samples are interleaved by channel, so stride through the list to pick
    # out the values belonging to each channel.
    return [max(abs(val) for val in frames[channel::actual_channels])
            for channel in range(actual_channels)]
*9c5db199SXin Li
*9c5db199SXin Li
def generate_sine_file(host, num_channels, sample_rate, sample_width,
                       duration_secs, sine_frequency, temp_dir,
                       file_format='wav'):
    """Generate a sine file and push it to the DUT.

    The sine wave is generated locally with sox as WAV data. If file_format
    is not 'wav', the generated file is additionally converted with
    _ENCODING_CMD and the converted file is the one sent to the DUT.

    @param host: An object representing the DUT.
    @param num_channels: Number of channels to use.
    @param sample_rate: Sample rate to use for sine wave generation.
    @param sample_width: Sample width to use for sine wave generation.
    @param duration_secs: Duration in seconds to generate sine wave for.
    @param sine_frequency: Frequency to generate sine wave at.
    @param temp_dir: A temporary directory on the host.
    @param file_format: A string representing the encoding for the audio file.

    @return A tuple of the filename on the server and the DUT. The server
            filename is always the file generated by sox, not the converted
            one.

    @raise error.TestError: Generating or converting the file failed.
    """
    # mkstemp() hands back an open descriptor along with the path. Only the
    # path is needed here, so close the descriptor instead of leaking it.
    local_fd, local_filename = tempfile.mkstemp(
            prefix='sine-', suffix='.' + file_format, dir=temp_dir)
    os.close(local_fd)
    if sample_width == 1:
        byte_format = '-e unsigned'
    else:
        byte_format = '-e signed'
    gen_file_cmd = ('sox -n -t wav -c %d %s -b %d -r %d %s synth %d sine %d '
                    'vol 0.9' % (num_channels, byte_format,
                                 sample_width * _BITS_PER_BYTE, sample_rate,
                                 local_filename, duration_secs, sine_frequency))
    logging.info('Command to generate sine wave: %s', gen_file_cmd)
    # Fail loudly here: otherwise an empty file would be pushed to the DUT
    # and the failure would only surface later in a confusing way.
    if subprocess.call(gen_file_cmd, shell=True) != 0:
        raise error.TestError('Failed to generate sine wave file: %s' %
                              gen_file_cmd)
    if file_format != 'wav':
        # Convert the file to the appropriate format.
        # NOTE(review): the intermediate file carries the target format's
        # suffix although it holds WAV data; confirm the encoder copes.
        logging.info('Converting file to %s', file_format)
        encoded_fd, local_encoded_filename = tempfile.mkstemp(
                prefix='sine-', suffix='.' + file_format, dir=temp_dir)
        os.close(encoded_fd)
        cvt_file_cmd = '%s %s %s' % (_ENCODING_CMD, local_filename,
                                     local_encoded_filename)
        logging.info('Command to convert file: %s', cvt_file_cmd)
        if subprocess.call(cvt_file_cmd, shell=True) != 0:
            raise error.TestError('Failed to convert sine wave file: %s' %
                                  cvt_file_cmd)
    else:
        local_encoded_filename = local_filename
    dut_tmp_dir = '/data'
    remote_filename = os.path.join(dut_tmp_dir, 'sine.' + file_format)
    logging.info('Send file to DUT.')
    # TODO(ralphnathan): Find a better place to put this file once the SELinux
    # issues are resolved.
    logging.info('remote_filename %s', remote_filename)
    host.send_file(local_encoded_filename, remote_filename)
    return local_filename, remote_filename
*9c5db199SXin Li
*9c5db199SXin Li
def _is_outside_frequency_threshold(freq_reference, freq_rec):
    """Compares the frequency of the recorded audio with the reference audio.

    This function checks whether a frequency taken from the recorded audio
    is too far away from the dominant frequency of the reference audio.

    @param freq_reference: The dominant frequency in the reference audio file.
    @param freq_rec: The frequency from the recorded audio file to check.

    @return: True if freq_rec differs from freq_reference by more than
             _FREQUENCY_THRESHOLD (a fraction of freq_reference), False if
             it is within that band.
    """
    # Compare the absolute deviation against the allowed band instead of
    # forming the ratio freq_rec / freq_reference, so that a reference of
    # 0 Hz cannot cause a division by zero.
    allowed_deviation = _FREQUENCY_THRESHOLD * abs(freq_reference)
    # bool() keeps the return type a plain Python bool even when the inputs
    # are numpy scalars.
    return bool(abs(freq_rec - freq_reference) > allowed_deviation)
*9c5db199SXin Li
*9c5db199SXin Li
def _compare_frames(reference_file_frames, rec_file_frames, num_channels,
                    sample_rate):
    """Compares audio frames from the reference file and the recorded file.

    This method checks for two things, separately for every channel:
      1. That the main frequency is the same in both the files. This is done
         using the FFT and observing the frequency corresponding to the
         peak.
      2. That there is no other dominant frequency in the recorded file.
         This is done by sweeping the frequency domain and checking that
         every frequency outside the accepted band around the reference
         frequency has an FFT magnitude of at most _FFT_NOISE_THRESHOLD
         times the peak magnitude.

    The key assumption here is that the reference audio file contains only
    one frequency.

    @param reference_file_frames: Audio frames from the reference file.
    @param rec_file_frames: Audio frames from the recorded file.
    @param num_channels: Number of channels in the files.
    @param sample_rate: Sample rate of the files.

    @raise error.TestFail: The frequency of the recorded signal doesn't
                           match that of the reference signal.
    @raise error.TestFail: There is too much noise in the recorded signal.
    """
    for channel in range(num_channels):
        # Frames are interleaved by channel; stride to isolate this channel.
        reference_data = reference_file_frames[channel::num_channels]
        rec_data = rec_file_frames[channel::num_channels]

        # Get fft and frequencies corresponding to the fft values.
        fft_reference = numpy.fft.rfft(reference_data)
        fft_rec = numpy.fft.rfft(rec_data)
        fft_freqs_reference = numpy.fft.rfftfreq(len(reference_data),
                                                 1.0 / sample_rate)
        fft_freqs_rec = numpy.fft.rfftfreq(len(rec_data), 1.0 / sample_rate)

        # Get frequency at highest peak.
        freq_reference = fft_freqs_reference[
                numpy.argmax(numpy.abs(fft_reference))]
        abs_fft_rec = numpy.abs(fft_rec)
        freq_rec = fft_freqs_rec[numpy.argmax(abs_fft_rec)]

        # Compare the two frequencies.
        logging.info('Golden frequency of channel %i is %f', channel,
                     freq_reference)
        logging.info('Recorded frequency of channel %i is  %f', channel,
                     freq_rec)
        if _is_outside_frequency_threshold(freq_reference, freq_rec):
            raise error.TestFail('The recorded audio frequency does not match '
                                 'that of the audio played.')

        # Check for noise in the frequency domain. Only bins whose magnitude
        # exceeds the noise limit can be offenders, so select those first.
        noise_limit = _FFT_NOISE_THRESHOLD * numpy.max(abs_fft_rec)
        noise_detected = False
        for fft_index in numpy.flatnonzero(abs_fft_rec > noise_limit):
            bin_freq = fft_freqs_rec[fft_index]
            # A strong bin is only noise if its own frequency lies outside
            # the accepted band around the reference frequency. The check
            # has to use the bin's frequency: the peak frequency was already
            # validated above and would never be flagged here.
            if _is_outside_frequency_threshold(freq_reference, bin_freq):
                logging.warning('Unexpected frequency peak detected at %f '
                                'Hz.', bin_freq)
                noise_detected = True

        if noise_detected:
            raise error.TestFail('Signal is noiser than expected.')
*9c5db199SXin Li
*9c5db199SXin Li
def compare_file(reference_audio_filename, test_audio_filename):
    """Compares the recorded audio file to the reference audio file.

    Both files are read completely and then compared with _compare_frames()
    using the channel count and sample rate of the reference file.

    @param reference_audio_filename : Reference audio file containing the
                                      reference signal.
    @param test_audio_filename: Audio file containing audio captured from
                                the test.

    @raise error.TestFail: The recorded file does not have the same number
                           of channels or sample rate as the reference file,
                           or the comparison of the audio data failed.
    """
    with contextlib.closing(wave.open(reference_audio_filename,
                                      'rb')) as reference_file:
        with contextlib.closing(wave.open(test_audio_filename,
                                          'rb')) as rec_file:
            num_channels = reference_file.getnchannels()
            sample_rate = reference_file.getframerate()

            # Both files are analysed with the reference file's parameters,
            # so a recording with a different layout would be de-interleaved
            # or scaled incorrectly. Report that explicitly.
            if rec_file.getnchannels() != num_channels:
                raise error.TestFail(
                        'Recorded file has %d channels but the reference '
                        'file has %d.' % (rec_file.getnchannels(),
                                          num_channels))
            if rec_file.getframerate() != sample_rate:
                raise error.TestFail(
                        'Recorded file has sample rate %d but the reference '
                        'file has %d.' % (rec_file.getframerate(),
                                          sample_rate))

            # Extract data from files.
            reference_file_frames = extract_wav_frames(reference_file)
            rec_file_frames = extract_wav_frames(rec_file)

    # Everything needed has been read; compare with the files closed.
    _compare_frames(reference_file_frames, rec_file_frames, num_channels,
                    sample_rate)