cros/audio/sox_utils.py

*9c5db199SXin Li# Copyright (c) 2013 The Chromium Authors. All rights reserved.
*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
*9c5db199SXin Li# found in the LICENSE file.
*9c5db199SXin Li
*9c5db199SXin Liimport logging
*9c5db199SXin Liimport re
*9c5db199SXin Liimport subprocess
*9c5db199SXin Li
*9c5db199SXin Lifrom autotest_lib.client.cros.audio import cmd_utils
*9c5db199SXin Li
# Name (or path) of the sox executable; it is the first element of every
# command list built in this module.
SOX_PATH = 'sox'
*9c5db199SXin Li
def _raw_format_args(channels, bits, rate):
    """Builds the sox format options for raw, signed sample data.

    @param channels: Number of channels.
    @param bits: Bit length for a sample.
    @param rate: Sampling rate.

    @returns: A list of args: the raw type and signed encoding options
              followed by the options produced by _format_args.

    """
    # The file type and encoding are spelled out explicitly, then the
    # channel / bit-length / rate options are appended.
    return ['-t', 'raw', '-e', 'signed'] + _format_args(channels, bits, rate)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Lidef _format_args(channels, bits, rate):
*9c5db199SXin Li    """Gets format args used in sox.
*9c5db199SXin Li
*9c5db199SXin Li    @param channels: Number of channels.
*9c5db199SXin Li    @param bits: Bit length for a sample.
*9c5db199SXin Li    @param rate: Sampling rate.
*9c5db199SXin Li
*9c5db199SXin Li    @returns: A list of args.
*9c5db199SXin Li
*9c5db199SXin Li    """
*9c5db199SXin Li    return ['-c', str(channels), '-b', str(bits), '-r', str(rate)]
*9c5db199SXin Li
*9c5db199SXin Li
def generate_sine_tone_cmd(
        filename, channels=2, bits=16, rate=48000, duration=None,
        frequencies=440, gain=None, vol=None, raw=True):
    """Gets a command to generate sine tones at specified frequencies.

    @param filename: The name of the file to store the sine wave in.
    @param channels: The number of channels.
    @param bits: The number of bits of each sample.
    @param rate: The sampling rate.
    @param duration: The length of the generated sine tone (in seconds).
                     Omitted from the command when None.
    @param frequencies: The frequencies of the sine wave. Pass a number or a
                        list/tuple to specify frequency for each channel.
    @param gain: The gain (in db). Omitted from the command when None.
    @param vol: A float for volume scale used in sox command.
                E.g. 1.0 is the same. 0.5 to scale volume by
                half. -1.0 to invert the data. Omitted when None.
    @param raw: True to use raw data format. False to use what filename
                specifies.

    @returns: The sox command as a list of args.

    """
    args = [SOX_PATH, '-n']
    if raw:
        args += _raw_format_args(channels, bits, rate)
    else:
        args += _format_args(channels, bits, rate)
    args.append(filename)
    args.append('synth')
    if duration is not None:
        args.append(str(duration))
    # Treat tuples like lists; otherwise a tuple would be stringified as a
    # whole and passed to sox as one bogus frequency argument.
    if not isinstance(frequencies, (list, tuple)):
        frequencies = [frequencies]
    for freq in frequencies:
        args += ['sine', str(freq)]
    if gain is not None:
        args += ['gain', str(gain)]
    if vol is not None:
        args += ['vol', str(vol)]
    return args
*9c5db199SXin Li
*9c5db199SXin Li
def noise_profile(*args, **kwargs):
    """Builds the noise profile command and executes it.

    All arguments are forwarded unchanged to noise_profile_cmd.

    @returns: The result of cmd_utils.execute for the built command.
    """
    command = noise_profile_cmd(*args, **kwargs)
    return cmd_utils.execute(command)
*9c5db199SXin Li
*9c5db199SXin Li
def noise_profile_cmd(input, output, channels=1, bits=16, rate=48000):
    """Gets a command that computes the noise profile of the input audio.

    @param input: The input audio, described to sox as raw data.
    @param output: The file where the output profile will be stored in.
    @param channels: The number of channels.
    @param bits: The number of bits of each sample.
    @param rate: The sampling rate.

    @returns: The sox command as a list of args.
    """
    source_format = _raw_format_args(channels, bits, rate)
    return [SOX_PATH] + source_format + [input, '-n', 'noiseprof', output]
*9c5db199SXin Li
*9c5db199SXin Li
def noise_reduce(*args, **kwargs):
    """Builds the noise reduction command and executes it.

    All arguments are forwarded unchanged to noise_reduce_cmd.

    @returns: The result of cmd_utils.execute for the built command.
    """
    command = noise_reduce_cmd(*args, **kwargs)
    return cmd_utils.execute(command)
*9c5db199SXin Li
*9c5db199SXin Li
def noise_reduce_cmd(
        input, output, noise_profile, channels=1, bits=16, rate=48000):
    """Gets a command that reduces noise in the input audio.

    @param input: The input audio file.
    @param output: The output file in which the noise reduced audio is stored.
    @param noise_profile: The noise profile passed to the noisered effect.
    @param channels: The number of channels.
    @param bits: The number of bits of each sample.
    @param rate: The sampling rate.

    @returns: The sox command as a list of args.
    """
    raw_format = _raw_format_args(channels, bits, rate)
    # The same raw format options are given for the input and the output.
    command = [SOX_PATH] + raw_format + [input] + raw_format + [output]
    command += ['noisered', noise_profile]
    return command
*9c5db199SXin Li
*9c5db199SXin Li
def extract_channel_cmd(
        input, output, channel_index, channels=2, bits=16, rate=48000):
    """Gets a command that extracts one channel from the input audio file.

    @param input: The input audio file.
    @param output: The output file to which the extracted channel is stored.
    @param channel_index: The index of the channel to be extracted.
                          Note: 1 for the first channel.
    @param channels: The number of channels.
    @param bits: The number of bits of each sample.
    @param rate: The sampling rate.

    @returns: The sox command as a list of args.
    """
    command = [SOX_PATH] + _raw_format_args(channels, bits, rate)
    # Only the raw type is given for the output file; the remix effect picks
    # the requested channel.
    command += [input, '-t', 'raw', output, 'remix', str(channel_index)]
    return command
*9c5db199SXin Li
*9c5db199SXin Li
def stat_cmd(input, channels=1, bits=16, rate=44100):
    """Gets a command that reports statistics about the input audio data.

    The statistics will be output to standard error.

    @param input: The input audio file.
    @param channels: The number of channels.
    @param bits: The number of bits of each sample.
    @param rate: The sampling rate.

    @returns: The sox command as a list of args.
    """
    source_format = _raw_format_args(channels, bits, rate)
    return [SOX_PATH] + source_format + [input, '-n', 'stat']
*9c5db199SXin Li
*9c5db199SXin Li
def get_stat(*args, **kargs):
    """Runs the stat_cmd and parses the statistics it reports.

    All arguments are forwarded unchanged to stat_cmd.

    @returns: The object produced by parse_stat_output from the text read
              from the command's standard error.
    """
    command = stat_cmd(*args, **kargs)
    proc = cmd_utils.popen(command, stderr=subprocess.PIPE)

    # The statistics arrive on stderr, not stdout.
    raw_stats = proc.stderr.read()
    cmd_utils.wait_and_check_returncode(proc)
    return parse_stat_output(raw_stats)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li_SOX_STAT_ATTR_MAP = {
*9c5db199SXin Li        'Samples read': ('sameple_count', int),
*9c5db199SXin Li        'Length (seconds)': ('length', float),
*9c5db199SXin Li        'RMS amplitude': ('rms', float),
*9c5db199SXin Li        'Rough frequency': ('rough_frequency', float)}
*9c5db199SXin Li
*9c5db199SXin Li_RE_STAT_LINE = re.compile('(.*):(.*)')
*9c5db199SXin Li
*9c5db199SXin Liclass _SOX_STAT:
*9c5db199SXin Li    def __str__(self):
*9c5db199SXin Li        return str(vars(self))
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Lidef _remove_redundant_spaces(value):
*9c5db199SXin Li    return ' '.join(value.split()).strip()
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Lidef parse_stat_output(stat_output):
*9c5db199SXin Li    """A helper function to parses the stat_cmd's output to get a python object
*9c5db199SXin Li    for easy access to the statistics.
*9c5db199SXin Li
*9c5db199SXin Li    It returns a python object with the following attributes:
*9c5db199SXin Li      .sample_count: The number of the audio samples.
*9c5db199SXin Li      .length: The length of the audio (in seconds).
*9c5db199SXin Li      .rms: The RMS value of the audio.
*9c5db199SXin Li      .rough_frequency: The rough frequency of the audio (in Hz).
*9c5db199SXin Li
*9c5db199SXin Li    @param stat_output: The statistics ouput to be parsed.
*9c5db199SXin Li    """
*9c5db199SXin Li    stat = _SOX_STAT()
*9c5db199SXin Li
*9c5db199SXin Li    for line in stat_output.splitlines():
*9c5db199SXin Li        match = _RE_STAT_LINE.match(line.decode('utf-8'))
*9c5db199SXin Li        if not match:
*9c5db199SXin Li            continue
*9c5db199SXin Li        key, value = (_remove_redundant_spaces(x) for x in match.groups())
*9c5db199SXin Li        attr, convfun = _SOX_STAT_ATTR_MAP.get(key, (None, None))
*9c5db199SXin Li        if attr:
*9c5db199SXin Li            setattr(stat, attr, convfun(value))
*9c5db199SXin Li
*9c5db199SXin Li    if not all(hasattr(stat, x[0]) for x in _SOX_STAT_ATTR_MAP.values()):
*9c5db199SXin Li        logging.error('stat_output: %s', stat_output)
*9c5db199SXin Li        raise RuntimeError('missing entries: ' + str(stat))
*9c5db199SXin Li
*9c5db199SXin Li    return stat
*9c5db199SXin Li
*9c5db199SXin Li
def convert_raw_file(path_src, channels_src, bits_src, rate_src,
                     path_dst):
    """Converts a raw file to the format implied by the destination name.

    @param path_src: The path to the source file.
    @param channels_src: The channel number of the source file.
    @param bits_src: The size of sample in bits of the source file.
    @param rate_src: The sampling rate of the source file.
    @param path_dst: The path to the destination file. The file name determines
                     the new file format.

    """
    source_format = _raw_format_args(channels_src, bits_src, rate_src)
    # No format options are given for the destination file.
    cmd_utils.execute([SOX_PATH] + source_format + [path_src, path_dst])
*9c5db199SXin Li
*9c5db199SXin Li
def convert_format(path_src, channels_src, bits_src, rate_src,
                   path_dst, channels_dst, bits_dst, rate_dst,
                   volume_scale, use_src_header=False, use_dst_header=False):
    """Converts an audio file to a new format, scaling its volume.

    @param path_src: The path to the source file.
    @param channels_src: The channel number of the source file.
    @param bits_src: The size of sample in bits of the source file.
    @param rate_src: The sampling rate of the source file.
    @param path_dst: The path to the destination file.
    @param channels_dst: The channel number of the destination file.
    @param bits_dst: The size of sample in bits of the destination file.
    @param rate_dst: The sampling rate of the destination file.
    @param volume_scale: A float for volume scale used in sox command.
                         E.g. 1.0 is the same. 0.5 to scale volume by
                         half. -1.0 to invert the data.
    @param use_src_header: True to use header from source file and skip
                           specifying channel, sample format, and rate for
                           source. False otherwise.
    @param use_dst_header: True to use header for dst file. False to treat
                           dst file as a raw file.

    """
    # Source side: optional raw format description, then the volume option.
    if use_src_header:
        src_options = []
    else:
        src_options = _raw_format_args(channels_src, bits_src, rate_src)
    src_options += ['-v', '%f' % volume_scale]

    # Destination side: channel/bits/rate are always given; the raw type and
    # encoding are added only when no header is wanted.
    if use_dst_header:
        dst_options = _format_args(channels_dst, bits_dst, rate_dst)
    else:
        dst_options = _raw_format_args(channels_dst, bits_dst, rate_dst)

    cmd_utils.execute(
            [SOX_PATH] + src_options + [path_src] + dst_options + [path_dst])
*9c5db199SXin Li
*9c5db199SXin Li
def lowpass_filter(path_src, channels_src, bits_src, rate_src,
                   path_dst, frequency):
    """Runs a raw file through the sox lowpass effect.

    The destination file is written with the same raw format as the source.

    @param path_src: The path to the source file.
    @param channels_src: The channel number of the source file.
    @param bits_src: The size of sample in bits of the source file.
    @param rate_src: The sampling rate of the source file.
    @param path_dst: The path to the destination file.
    @param frequency: A float for frequency used in sox command. The 3dB
                      frequency of the lowpass filter. Checks manual of sox
                      command for detail.

    """
    raw_format = _raw_format_args(channels_src, bits_src, rate_src)
    command = [SOX_PATH] + raw_format + [path_src] + raw_format + [path_dst]
    command += ['lowpass', '-2', str(frequency)]
    cmd_utils.execute(command)
*9c5db199SXin Li
*9c5db199SXin Li
def trim_silence_from_wav_file(path_src,
                               path_dst,
                               new_duration,
                               volume=1,
                               duration_threshold=0.1):
    """Trim leading silence from a file and limit its length.

    Trim silence from beginning of file, and trim remaining audio to
    new_duration seconds in length.

    @param path_src: The path to the source file.
    @param path_dst: The path to the destination file.
    @param new_duration: The new duration of the destination file in seconds.
    @param volume: [Optional] A float indicating the volume in percent, below
                   which sox will consider silence, defaults to 1 (1%).
    @param duration_threshold: [Optional] A float of the duration in seconds of
                               sound above volume parameter required to consider
                               end of silence. Defaults to 0.1 (0.1 seconds).
    """
    # Express new_duration as hours:minutes:seconds for the trim effect.
    total_minutes, seconds = divmod(new_duration, 60)
    hours, minutes = divmod(total_minutes, 60)
    trim_length = '{:d}:{:02d}:{:.3f}'.format(
            int(hours), int(minutes), float(seconds))

    silence_effect = [
            'silence', '1', str(duration_threshold), '{}%'.format(volume)]
    trim_effect = ['trim', '0', trim_length]

    cmd_utils.execute(
            [SOX_PATH, '-G', path_src, path_dst] + silence_effect + trim_effect)
*9c5db199SXin Li
*9c5db199SXin Li
def mix_two_wav_files(path_src1, path_src2, path_dst, input_volume=None):
    """Generate the mixed WAV file from two input WAV files.

    Use "man sox" for more details on the mixing.

    @param path_src1: Path to the first source.
    @param path_src2: Path to the second source.
    @param path_dst: Path for the generated mixed file.
    @param input_volume: The volume (0.0~1.0) of input sources on mixing. It
                         is clamped to that range and applied to both
                         sources. If not given (or not a number), the default
                         value for sox is 1 / (# of sources).
    """
    sox_cmd = [SOX_PATH, '--combine', 'mix']

    volume_args = []
    if isinstance(input_volume, (int, float)):
        input_volume = min(1.0, max(0.0, input_volume))
        volume_args = ['-v', '{:.3f}'.format(input_volume)]

    # The -v option only affects the input file that follows it, so it has to
    # be repeated in front of every source; giving it once would scale the
    # first source only.
    for path_src in (path_src1, path_src2):
        sox_cmd += volume_args + [path_src]
    sox_cmd.append(path_dst)

    cmd_utils.execute(sox_cmd)
*9c5db199SXin Li
def get_infos_from_wav_file(file_path):
    """Get the information set from the header of the input WAV file.

    The file is queried once per item with `sox --i`. It returns None (after
    logging an error) if the reported file type is not 'wav'.

    @param file_path: Path to the WAV file.

    @returns: A dict with the following elements:
        'duration': The length of the audio (in seconds).
        'channels': The number of channels.
        'bits': The number of bits of each sample.
        'rate': The sampling rate.
    """
    def _query_header(info_arg):
        """Runs sox --i with info_arg and returns its stripped stdout text."""
        raw_output = cmd_utils.execute(
                [SOX_PATH, '--i', info_arg, file_path], stdout=subprocess.PIPE)
        return raw_output.decode('utf-8').strip()

    file_type = _query_header('-t')
    if file_type != 'wav':
        logging.error('the input file format: %s', file_type)
        return None

    duration = float(_query_header('-D'))
    channels = int(_query_header('-c'))
    bits = int(_query_header('-b'))
    rate = int(_query_header('-r'))
    return dict(duration=duration, channels=channels, bits=bits, rate=rate)
*9c5db199SXin Li
*9c5db199SXin Li
def get_file_length(file_path, channels, bits, rate):
    """Get the length in seconds of an audio file.

    The length is taken from the statistics returned by get_stat.

    @param file_path: Path to audio file.
    @param channels: The number of channels.
    @param bits: The number of bits of each sample.
    @param rate: The sampling rate.

    @returns: float length in seconds
    """
    stat = get_stat(file_path, channels, bits, rate)
    return stat.length