Shortcuts

Source code for torchaudio.backend._soundfile_backend

"""The new soundfile backend which will become default in 0.8.0 onward"""
from typing import Tuple, Optional
import warnings

import torch
from torchaudio._internal import module_utils as _mod_utils
from .common import AudioMetaData


if _mod_utils.is_module_available("soundfile"):
    import soundfile


[docs]@_mod_utils.requires_module("soundfile") def info(filepath: str) -> AudioMetaData: """Get signal information of an audio file. Args: filepath (str or pathlib.Path): Path to audio file. This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str`` for the consistency with "sox_io" backend, which has a restriction on type annotation for TorchScript compiler compatiblity. Returns: AudioMetaData: meta data of the given audio. """ sinfo = soundfile.info(filepath) return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels)
_SUBTYPE2DTYPE = { "PCM_S8": "int8", "PCM_U8": "uint8", "PCM_16": "int16", "PCM_32": "int32", "FLOAT": "float32", "DOUBLE": "float64", }
[docs]@_mod_utils.requires_module("soundfile") def load( filepath: str, frame_offset: int = 0, num_frames: int = -1, normalize: bool = True, channels_first: bool = True, ) -> Tuple[torch.Tensor, int]: """Load audio data from file. Note: The formats this function can handle depend on the soundfile installation. This function is tested on the following formats; * WAV * 32-bit floating-point * 32-bit signed integer * 16-bit signed integer * 8-bit unsigned integer * FLAC * OGG/VORBIS * SPHERE By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with ``float32`` dtype and the shape of ``[channel, time]``. The samples are normalized to fit in the range of ``[-1.0, 1.0]``. When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit signed integer and 8-bit unsigned integer (24-bit signed integer is not supported), by providing ``normalize=False``, this function can return integer Tensor, where the samples are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as ``flac`` and ``mp3``. For these formats, this function always returns ``float32`` Tensor with values normalized to ``[-1.0, 1.0]``. Args: filepath (str or pathlib.Path): Path to audio file. This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str`` for the consistency with "sox_io" backend, which has a restriction on type annotation for TorchScript compiler compatiblity. frame_offset (int): Number of frames to skip before start reading data. num_frames (int): Maximum number of frames to read. ``-1`` reads all the remaining samples, starting from ``frame_offset``. This function may return the less number of frames if there is not enough frames in the given file. normalize (bool): When ``True``, this function always return ``float32``, and sample values are normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change the resulting Tensor type to integer type. This argument has no effect for formats other than integer WAV type. channels_first (bool): When True, the returned Tensor has dimension ``[channel, time]``. Otherwise, the returned Tensor's dimension is ``[time, channel]``. Returns: torch.Tensor: If the input file has integer wav format and normalization is off, then it has integer type, else ``float32`` type. If ``channels_first=True``, it has ``[channel, time]`` else ``[time, channel]``. """ with soundfile.SoundFile(filepath, "r") as file_: if file_.format != "WAV" or normalize: dtype = "float32" elif file_.subtype not in _SUBTYPE2DTYPE: raise ValueError(f"Unsupported subtype: {file_.subtype}") else: dtype = _SUBTYPE2DTYPE[file_.subtype] frames = file_._prepare_read(frame_offset, None, num_frames) waveform = file_.read(frames, dtype, always_2d=True) sample_rate = file_.samplerate waveform = torch.from_numpy(waveform) if channels_first: waveform = waveform.t() return waveform, sample_rate
[docs]@_mod_utils.requires_module("soundfile") def save( filepath: str, src: torch.Tensor, sample_rate: int, channels_first: bool = True, compression: Optional[float] = None, ): """Save audio data to file. Note: The formats this function can handle depend on the soundfile installation. This function is tested on the following formats; * WAV * 32-bit floating-point * 32-bit signed integer * 16-bit signed integer * 8-bit unsigned integer * FLAC * OGG/VORBIS * SPHERE Args: filepath (str or pathlib.Path): Path to audio file. This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str`` for the consistency with "sox_io" backend, which has a restriction on type annotation for TorchScript compiler compatiblity. tensor (torch.Tensor): Audio data to save. must be 2D tensor. sample_rate (int): sampling rate channels_first (bool): If ``True``, the given tensor is interpreted as ``[channel, time]``, otherwise ``[time, channel]``. compression (Optional[float]): Not used. It is here only for interface compatibility reson with "sox_io" backend. """ if src.ndim != 2: raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.") if compression is not None: warnings.warn( '`save` function of "soundfile" backend does not support "compression" parameter. ' "The argument is silently ignored." ) ext = str(filepath).split(".")[-1].lower() if ext != "wav": subtype = None elif src.dtype == torch.uint8: subtype = "PCM_U8" elif src.dtype == torch.int16: subtype = "PCM_16" elif src.dtype == torch.int32: subtype = "PCM_32" elif src.dtype == torch.float32: subtype = "FLOAT" elif src.dtype == torch.float64: subtype = "DOUBLE" else: raise ValueError(f"Unsupported dtype for WAV: {src.dtype}") format_ = None # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, # so we extend the extensions manually here if ext in ["nis", "nist", "sph"]: format_ = "NIST" if channels_first: src = src.t() soundfile.write( file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format_ )
[docs]@_mod_utils.requires_module("soundfile") @_mod_utils.deprecated('Please use "torchaudio.load".', "0.9.0") def load_wav( filepath: str, frame_offset: int = 0, num_frames: int = -1, channels_first: bool = True, ) -> Tuple[torch.Tensor, int]: """Load wave file. This function is defined only for the purpose of compatibility against other backend for simple usecases, such as ``torchaudio.load_wav(filepath)``. The implementation is same as :py:func:`load`. """ return load( filepath, frame_offset, num_frames, normalize=False, channels_first=channels_first, )

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources