Source code for torchaudio.backend._soundfile_backend
"""The new soundfile backend which will become default in 0.8.0 onward"""
from typing import Tuple, Optional
import warnings
import torch
from torchaudio._internal import module_utils as _mod_utils
from .common import AudioMetaData
if _mod_utils.is_module_available("soundfile"):
import soundfile
[docs]@_mod_utils.requires_module("soundfile")
def info(filepath: str) -> AudioMetaData:
"""Get signal information of an audio file.
Args:
filepath (str or pathlib.Path): Path to audio file.
This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str``
for the consistency with "sox_io" backend, which has a restriction on type annotation
for TorchScript compiler compatiblity.
Returns:
AudioMetaData: meta data of the given audio.
"""
sinfo = soundfile.info(filepath)
return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels)
_SUBTYPE2DTYPE = {
"PCM_S8": "int8",
"PCM_U8": "uint8",
"PCM_16": "int16",
"PCM_32": "int32",
"FLOAT": "float32",
"DOUBLE": "float64",
}
[docs]@_mod_utils.requires_module("soundfile")
def load(
filepath: str,
frame_offset: int = 0,
num_frames: int = -1,
normalize: bool = True,
channels_first: bool = True,
) -> Tuple[torch.Tensor, int]:
"""Load audio data from file.
Note:
The formats this function can handle depend on the soundfile installation.
This function is tested on the following formats;
* WAV
* 32-bit floating-point
* 32-bit signed integer
* 16-bit signed integer
* 8-bit unsigned integer
* FLAC
* OGG/VORBIS
* SPHERE
By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
``float32`` dtype and the shape of ``[channel, time]``.
The samples are normalized to fit in the range of ``[-1.0, 1.0]``.
When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
signed integer and 8-bit unsigned integer (24-bit signed integer is not supported),
by providing ``normalize=False``, this function can return integer Tensor, where the samples
are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor
for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.
``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as
``flac`` and ``mp3``.
For these formats, this function always returns ``float32`` Tensor with values normalized to
``[-1.0, 1.0]``.
Args:
filepath (str or pathlib.Path): Path to audio file.
This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str``
for the consistency with "sox_io" backend, which has a restriction on type annotation
for TorchScript compiler compatiblity.
frame_offset (int):
Number of frames to skip before start reading data.
num_frames (int):
Maximum number of frames to read. ``-1`` reads all the remaining samples,
starting from ``frame_offset``.
This function may return the less number of frames if there is not enough
frames in the given file.
normalize (bool):
When ``True``, this function always return ``float32``, and sample values are
normalized to ``[-1.0, 1.0]``.
If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
integer type.
This argument has no effect for formats other than integer WAV type.
channels_first (bool):
When True, the returned Tensor has dimension ``[channel, time]``.
Otherwise, the returned Tensor's dimension is ``[time, channel]``.
Returns:
torch.Tensor:
If the input file has integer wav format and normalization is off, then it has
integer type, else ``float32`` type. If ``channels_first=True``, it has
``[channel, time]`` else ``[time, channel]``.
"""
with soundfile.SoundFile(filepath, "r") as file_:
if file_.format != "WAV" or normalize:
dtype = "float32"
elif file_.subtype not in _SUBTYPE2DTYPE:
raise ValueError(f"Unsupported subtype: {file_.subtype}")
else:
dtype = _SUBTYPE2DTYPE[file_.subtype]
frames = file_._prepare_read(frame_offset, None, num_frames)
waveform = file_.read(frames, dtype, always_2d=True)
sample_rate = file_.samplerate
waveform = torch.from_numpy(waveform)
if channels_first:
waveform = waveform.t()
return waveform, sample_rate
[docs]@_mod_utils.requires_module("soundfile")
def save(
filepath: str,
src: torch.Tensor,
sample_rate: int,
channels_first: bool = True,
compression: Optional[float] = None,
):
"""Save audio data to file.
Note:
The formats this function can handle depend on the soundfile installation.
This function is tested on the following formats;
* WAV
* 32-bit floating-point
* 32-bit signed integer
* 16-bit signed integer
* 8-bit unsigned integer
* FLAC
* OGG/VORBIS
* SPHERE
Args:
filepath (str or pathlib.Path): Path to audio file.
This functionalso handles ``pathlib.Path`` objects, but is annotated as ``str``
for the consistency with "sox_io" backend, which has a restriction on type annotation
for TorchScript compiler compatiblity.
tensor (torch.Tensor): Audio data to save. must be 2D tensor.
sample_rate (int): sampling rate
channels_first (bool):
If ``True``, the given tensor is interpreted as ``[channel, time]``,
otherwise ``[time, channel]``.
compression (Optional[float]):
Not used. It is here only for interface compatibility reson with "sox_io" backend.
"""
if src.ndim != 2:
raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
if compression is not None:
warnings.warn(
'`save` function of "soundfile" backend does not support "compression" parameter. '
"The argument is silently ignored."
)
ext = str(filepath).split(".")[-1].lower()
if ext != "wav":
subtype = None
elif src.dtype == torch.uint8:
subtype = "PCM_U8"
elif src.dtype == torch.int16:
subtype = "PCM_16"
elif src.dtype == torch.int32:
subtype = "PCM_32"
elif src.dtype == torch.float32:
subtype = "FLOAT"
elif src.dtype == torch.float64:
subtype = "DOUBLE"
else:
raise ValueError(f"Unsupported dtype for WAV: {src.dtype}")
format_ = None
# sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format,
# so we extend the extensions manually here
if ext in ["nis", "nist", "sph"]:
format_ = "NIST"
if channels_first:
src = src.t()
soundfile.write(
file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format_
)
[docs]@_mod_utils.requires_module("soundfile")
@_mod_utils.deprecated('Please use "torchaudio.load".', "0.9.0")
def load_wav(
filepath: str,
frame_offset: int = 0,
num_frames: int = -1,
channels_first: bool = True,
) -> Tuple[torch.Tensor, int]:
"""Load wave file.
This function is defined only for the purpose of compatibility against other backend
for simple usecases, such as ``torchaudio.load_wav(filepath)``.
The implementation is same as :py:func:`load`.
"""
return load(
filepath,
frame_offset,
num_frames,
normalize=False,
channels_first=channels_first,
)