Note
Click here to download the full example code
AudioEffector Usages¶
Author: Moto Hira
This tutorial shows how to use torchaudio.io.AudioEffector
to
apply various effects and codecs to a waveform tensor.
Note
This tutorial requires FFmpeg libraries. Please refer to FFmpeg dependency for details.
Overview¶
AudioEffector
combines in-memory encoding,
decoding and filtering that are provided by
StreamWriter
and
StreamReader
.
The following figure illustrates the process.
![AudioEffector](https://download.pytorch.org/torchaudio/tutorial-assets/AudioEffector.png)
import torch
import torchaudio
print(torch.__version__)
print(torchaudio.__version__)
2.4.0
2.4.0
from torchaudio.io import AudioEffector, CodecConfig
import matplotlib.pyplot as plt
from IPython.display import Audio
libavcodec (60, 3, 100)
libavdevice (60, 1, 100)
libavfilter (9, 3, 100)
libavformat (60, 3, 100)
libavutil (58, 2, 100)
Usage¶
To use AudioEffector
, instantiate it with effect
and
format
, then pass the waveform to either the
apply()
or
stream()
method.
effector = AudioEffector(effect=..., format=...,)
# Apply at once
applied = effector.apply(waveform, sample_rate)
apply
method applies the effect and codec to the entire waveform at
once. So if the input waveform is long, and memory consumption is an
issue, one can use stream
method to process it chunk by chunk.
# Apply chunk by chunk
# `stream` is iterated with `in`; each iteration yields one processed chunk.
for applied_chunk in effector.stream(waveform, sample_rate):
    ...
Example¶
Gallery¶
def show(effect, *, stereo=False):
    """Apply ``effect`` to the sample waveform, plot spectrograms, return audio.

    Reads the module-level ``waveform`` and ``sr`` (defined outside this
    excerpt).

    Args:
        effect: Value forwarded as ``AudioEffector(effect=...)``. The gallery
            passes FFmpeg filter strings, or ``None`` for the unmodified
            original.
        stereo: When true, the waveform is concatenated with itself along
            dimension 1 (treated here as the channel dimension) before the
            effect is applied, and a taller figure is used.

    Returns:
        An ``IPython.display.Audio`` object built from the processed result.
    """
    if stereo:
        source = torch.cat([waveform] * 2, dim=1)
        height = 2.1
    else:
        source = waveform
        height = 1.2

    processed = AudioEffector(effect=effect, pad_end=False).apply(source, int(sr))

    channel_count = processed.size(1)
    # squeeze=False keeps the axes grid 2-D, so each row holds exactly one Axes.
    fig, grid = plt.subplots(
        channel_count,
        1,
        squeeze=False,
        figsize=(6.4, height),
        sharex=True,
    )
    for channel, row in enumerate(grid):
        row[0].specgram(processed[:, channel], Fs=sr)
    fig.set_tight_layout(True)

    return Audio(processed.numpy().T, rate=sr)
Original¶
show(effect=None)
data:image/s3,"s3://crabby-images/60103/60103e20903c64196997ead1c20fc92b72d142b3" alt="effector tutorial"
Effects¶
tempo¶
https://ffmpeg.org/ffmpeg-filters.html#atempo
show("atempo=0.7")
data:image/s3,"s3://crabby-images/1d53b/1d53be9534ed7b5cfd9cbcad8fa37e860c2432de" alt="effector tutorial"
show("atempo=1.8")
data:image/s3,"s3://crabby-images/d146e/d146e9ade115a1f3133aa42cd76121e20a705fa4" alt="effector tutorial"
highpass¶
https://ffmpeg.org/ffmpeg-filters.html#highpass
show("highpass=frequency=1500")
data:image/s3,"s3://crabby-images/e1b3f/e1b3fa0541926200d00b4bb35bf22975e159fc9e" alt="effector tutorial"
lowpass¶
https://ffmpeg.org/ffmpeg-filters.html#lowpass
show("lowpass=frequency=1000")
data:image/s3,"s3://crabby-images/ea530/ea53048bb3a7437b3946f04a7bed884e11d3dabd" alt="effector tutorial"
allpass¶
https://ffmpeg.org/ffmpeg-filters.html#allpass
show("allpass")
data:image/s3,"s3://crabby-images/b1e92/b1e921d18c5c5f2a40d8896cef5d8d38ea4e5848" alt="effector tutorial"
bandpass¶
https://ffmpeg.org/ffmpeg-filters.html#bandpass
show("bandpass=frequency=3000")
data:image/s3,"s3://crabby-images/272d3/272d3860bb9d61eced7c959ca97b513c5df46e91" alt="effector tutorial"
bandreject¶
https://ffmpeg.org/ffmpeg-filters.html#bandreject
show("bandreject=frequency=3000")
data:image/s3,"s3://crabby-images/3b951/3b951c6b68fa16728372e44a1508fa1465779faf" alt="effector tutorial"
echo¶
https://ffmpeg.org/ffmpeg-filters.html#aecho
show("aecho=in_gain=0.8:out_gain=0.88:delays=6:decays=0.4")
data:image/s3,"s3://crabby-images/34445/34445207f00f3bfbc530668a24e172ea628454b3" alt="effector tutorial"
show("aecho=in_gain=0.8:out_gain=0.88:delays=60:decays=0.4")
data:image/s3,"s3://crabby-images/8d733/8d73347386a3c071920a93499cc54b01ee64b25f" alt="effector tutorial"
show("aecho=in_gain=0.8:out_gain=0.9:delays=1000:decays=0.3")
data:image/s3,"s3://crabby-images/7efb0/7efb0b6458b32bae013cf3dc3445ca462d43aaa4" alt="effector tutorial"
chorus¶
https://ffmpeg.org/ffmpeg-filters.html#chorus
show("chorus=0.5:0.9:50|60|40:0.4|0.32|0.3:0.25|0.4|0.3:2|2.3|1.3")
data:image/s3,"s3://crabby-images/b57d7/b57d7f9887294e515c5b850d1c761692d65a18d9" alt="effector tutorial"
fft filter¶
https://ffmpeg.org/ffmpeg-filters.html#afftfilt
# fmt: off
show(
"afftfilt="
"real='re * (1-clip(b * (b/nb), 0, 1))':"
"imag='im * (1-clip(b * (b/nb), 0, 1))'"
)
data:image/s3,"s3://crabby-images/53db3/53db348e4ae141cd8f95df32e78f9ff571e46cf7" alt="effector tutorial"
show(
"afftfilt="
"real='hypot(re,im) * sin(0)':"
"imag='hypot(re,im) * cos(0)':"
"win_size=512:"
"overlap=0.75"
)
data:image/s3,"s3://crabby-images/c4e35/c4e35c7146f5ad9aa38d18dff20412e44425cc2a" alt="effector tutorial"
show(
"afftfilt="
"real='hypot(re,im) * cos(2 * 3.14 * (random(0) * 2-1))':"
"imag='hypot(re,im) * sin(2 * 3.14 * (random(1) * 2-1))':"
"win_size=128:"
"overlap=0.8"
)
# fmt: on
data:image/s3,"s3://crabby-images/3745b/3745bd27a227568bcee0c43375135a2dcbdb2b38" alt="effector tutorial"
vibrato¶
https://ffmpeg.org/ffmpeg-filters.html#vibrato
show("vibrato=f=10:d=0.8")
data:image/s3,"s3://crabby-images/5ec4a/5ec4aa153d14a50c73a691376ef9006c7b9e3287" alt="effector tutorial"
/pytorch/audio/ci_env/lib/python3.10/site-packages/IPython/lib/display.py:187: RuntimeWarning: invalid value encountered in divide
scaled = data / normalization_factor * 32767
/pytorch/audio/ci_env/lib/python3.10/site-packages/IPython/lib/display.py:188: RuntimeWarning: invalid value encountered in cast
return scaled.astype("<h").tobytes(), nchan
tremolo¶
https://ffmpeg.org/ffmpeg-filters.html#tremolo
show("tremolo=f=8:d=0.8")
data:image/s3,"s3://crabby-images/0f870/0f87078b0e15c541fd1feadd141740180b42c0ab" alt="effector tutorial"
crystalizer¶
https://ffmpeg.org/ffmpeg-filters.html#crystalizer
show("crystalizer")
data:image/s3,"s3://crabby-images/f620c/f620c770e408989dd4c4ca3044e52fb8794a7a7a" alt="effector tutorial"
flanger¶
https://ffmpeg.org/ffmpeg-filters.html#flanger
show("flanger")
data:image/s3,"s3://crabby-images/1537a/1537a7d168c6fad196b461ce30681897d6efd1a5" alt="effector tutorial"
phaser¶
https://ffmpeg.org/ffmpeg-filters.html#aphaser
show("aphaser")
data:image/s3,"s3://crabby-images/6d226/6d226d8bf1152de20e857ed87e392e34fab15931" alt="effector tutorial"
pulsator¶
https://ffmpeg.org/ffmpeg-filters.html#apulsator
show("apulsator", stereo=True)
data:image/s3,"s3://crabby-images/270c1/270c1d03bebf9bf2d411e74a03d6c1e3e4759731" alt="effector tutorial"
haas¶
https://ffmpeg.org/ffmpeg-filters.html#haas
show("haas")
data:image/s3,"s3://crabby-images/55247/55247238c34dc3cbb9f244195a5396c2cc4dc2d2" alt="effector tutorial"
Codecs¶
def show_multi(configs):
    """Apply several ``AudioEffector`` configurations and plot each result.

    Reads the module-level ``waveform`` and ``sr`` (defined outside this
    excerpt).

    Args:
        configs: Iterable of dicts; each dict is unpacked as keyword
            arguments to ``AudioEffector``.

    Returns:
        A list of ``IPython.display.Audio`` objects, one per configuration,
        in the same order as ``configs``.
    """
    results = [AudioEffector(**config).apply(waveform, int(sr)) for config in configs]

    num_configs = len(results)
    figsize = (6.4, 0.3 + num_configs * 0.9)
    # squeeze=False keeps `axes` a 2-D array even when there is only one
    # configuration; without it matplotlib returns a bare Axes object and the
    # zip() below fails because a single Axes is not iterable.
    f, axes = plt.subplots(num_configs, 1, squeeze=False, figsize=figsize, sharex=True)
    for result, ax in zip(results, axes[:, 0]):
        # Only column 0 of each result is plotted (presumably the first channel).
        ax.specgram(result[:, 0], Fs=sr)
    f.set_tight_layout(True)
    return [Audio(r.numpy().T, rate=sr) for r in results]
ogg¶
results = show_multi(
[
{"format": "ogg"},
{"format": "ogg", "encoder": "vorbis"},
{"format": "ogg", "encoder": "opus"},
]
)
data:image/s3,"s3://crabby-images/cb4bf/cb4bf6a2160f01e3273ee638db3ce4b500312a4b" alt="effector tutorial"
ogg - default encoder (flac)¶
results[0]
ogg - vorbis¶
results[1]
ogg - opus¶
results[2]
mp3¶
https://trac.ffmpeg.org/wiki/Encode/MP3
results = show_multi(
[
{"format": "mp3"},
{"format": "mp3", "codec_config": CodecConfig(compression_level=1)},
{"format": "mp3", "codec_config": CodecConfig(compression_level=9)},
{"format": "mp3", "codec_config": CodecConfig(bit_rate=192_000)},
{"format": "mp3", "codec_config": CodecConfig(bit_rate=8_000)},
{"format": "mp3", "codec_config": CodecConfig(qscale=9)},
{"format": "mp3", "codec_config": CodecConfig(qscale=1)},
]
)
data:image/s3,"s3://crabby-images/d0f3a/d0f3a9959f559209745f0d40bbf2b4595b945e61" alt="effector tutorial"
default¶
results[0]
compression_level=1¶
results[1]
compression_level=9¶
results[2]
bit_rate=192k¶
results[3]
bit_rate=8k¶
results[4]
qscale=9¶
results[5]
qscale=1¶
results[6]
Tag: torchaudio.io
Total running time of the script: ( 0 minutes 3.051 seconds)