Note
Click here to download the full example code
AudioEffector Usages
Author: Moto Hira
This tutorial shows how to use torchaudio.io.AudioEffector
to
apply various effects and codecs to waveform tensor.
Note
This tutorial requires FFmpeg libraries. Please refer to FFmpeg dependency for the detail.
Overview
AudioEffector
combines in-memory encoding,
decoding and filtering that are provided by
StreamWriter
and
StreamReader
.
The following figure illustrates the process.

import torch
import torchaudio
print(torch.__version__)
print(torchaudio.__version__)
2.6.0
2.6.0
from torchaudio.io import AudioEffector, CodecConfig
import matplotlib.pyplot as plt
from IPython.display import Audio
libavcodec (60, 3, 100)
libavdevice (60, 1, 100)
libavfilter (9, 3, 100)
libavformat (60, 3, 100)
libavutil (58, 2, 100)
Usage
To use AudioEffector
, instantiate it with effect
and
format
, then either pass the waveform to
apply()
or
stream()
method.
effector = AudioEffector(effect=..., format=...,)
# Apply at once
applied = effector.apply(waveform, sample_rate)
apply
method applies effect and codec to the entire waveform at
once. So if the input waveform is long, and memory consumption is an
issue, one can use stream
method to process chunk by chunk.
# Apply chunk by chunk
for applied_chunk = effector.stream(waveform, sample_rate):
...
Example
Gallery
def show(effect, *, stereo=False):
wf = torch.cat([waveform] * 2, dim=1) if stereo else waveform
figsize = (6.4, 2.1 if stereo else 1.2)
effector = AudioEffector(effect=effect, pad_end=False)
result = effector.apply(wf, int(sr))
num_channels = result.size(1)
f, ax = plt.subplots(num_channels, 1, squeeze=False, figsize=figsize, sharex=True)
for i in range(num_channels):
ax[i][0].specgram(result[:, i], Fs=sr)
f.set_tight_layout(True)
return Audio(result.numpy().T, rate=sr)
Original
show(effect=None)

Effects
tempo
https://ffmpeg.org/ffmpeg-filters.html#atempo
show("atempo=0.7")

show("atempo=1.8")

highpass
https://ffmpeg.org/ffmpeg-filters.html#highpass
show("highpass=frequency=1500")

lowpass
https://ffmpeg.org/ffmpeg-filters.html#lowpass
show("lowpass=frequency=1000")

allpass
https://ffmpeg.org/ffmpeg-filters.html#allpass
show("allpass")

bandpass
https://ffmpeg.org/ffmpeg-filters.html#bandpass
show("bandpass=frequency=3000")

bandreject
https://ffmpeg.org/ffmpeg-filters.html#bandreject
show("bandreject=frequency=3000")

echo
https://ffmpeg.org/ffmpeg-filters.html#aecho
show("aecho=in_gain=0.8:out_gain=0.88:delays=6:decays=0.4")

show("aecho=in_gain=0.8:out_gain=0.88:delays=60:decays=0.4")

show("aecho=in_gain=0.8:out_gain=0.9:delays=1000:decays=0.3")

chorus
https://ffmpeg.org/ffmpeg-filters.html#chorus
show("chorus=0.5:0.9:50|60|40:0.4|0.32|0.3:0.25|0.4|0.3:2|2.3|1.3")

fft filter
https://ffmpeg.org/ffmpeg-filters.html#afftfilt
# fmt: off
show(
"afftfilt="
"real='re * (1-clip(b * (b/nb), 0, 1))':"
"imag='im * (1-clip(b * (b/nb), 0, 1))'"
)

show(
"afftfilt="
"real='hypot(re,im) * sin(0)':"
"imag='hypot(re,im) * cos(0)':"
"win_size=512:"
"overlap=0.75"
)

show(
"afftfilt="
"real='hypot(re,im) * cos(2 * 3.14 * (random(0) * 2-1))':"
"imag='hypot(re,im) * sin(2 * 3.14 * (random(1) * 2-1))':"
"win_size=128:"
"overlap=0.8"
)
# fmt: on

vibrato
https://ffmpeg.org/ffmpeg-filters.html#vibrato
show("vibrato=f=10:d=0.8")

/pytorch/audio/ci_env/lib/python3.10/site-packages/IPython/lib/display.py:188: RuntimeWarning: invalid value encountered in cast
return scaled.astype("<h").tobytes(), nchan
tremolo
https://ffmpeg.org/ffmpeg-filters.html#tremolo
show("tremolo=f=8:d=0.8")

crystalizer
https://ffmpeg.org/ffmpeg-filters.html#crystalizer
show("crystalizer")

flanger
https://ffmpeg.org/ffmpeg-filters.html#flanger
show("flanger")

phaser
https://ffmpeg.org/ffmpeg-filters.html#aphaser
show("aphaser")

pulsator
https://ffmpeg.org/ffmpeg-filters.html#apulsator
show("apulsator", stereo=True)

haas
https://ffmpeg.org/ffmpeg-filters.html#haas
show("haas")

Codecs
def show_multi(configs):
results = []
for config in configs:
effector = AudioEffector(**config)
results.append(effector.apply(waveform, int(sr)))
num_configs = len(configs)
figsize = (6.4, 0.3 + num_configs * 0.9)
f, axes = plt.subplots(num_configs, 1, figsize=figsize, sharex=True)
for result, ax in zip(results, axes):
ax.specgram(result[:, 0], Fs=sr)
f.set_tight_layout(True)
return [Audio(r.numpy().T, rate=sr) for r in results]
ogg
results = show_multi(
[
{"format": "ogg"},
{"format": "ogg", "encoder": "vorbis"},
{"format": "ogg", "encoder": "opus"},
]
)

ogg - default encoder (flac)
results[0]
ogg - vorbis
results[1]
ogg - opus
results[2]
mp3
https://trac.ffmpeg.org/wiki/Encode/MP3
results = show_multi(
[
{"format": "mp3"},
{"format": "mp3", "codec_config": CodecConfig(compression_level=1)},
{"format": "mp3", "codec_config": CodecConfig(compression_level=9)},
{"format": "mp3", "codec_config": CodecConfig(bit_rate=192_000)},
{"format": "mp3", "codec_config": CodecConfig(bit_rate=8_000)},
{"format": "mp3", "codec_config": CodecConfig(qscale=9)},
{"format": "mp3", "codec_config": CodecConfig(qscale=1)},
]
)

default
results[0]
compression_level=1
results[1]
compression_level=9
results[2]
bit_rate=192k
results[3]
bit_rate=8k
results[4]
qscale=9
results[5]
qscale=1
results[6]
Tag: torchaudio.io
Total running time of the script: ( 0 minutes 3.057 seconds)