"""
While *nussl* does not come with any data sets, it does have the capability to interface with
many common source separation data sets used within the MIR and speech separation communities.
These data set "hooks" subclass BaseDataset and by default return AudioSignal objects in
labeled dictionaries for ease of use. Transforms can be applied to these datasets for use
in machine learning pipelines.
"""
import os
import musdb
import jams
from ..core import constants, utils
from .base_dataset import BaseDataset, DataSetException
class MUSDB18(BaseDataset):
    """
    Hook for MUSDB18, backed by a ``musdb.DB`` object.

    When ``download=True``, ``musdb`` is asked to download the sample version
    of the dataset (7s snippets of each track) into ``folder``. When no folder
    is given, the ``musdb18`` directory inside
    ``constants.DEFAULT_DOWNLOAD_DIRECTORY`` is used.

    Getting an item from this dataset with no transforms returns the
    following dictionary:

    .. code-block:: none

        {
            'mix': [AudioSignal object containing mix audio],
            'sources': {
                'bass': [AudioSignal object containing bass],
                'drums': [AudioSignal object containing drums],
                'other': [AudioSignal object containing other],
                'vocals': [AudioSignal object containing vocals],
            }
            'metadata': {
                'labels': ['bass', 'drums', 'other', 'vocals']
            }
        }

    Args:
        folder (str, optional): Location that should be processed to produce the
          list of files. Defaults to None.
        is_wav (bool, optional): Expect subfolder with wav files for each source
          instead of stems, defaults to False.
        download (bool, optional): Download sample version of MUSDB18 which
          includes 7s excerpts. Defaults to False.
        subsets (list, optional): Select a musdb subset train or test.
          Defaults to ['train', 'test'] (all tracks).
        split (str, optional): When subset train is loaded, split selects the
          train/validation split. ``split='train'`` loads the training split,
          ``split='valid'`` loads the validation split. ``split=None`` applies
          no splitting. Defaults to None.
        **kwargs: Any additional arguments that are passed up to BaseDataset
          (see ``nussl.datasets.BaseDataset``).
    """
    DATASET_HASHES = {
        "musdb": "56777516ad56fe6a8590badf877e6be013ff932c010e0fbdb0aba03ef878d4cd",
    }

    def __init__(self, folder=None, is_wav=False, download=False,
                 subsets=None, split=None, **kwargs):
        if subsets is None:
            subsets = ['train', 'test']
        if folder is None:
            folder = os.path.join(constants.DEFAULT_DOWNLOAD_DIRECTORY, 'musdb18')

        # The musdb handle is created before the base constructor runs because
        # get_items reads it.
        # NOTE(review): presumably BaseDataset.__init__ calls get_items - confirm.
        self.musdb = musdb.DB(
            root=folder,
            is_wav=is_wav,
            download=download,
            subsets=subsets,
            split=split,
        )
        super().__init__(folder, **kwargs)

    def get_items(self, folder):
        """
        Return the list of integer track indices ``0 .. len(self.musdb) - 1``.

        ``folder`` is not used here; the tracks come from ``self.musdb``.
        """
        return list(range(len(self.musdb)))

    def process_item(self, item):
        """
        Convert the musdb track at index ``item`` into the output dictionary
        holding the mix, the sources, and the label metadata.
        """
        mix, sources = utils.musdb_track_to_audio_signals(self.musdb[item])

        self._setup_audio_signal(mix)
        for signal in sources.values():
            self._setup_audio_signal(signal)

        return {
            'mix': mix,
            'sources': sources,
            'metadata': {
                'labels': ['bass', 'drums', 'other', 'vocals']
            }
        }
class MixSourceFolder(BaseDataset):
    """
    This dataset expects your data to be formatted in the following way:

    .. code-block:: none

        data/
            mix/
                [file0].wav
                [file1].wav
                [file2].wav
                ...
            [label0]/
                [file0].wav
                [file1].wav
                [file2].wav
                ...
            [label1]/
                [file0].wav
                [file1].wav
                [file2].wav
                ...
            [label2]/
                [file0].wav
                [file1].wav
                [file2].wav
                ...
            ...

    Note that the filenames match between the mix folder and each source folder.
    The source folder names can be whatever you want. Given a file in the
    ``self.mix_folder`` folder, this dataset will look up the corresponding files
    with the same name in the source folders. These are the source audio files.
    The sum of the sources should equal the mixture. Each source will be labeled
    according to the folder name it comes from.

    Getting an item from this dataset with no transforms returns the
    following dictionary:

    .. code-block:: none

        {
            'mix': [AudioSignal object containing mix audio],
            'sources': {
                '[label0]': [AudioSignal object containing label0 audio],
                '[label1]': [AudioSignal object containing label1 audio],
                '[label2]': [AudioSignal object containing label2 audio],
                '[label3]': [AudioSignal object containing label3 audio],
                ...
            }
            'metadata': {
                'labels': ['label0', 'label1', 'label2', 'label3']
            }
        }

    Args:
        folder (str): Location that should be processed to produce the
          list of files.
        mix_folder (str, optional): Folder to look in for mixtures. Defaults to 'mix'.
        source_folders (list, optional): List of folders to look in for sources.
          Path is defined relative to folder. If None, all folders other than
          mix_folder are treated as the source folders. Defaults to None.
        sample_rate (int, optional): Passed up to BaseDataset as ``sample_rate``
          (see ``nussl.datasets.BaseDataset``). Defaults to None.
        ext (list, optional): Audio extensions to look for in mix_folder.
          Defaults to ['.wav', '.flac', '.mp3'].
        **kwargs: Any additional arguments that are passed up to BaseDataset
          (see ``nussl.datasets.BaseDataset``).
    """

    def __init__(self, folder, mix_folder='mix', source_folders=None, sample_rate=None,
                 ext=None, **kwargs):
        self.mix_folder = mix_folder
        self.source_folders = source_folders
        self.ext = ['.wav', '.flac', '.mp3'] if ext is None else ext
        # sample_rate is a named parameter, so it never lands in **kwargs; it
        # has to be forwarded explicitly or it is silently dropped (which also
        # discarded the rate subclasses pass through this constructor).
        super().__init__(folder, sample_rate=sample_rate, **kwargs)

    def get_items(self, folder):
        """
        Return the sorted file names in the mix folder whose extension is in
        ``self.ext``.

        If no source folders were given, every sub-directory of ``folder``
        other than the mix folder becomes a source folder (sorted by name),
        and the result is stored on ``self.source_folders``.
        """
        if self.source_folders is None:
            self.source_folders = sorted([
                f for f in os.listdir(folder)
                if os.path.isdir(os.path.join(folder, f))
                and f != self.mix_folder
            ])

        mix_folder = os.path.join(folder, self.mix_folder)
        items = sorted([
            x for x in os.listdir(mix_folder)
            if os.path.splitext(x)[1] in self.ext
        ])
        return items

    def process_item(self, item):
        """
        Load the mixture named ``item`` and every same-named file found in
        the source folders.

        A source folder that has no file called ``item`` is skipped in
        ``'sources'``; ``'labels'`` always lists all of ``self.source_folders``.
        """
        mix_path = os.path.join(self.folder, self.mix_folder, item)
        mix = self._load_audio_file(mix_path)

        sources = {}
        for k in self.source_folders:
            source_path = os.path.join(self.folder, k, item)
            if os.path.exists(source_path):
                sources[k] = self._load_audio_file(source_path)

        output = {
            'mix': mix,
            'sources': sources,
            'metadata': {
                'labels': self.source_folders
            }
        }
        return output
class Scaper(BaseDataset):
    """
    Source separation datasets can be generated using Scaper, a library for
    automatic soundscape generation. Datasets that are generated with Scaper
    can be fed into this class easily. Scaper generates a large list of JAMS
    files which specify the parameters of the soundscape. If the soundscape is
    generated with ``save_isolated_events=True``, then the audio corresponding
    to each event in the soundscape will be saved as well.

    Below is an example of using Scaper to generate a small dataset of 10
    mixtures with 2 sources each. The generated dataset can then be immediately
    loaded into an instance of ``nussl.datasets.Scaper`` for integration into
    a training or evaluation pipeline.

    The sources are output in a dictionary that looks like this:

    .. code-block:: none

        data['sources'] = {
            '{label}::{count}': AudioSignal,
            '{label}::{count}': AudioSignal,
            ...
        }

    For example:

    .. code-block:: none

        data['sources'] = {
            'siren::0': AudioSignal,
            'siren::1': AudioSignal,
            'car_horn::0': AudioSignal,
            ...
        }

    ``count`` is the number of earlier events in the same soundscape that
    carry exactly the same label.

    Getting an item from this dataset with no transforms returns the
    following dictionary:

    .. code-block:: none

        {
            'mix': [AudioSignal object containing mix audio],
            'sources': {
                '[label0::count]': [AudioSignal object containing label0 audio],
                '[label1::count]': [AudioSignal object containing label1 audio],
                '[label2::count]': [AudioSignal object containing label2 audio],
                '[label3::count]': [AudioSignal object containing label3 audio],
                ...
            }
            'metadata': {
                'scaper': [the JAMS object loaded from the soundscape's jams file]
                'labels': [the 'fg_labels' entry of the scaper sandbox]
            }
        }

    Example of generating a Scaper dataset and then loading it with nussl:

    .. code-block:: python

        n_sources = 2
        n_mixtures = 10
        duration = 3
        ref_db = -40
        fg_path = '/path/to/foreground/'
        output_dir = '/output/path'

        for i in range(n_mixtures):
            sc = scaper.Scaper(
                duration, fg_path, fg_path, random_state=i)
            sc.ref_db = ref_db
            sc.sr = 16000
            for j in range(n_sources):
                sc.add_event(
                    label=('choose', []),
                    source_file=('choose', []),
                    source_time=('const', 0),
                    event_time=('const', 0),
                    event_duration=('const', duration),
                    snr=('const', 0),
                    pitch_shift=None,
                    time_stretch=None
                )
            audio_path = os.path.join(output_dir, f'{i}.wav')
            jams_path = os.path.join(output_dir, f'{i}.jams')
            sc.generate(audio_path, jams_path, save_isolated_events=True)

        dataset = nussl.datasets.Scaper(output_dir)
        dataset[0]  # contains mix, sources, and metadata corresponding to 0.jams.

    Raises:
        DataSetException: if Scaper dataset wasn't saved with isolated event audio.
    """

    def get_items(self, folder):
        """Return the sorted names of the ``.jams`` files found in ``folder``."""
        items = sorted([
            x for x in os.listdir(folder)
            if os.path.splitext(x)[1] == '.jams'
        ])
        return items

    def _get_info_from_item(self, item):
        """
        Load the JAMS file ``item`` from ``self.folder``.

        Returns:
            tuple: ``(jam, ann, mix_path, source_paths)`` where ``ann`` is the
            first annotation in the ``scaper`` namespace and the two paths are
            read from that annotation's scaper sandbox.
        """
        jam = jams.load(os.path.join(self.folder, item))
        ann = jam.annotations.search(namespace='scaper')[0]
        mix_path = ann.sandbox.scaper['soundscape_audio_path']
        source_paths = ann.sandbox.scaper['isolated_events_audio_path']
        return jam, ann, mix_path, source_paths

    def process_item(self, item):
        """
        Build the output dictionary (mix, sources, metadata) for one JAMS file.

        Raises:
            DataSetException: if the JAMS file lists no isolated event audio.
        """
        jam, ann, mix_path, source_paths = self._get_info_from_item(item)
        if not source_paths:
            raise DataSetException(
                "No paths to isolated events found! Did you generate "
                "the soundscape with save_isolated_events=True?")

        mix = self._load_audio_file(mix_path)
        sources = {}
        # Occurrences seen so far, keyed by the exact label. A substring test
        # against the existing keys would miscount whenever one label is
        # contained in another key (e.g. 'horn' vs 'car_horn::0').
        label_counts = {}
        for event_spec, event_audio_path in zip(ann, source_paths):
            label = event_spec.value['label']
            label_count = label_counts.get(label, 0)
            label_counts[label] = label_count + 1
            sources[f"{label}::{label_count}"] = self._load_audio_file(
                event_audio_path)

        output = {
            'mix': mix,
            'sources': sources,
            'metadata': {
                'scaper': jam,
                'labels': ann.sandbox.scaper['fg_labels'],
            }
        }
        return output
class FUSS(Scaper):
    """
    The Free Universal Sound Separation (FUSS) Dataset is a database of arbitrary
    sound mixtures and source-level references, for use in experiments on
    arbitrary sound separation.

    This is the official sound separation data for the DCASE2020 Challenge Task 4:
    Sound Event Detection and Separation in Domestic Environments.

    This is a hook for reading in this dataset, and making sure that the mix and
    source paths are rewritten to live under the dataset folder.

    References:

    [1] Scott Wisdom, Hakan Erdogan, Daniel P. W. Ellis, Romain Serizel,
    Nicolas Turpault, Eduardo Fonseca, Justin Salamon, Prem Seetharaman,
    John R. Hershey, "What's All the FUSS About Free Universal Sound Separation
    Data?", 2020, in preparation.

    [2] Eduardo Fonseca, Jordi Pons, Xavier Favory, Frederic Font Corbera,
    Dmitry Bogdanov, Andrés Ferraro, Sergio Oramas, Alastair Porter, and
    Xavier Serra. "Freesound Datasets: A Platform for the Creation of Open Audio
    Datasets." International Society for Music Information Retrieval Conference
    (ISMIR), pp. 486–493. Suzhou, China, 2017.

    Args:
        root (str): Folder where the FUSS data is. Either points to ssdata or
          ssdata_reverb.
        split (str): Either the ``train``, ``validation``, or ``eval`` split.
        kwargs: Additional keyword arguments to BaseDataset.

    Raises:
        DataSetException: if ``split`` is not one of the accepted splits.
    """

    def __init__(self, root, split='train', **kwargs):
        accepted_splits = ('train', 'validation', 'eval')
        if split not in accepted_splits:
            raise DataSetException(
                f"split '{split}' not one of the accepted splits: "
                f"'train', 'validation', 'eval'.")

        # Each split lives in its own sub-folder of root.
        super().__init__(
            os.path.join(root, split),
            sample_rate=16000,
            strict_sample_rate=True,
            **kwargs
        )

    def _get_info_from_item(self, item):
        """
        Load the JAMS file as the parent class does, then rewrite the mix and
        source paths so they point inside ``self.folder``.

        Each path is rebuilt as ``self.folder`` joined with the item's base
        name followed by whatever comes after the last occurrence of that base
        name in the stored path. The source path list is updated in place.
        """
        jam, ann, mix_path, source_paths = super()._get_info_from_item(item)
        stem = os.path.splitext(item)[0]

        def _relocate(stored_path):
            # Keep only the tail after the item name and re-root it.
            tail = stored_path.split(stem)[-1]
            return os.path.join(self.folder, stem + tail)

        mix_path = _relocate(mix_path)
        # Slice assignment mutates the list that came out of the annotation,
        # rather than rebinding the name to a new list.
        source_paths[:] = [_relocate(p) for p in source_paths]
        return jam, ann, mix_path, source_paths
class WHAM(MixSourceFolder):
    """
    Hook for the WHAM dataset. Essentially subclasses MixSourceFolder but with presets
    that are helpful for WHAM, which has the following directory structure:

    .. code-block:: none

        [wav8k, wav16k]/
            [min, max]/
                [tr, cv, tt]/
                    mix_both/
                    mix_clean/
                    mix_single/
                    noise/
                    s1/
                    s2/
        wham_noise/
            tr/
            cv/
            tt/
            metadata/

    Args:
        root (str): Folder containing the ``wav8k`` / ``wav16k`` directories.
        mix_folder (str, optional): Which mixture folder to read; one of the
          keys of ``MIX_TO_SOURCE_MAP``. It also selects the source folders.
          Defaults to 'mix_clean'.
        mode (str, optional): Either 'min' or 'max'. Defaults to 'min'.
        split (str, optional): One of 'tr' (train), 'cv' (validation) or
          'tt' (test). Defaults to 'tr'.
        sample_rate (int, optional): 8000 or 16000; selects ``wav8k`` or
          ``wav16k``. Defaults to 8000.
        **kwargs: Any additional arguments that are passed up to
          MixSourceFolder.

    Raises:
        DataSetException: if ``mix_folder``, ``sample_rate``, ``mode`` or
          ``split`` is not one of the accepted values.
    """
    MIX_TO_SOURCE_MAP = {
        'mix_clean': ['s1', 's2'],
        'mix_both': ['s1', 's2', 'noise'],
        'mix_single': ['s1'],
    }

    DATASET_HASHES = {
        "wav8k": "acd49e0dae066e16040c983d71cc5a8adb903abff6e5cbb92b3785a1997b7547",
        "wav16k": "5691d6a35382f2408a99594f21d820b58371b5ea061841db37d548c0b8d6ec7f"
    }

    def __init__(self, root, mix_folder='mix_clean', mode='min', split='tr',
                 sample_rate=8000, **kwargs):
        source_map = self.MIX_TO_SOURCE_MAP
        wav_folders = {8000: 'wav8k', 16000: 'wav16k'}

        # Arguments are validated in a fixed order: mix_folder, sample_rate,
        # mode, split.
        if mix_folder not in source_map:
            raise DataSetException(
                f"{mix_folder} must be in {list(source_map)}")
        if sample_rate not in (8000, 16000):
            raise DataSetException(
                f"{sample_rate} not available for WHAM (only 8000 and 16000 Hz allowed)")
        if mode not in ('min', 'max'):
            raise DataSetException(
                f"{mode} not available, only 'min' or 'max' allowed.")
        if split not in ('tr', 'cv', 'tt'):
            raise DataSetException(
                f"{split} not available, must be one of 'tr' (train), "
                f"'cv' (validation), and 'tt' (test)")

        super().__init__(
            os.path.join(root, wav_folders[sample_rate], mode, split),
            mix_folder=mix_folder,
            source_folders=source_map[mix_folder],
            sample_rate=sample_rate,
            strict_sample_rate=True,
            **kwargs
        )