Source code for nussl.separation.primitive.timbre

import numpy as np
import librosa

from ..base import ClusteringSeparationBase, NMFMixin


class TimbreClustering(ClusteringSeparationBase, NMFMixin):
    """
    Separate sources by timbre using NMF followed by MFCC-based clustering.

    The procedure is:

    1. Factorize the magnitude spectrogram of the mixture with NMF.
    2. Compute MFCC coefficients for each NMF component.
    3. Express each time-frequency bin as a combination of components.
    4. Build the feature vector of each time-frequency bin as the weighted
       combination of the MFCCs of each component.
    5. Cluster the time-frequency bins on those features.

    Args:
        input_audio_signal (AudioSignal): Signal to separate.
        num_sources (int): Number of sources; forwarded to
            ClusteringSeparationBase.
        n_components (int): Number of components to use in the NMF model.
            Corresponds to the number of spectral templates.
        n_mfcc (int): Number of MFCC coefficients to use. Defaults to 13.
        nmf_kwargs (dict): Keyword arguments forwarded to `NMFMixin.fit`.
            ``None`` (the default) is treated as an empty dictionary.
        kwargs (dict): Extra keyword arguments are passed on to
            ClusteringSeparationBase.
    """

    def __init__(self, input_audio_signal, num_sources, n_components,
                 n_mfcc=13, nmf_kwargs=None, **kwargs):
        # These attributes are assigned before the base-class initializer
        # runs, matching the original ordering.
        self.n_components = n_components
        self.nmf_kwargs = nmf_kwargs if nmf_kwargs is not None else {}
        self.n_mfcc = n_mfcc

        super().__init__(
            input_audio_signal=input_audio_signal,
            num_sources=num_sources,
            **kwargs
        )

    def extract_features(self):
        """
        Build a unit-normalized feature vector for every time-frequency bin.

        Returns:
            np.ndarray: Array reshaped to ``self.stft.shape + (-1,)`` whose
            last axis holds the features, scaled to (approximately) unit
            L2 norm along that axis.
        """
        # The fitted model object returned first is not needed here.
        _, templates, gains = self.fit(
            self.audio_signal, self.n_components, **self.nmf_kwargs)

        # MFCCs of the squared, transposed NMF components.
        # NOTE(review): librosa documents `S` for `mfcc` as a log-power Mel
        # spectrogram; the Mel power spectrogram is passed here without a
        # dB conversion -- confirm this is intended.
        template_power = templates.T ** 2
        mel_templates = librosa.feature.melspectrogram(
            S=template_power, sr=self.sample_rate)
        template_mfcc = librosa.feature.mfcc(
            S=mel_templates, n_mfcc=self.n_mfcc).T

        # One outer product per component: (n_components, A, 1) @
        # (n_components, 1, B) -> (n_components, A, B). transpose() then
        # reverses the axis order so the component axis comes last.
        gains_col = gains.reshape(self.n_components, -1, 1)
        templates_row = templates.reshape(self.n_components, 1, -1)
        bin_weights = (gains_col @ templates_row).transpose()

        # Weight the per-component MFCCs by each bin's component weights.
        # Presumably template_mfcc is (n_components, n_mfcc) -- TODO confirm.
        embedding = bin_weights @ template_mfcc
        embedding = embedding.reshape(self.stft.shape + (-1,))

        # L2-normalize along the feature axis; the epsilon avoids division
        # by zero for all-zero feature vectors.
        magnitude = np.linalg.norm(embedding, axis=-1, keepdims=True)
        return embedding / (magnitude + 1e-8)