Source code for nussl.separation.base.nmf_mixin

import numpy as np

from ... import ml
from ... import AudioSignal


class NMFMixin:
    """
    Collection of static helpers for working with NMF on the magnitude
    spectrograms of audio signals: fitting a model, projecting a signal onto
    an already fitted model, and rebuilding a magnitude spectrogram from
    components and activations.
    """

    @staticmethod
    def _magnitude_rows(audio_signal):
        """
        Flattens the magnitude STFT of an audio signal into a 2D data matrix
        with frequencies as the last axis.

        Args:
            audio_signal (AudioSignal): Signal whose ``stft()`` is taken.

        Returns:
            rows (np.ndarray): Magnitudes with every axis but frequency
              flattened. For an STFT laid out as (n_features, n_time, n_channels)
              the rows are ordered channel by channel, i.e. row
              ``c * n_time + t`` holds frame ``t`` of channel ``c``.
            shape (tuple): Shape of the magnitude STFT before flattening.
        """
        magnitude = np.abs(audio_signal.stft())
        # flip around array so frequencies are last
        flipped = magnitude.transpose()
        # flatten everything but the frequency axis
        rows = flipped.reshape(-1, flipped.shape[-1])
        return rows, magnitude.shape

    @staticmethod
    def fit(audio_signals, n_components, beta_loss='frobenius',
            l1_ratio=0.5, **kwargs):
        """
        Fits an NMF model to the magnitude spectrograms of each
        audio signal. If `audio_signals` is a list, the magnitude
        spectrograms of each signal are concatenated into a single
        data matrix to which NMF is fit. If `audio_signals`
        is a single audio signal, then NMF is fit only to the
        magnitude spectrogram for that audio signal. If any of
        the audio signals are multichannel, the channels are
        concatenated into a single (longer) data matrix.

        Args:
            audio_signals (list or AudioSignal): AudioSignal object(s) that
              NMF will be fit to.
            n_components (int): Number of components to use in the NMF
              module. Corresponds to number of spectral templates.
            beta_loss (float or string): String must be in
              {'frobenius', 'kullback-leibler', 'itakura-saito'}.
              Beta divergence to be minimized, measuring the distance between X
              and the dot product WH. Note that values different from 'frobenius'
              (or 2) and 'kullback-leibler' (or 1) lead to significantly slower
              fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input
              matrix X cannot contain zeros. Used only in 'mu' solver. Defaults to
              'frobenius'.
            l1_ratio (float): The regularization mixing parameter, with 0 <= l1_ratio <= 1.
              For l1_ratio = 0 the penalty is an elementwise L2 penalty (aka Frobenius Norm).
              For l1_ratio = 1 it is an elementwise L1 penalty.
              For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
              Defaults to 0.5.
            kwargs (dict): Additional keyword arguments to initialization of the NMF
              decomposition method.

        Returns:
            model (NMF): Fitted NMF model to the audio signal(s).
            components (np.ndarray): Spectral templates (n_components, n_features)
            activations (np.ndarray): Activations (n_components, n_time, n_channels)
              The shape here is as if it was like an STFT but with components as the
              features rather than frequencies of the STFT. The last axis indexes
              every channel of every signal, in the order they were given. The
              rows are split evenly between those channels, so the time axis is
              only meaningful when all spectrograms have the same number of frames.
        """
        if isinstance(audio_signals, AudioSignal):
            audio_signals = [audio_signals]

        data = []
        n_spectrograms = 0

        for audio_signal in audio_signals:
            rows, _ = NMFMixin._magnitude_rows(audio_signal)
            n_spectrograms += audio_signal.num_channels
            data.append(rows)

        data = np.concatenate(data, axis=0)

        model = ml.NMF(n_components=n_components, l1_ratio=l1_ratio,
                       beta_loss=beta_loss, **kwargs)
        activations = model.fit_transform(data)
        # Rows are ordered spectrogram by spectrogram (all frames of the first
        # channel, then all frames of the next, ...), so the spectrogram axis
        # has to be split off *first* and only then moved to the back.
        # Reshaping straight to (n_components, n_time, n_spectrograms) would
        # interleave frames of different channels.
        activations = activations.reshape(n_spectrograms, -1, n_components)
        activations = activations.transpose()
        return model, model.components_, activations

    @staticmethod
    def transform(audio_signal, model):
        """
        Use an already fit model to transform the magnitude spectrogram of an
        audio signal into components and activations. These can be multiplied to
        reconstruct the original matrix, or used to separate out sounds that correspond
        to components in the model.

        Args:
            audio_signal (AudioSignal): AudioSignal object to transform with model.
            model (NMF): NMF model to separate with. Must be fitted prior to this call.

        Returns:
            components (np.ndarray): Spectral templates (n_components, n_features)
            activations (np.ndarray): Activations (n_components, n_time, n_channels)
              The shape here is as if it was like an STFT but with components as the
              features rather than frequencies of the STFT.
        """
        data, shape = NMFMixin._magnitude_rows(audio_signal)
        activations = model.transform(data)

        # The rows of `data` follow the transposed STFT layout
        # (n_channels, n_time), so restore exactly that layout before flipping
        # the axes back; this keeps each channel's frames together.
        flipped_shape = tuple(reversed(shape[1:])) + (model.n_components,)
        activations = activations.reshape(flipped_shape).transpose()
        return model.components_, activations

    @staticmethod
    def inverse_transform(components, activations):
        """
        Reconstructs the magnitude spectrogram by matrix multiplying the components
        with the activations. Components and activations are considered to be 2D matrices,
        but if they are more, then the first dimension is interpreted to be the batch
        dimension.

        Args:
            components (np.ndarray): Spectral templates (n_components, n_features)
            activations (np.ndarray): Activations (n_components, n_time, n_channels)
              The shape here is as if it was like an STFT but with components as the
              features rather than frequencies of the STFT.

        Returns:
            reconstruction (np.ndarray): Reconstructed magnitudes. For inputs
              shaped as documented above this is (n_features, n_time, n_channels).
        """
        # Put the component axis last so it contracts against `components`.
        activations = activations.transpose()
        shape = activations.shape

        reconstruction = activations @ components
        reconstruction = reconstruction.reshape(shape[:-1] + (-1,))

        # Flip back so that features come first again.
        return reconstruction.transpose()