# Source code for denspp.offline.dnn.dataset.autoencoder

import numpy as np
from torch import is_tensor
from torch.utils.data import Dataset
from denspp.offline.data_process.frame_preprocessing import calculate_frame_mean



[docs]
class DatasetAE(Dataset):
    """Dataset Preparator for training Autoencoder"""
    def __init__(self, frames_raw: np.ndarray, cluster_id: np.ndarray,
                 frames_cluster_me: np.ndarray, cluster_dict=None,
                 noise_std=0.1, do_classification=False, mode_train=0):

        # --- Input Parameters
        self.__frames_orig = np.array(frames_raw, dtype=np.float32)
        self.__frames_size = frames_raw.shape[1]
        self.__cluster_id = np.array(cluster_id, dtype=np.uint8)
        self.__frames_me = np.array(frames_cluster_me, dtype=np.float32)
        # --- Parameters for Denoising Autoencoder
        self.__frames_noise_std = noise_std
        self.__do_classification = do_classification
        # --- Parameters for Confusion Matrix for Classification
        self.__labeled_dictionary = cluster_dict if isinstance(cluster_dict, list) else []
        self.__mode_train = mode_train


[docs]
    def __len__(self):
        return self.__cluster_id.shape[0]



[docs]
    def __getitem__(self, idx):
        if is_tensor(idx):
            idx = idx.tolist()

        cluster_id = self.__cluster_id[idx]
        if self.__mode_train == 1:
            # Denoising Autoencoder Training with mean
            frame_in = self.__frames_orig[idx, :]
            frame_out = self.__frames_me[cluster_id, :] if not self.__do_classification else cluster_id
        elif self.__mode_train == 2:
            # Denoising Autoencoder Training with adding random noise on input
            frame_in = self.__frames_orig[idx, :] + np.array(self.__frames_noise_std * np.random.randn(self.__frames_size), dtype=np.float32)
            frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
        elif self.__mode_train == 3:
            # Denoising Autoencoder Training with adding gaussian noise on input
            frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
            frame_in = self.__frames_orig[idx, :] + np.array(self.__frames_noise_std * np.random.normal(size=self.__frames_size), dtype=np.float32)
        else:
            # Normal Autoencoder Training
            frame_in = self.__frames_orig[idx, :]
            frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id

        return {'in': frame_in, 'out': frame_out, 'class': cluster_id,
                'mean': self.__frames_me[cluster_id, :]}


    @property
    def get_mean_waveforms(self) -> np.ndarray:
        """Getting the mean waveforms of dataset"""
        return self.__frames_me

    @property
    def get_cluster_num(self) -> int:
        """"""
        return int(np.unique(self.__cluster_id).size)

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of labeled dataset"""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information of used Autoencoder topology"""
        match self.__mode_train:
            case 1:
                out = "Denoising Autoencoder (mean)"
            case 2:
                out = "Denoising Autoencoder (Add random noise)"
            case 3:
                out = "Denoising Autoencoder (Add gaussian noise)"
            case _:
                out = "Autoencoder"
        if self.__do_classification:
            out += " for Classification"
        return out




def prepare_training(rawdata: dict, do_classification: bool=False,
                     mode_train_ae: int=0, noise_std: float=0.1,
                     print_state: bool=True) -> DatasetAE:
    """Preparing dataset for Autoencoder Tasks
    Args:
        rawdata:                Dictionary with rawdata for training with labels ['data', 'label', 'dict'] (optional: 'mean')
        do_classification:      Decision if output should be a classification
        mode_train_ae:          Mode for training the autoencoder (0: normal, 1: Denoising (mean), 2: Denoising (random noise on input), 3: Denoising (gaussian noise on input))
        noise_std:              Std of noise distribution
        print_state:            Printing the state and results into Terminal
    Returns:
        DatasetAE object for training autoencoders
    """
    frames_in = rawdata['data']
    frames_cl = rawdata['label']
    frames_dict = rawdata['dict']
    # Use the provided mean waveforms if available, otherwise derive them from the frames
    if 'mean' in rawdata:
        frames_me = rawdata['mean']
    else:
        frames_me = calculate_frame_mean(frames_in, frames_cl, False)

    # --- Output
    if print_state:
        # The class statistics are only needed for the report
        class_ids, class_counts = np.unique(frames_cl, return_counts=True)
        # DatasetAE accepts a missing dictionary, so the report does as well
        class_names = [] if frames_dict is None else frames_dict
        print(f"... for training are {frames_in.shape[0]} frames with each {frames_in.shape[1]} points available")
        print(f"... used data points for training: in total {class_ids.size} classes with {np.sum(class_counts)} samples")
        for pos, (class_id, num_samples) in enumerate(zip(class_ids, class_counts)):
            # Names are looked up by position; classes without a name are printed without one
            addon = f' ({class_names[pos]})' if pos < len(class_names) else ''
            print(f"\tclass {class_id}{addon} --> {num_samples} samples")

    return DatasetAE(
        frames_raw=frames_in,
        cluster_id=frames_cl,
        frames_cluster_me=frames_me,
        cluster_dict=frames_dict,
        mode_train=mode_train_ae,
        do_classification=do_classification,
        noise_std=noise_std
    )