Source code for denspp.offline.dnn.training.autoencoder_dataset

import numpy as np
from torch import is_tensor
from torch.utils.data import Dataset
from denspp.offline.dnn import DatasetFromFile


class DatasetAutoencoder(Dataset):
    """Dataset wrapper that serves (input, target) frame pairs for autoencoder training.

    Each item is a dictionary with the keys ``'in'`` (network input), ``'out'``
    (training target), ``'class'`` (label of the sample) and ``'mean'`` (the
    row of the mean array selected by that label).
    """

    def __init__(self, dataset: "DatasetFromFile", noise_std=0.1, mode_train=0):
        """Dataset Preparator for training Autoencoder

        :param dataset:     Dataclass DatasetFromFile providing ``data``, ``label``, ``mean`` and ``dict``
        :param noise_std:   Standard deviation of the noise added to the input data (modes 2 and 3)
        :param mode_train:  Autoencoder Training Mode
                            [0: Autoencoder,
                            1: Denoising Autoencoder (mean),
                            2: Denoising Autoencoder (add random noise),
                            3: Denoising Autoencoder (add gaussian noise)].
                            Any other value is handled like mode 0.
        """
        super().__init__()
        # Prefix of the topology name for every known training mode.
        self.__mode = {
            0: "",
            1: "(mean) Denoising ",
            2: "(random noise) Denoising ",
            3: "(gaussian noise) Denoising ",
        }
        self.__noise_std = noise_std
        self.__mode_train = mode_train

        self.__data = np.array(dataset.data, dtype=np.float32)
        # Shape of one sample; taken from the converted array so that
        # list-like ``dataset.data`` (no ``.shape`` attribute) works as well.
        self.__size = self.__data.shape[1:]
        # NOTE(review): labels are stored as uint8, so cluster ids above 255
        # cannot be represented - confirm this limit is acceptable.
        self.__label = np.array(dataset.label, dtype=np.uint8)
        self.__mean = np.array(dataset.mean, dtype=np.float32)
        # Only a list is accepted as label dictionary; anything else is dropped.
        self.__labeled_dictionary = dataset.dict if isinstance(dataset.dict, list) else []

    def __len__(self):
        """Return the number of samples (one per label entry)."""
        return self.__label.shape[0]

    def __getitem__(self, idx):
        """Return the training pair for sample ``idx``.

        :param idx: Sample index; a tensor index is converted with ``tolist()`` first
        :return:    Dictionary with the keys 'in', 'out', 'class' and 'mean'
        """
        if is_tensor(idx):
            idx = idx.tolist()

        cluster_id = self.__label[idx]
        if self.__mode_train == 1:
            # Denoising Autoencoder Training with mean
            frame_in = self.__data[idx, :]
            frame_out = self.__mean[cluster_id, :]
        elif self.__mode_train == 2:
            # Denoising Autoencoder Training with adding random noise on input
            # (np.random.randn draws from the standard normal distribution)
            noise = np.array(self.__noise_std * np.random.randn(*self.__size), dtype=np.float32)
            frame_in = self.__data[idx, :] + noise
            frame_out = self.__data[idx, :]
        elif self.__mode_train == 3:
            # Denoising Autoencoder Training with adding gaussian noise on input
            # (np.random.normal with default loc=0 and scale=1)
            noise = np.array(self.__noise_std * np.random.normal(size=self.__size), dtype=np.float32)
            frame_in = self.__data[idx, :] + noise
            frame_out = self.__data[idx, :]
        else:
            # Normal Autoencoder Training
            frame_in = self.__data[idx, :]
            frame_out = self.__data[idx, :]

        return {
            'in': frame_in,
            'out': frame_out,
            'class': cluster_id,
            'mean': self.__mean[cluster_id, :],
        }

    @property
    def get_mean_waveforms(self) -> np.ndarray:
        """Getting the mean waveforms of dataset"""
        return self.__mean

    @property
    def get_cluster_num(self) -> int:
        """Returning the number of unique classes/labels in the dataset"""
        return int(np.unique(self.__label).size)

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of labeled dataset"""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information of used Autoencoder topology"""
        # Unknown modes are trained as a plain autoencoder in __getitem__,
        # so they are also reported as one instead of raising or wrapping around.
        return self.__mode.get(self.__mode_train, "") + "Autoencoder"