Source code for denspp.offline.dnn.dataset.autoencoder
import numpy as np
from torch import is_tensor
from torch.utils.data import Dataset
from denspp.offline.data_process.frame_preprocessing import calculate_frame_mean
class DatasetAE(Dataset):
"""Dataset Preparator for training Autoencoder"""
def __init__(self, frames_raw: np.ndarray, cluster_id: np.ndarray,
frames_cluster_me: np.ndarray, cluster_dict=None,
noise_std=0.1, do_classification=False, mode_train=0):
# --- Input Parameters
self.__frames_orig = np.array(frames_raw, dtype=np.float32)
self.__frames_size = frames_raw.shape[1]
self.__cluster_id = np.array(cluster_id, dtype=np.uint8)
self.__frames_me = np.array(frames_cluster_me, dtype=np.float32)
# --- Parameters for Denoising Autoencoder
self.__frames_noise_std = noise_std
self.__do_classification = do_classification
# --- Parameters for Confusion Matrix for Classification
self.__labeled_dictionary = cluster_dict if isinstance(cluster_dict, list) else []
self.__mode_train = mode_train

    def __len__(self):
        return self.__cluster_id.shape[0]

    def __getitem__(self, idx):
        if is_tensor(idx):
            idx = idx.tolist()
        cluster_id = self.__cluster_id[idx]
        if self.__mode_train == 1:
            # Denoising autoencoder training: the target is the cluster-mean waveform
            frame_in = self.__frames_orig[idx, :]
            frame_out = self.__frames_me[cluster_id, :] if not self.__do_classification else cluster_id
        elif self.__mode_train == 2:
            # Denoising autoencoder training: Gaussian noise (np.random.randn) added to the input
            frame_in = self.__frames_orig[idx, :] + np.array(self.__frames_noise_std * np.random.randn(self.__frames_size), dtype=np.float32)
            frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
        elif self.__mode_train == 3:
            # Denoising autoencoder training: Gaussian noise (np.random.normal) added to the input
            frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
            frame_in = self.__frames_orig[idx, :] + np.array(self.__frames_noise_std * np.random.normal(size=self.__frames_size), dtype=np.float32)
        else:
            # Normal autoencoder training: the target equals the input frame
            frame_in = self.__frames_orig[idx, :]
            frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
        return {'in': frame_in, 'out': frame_out, 'class': cluster_id,
                'mean': self.__frames_me[cluster_id, :]}

    @property
    def get_mean_waveforms(self) -> np.ndarray:
        """Getting the mean waveforms of the dataset"""
        return self.__frames_me

    @property
    def get_cluster_num(self) -> int:
        """Getting the number of unique clusters in the dataset"""
        return int(np.unique(self.__cluster_id).size)

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of the labeled dataset"""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information about the used Autoencoder topology"""
        match self.__mode_train:
            case 1:
                out = "Denoising Autoencoder (mean)"
            case 2:
                out = "Denoising Autoencoder (Add random noise)"
            case 3:
                out = "Denoising Autoencoder (Add Gaussian noise)"
            case _:
                out = "Autoencoder"
        if self.__do_classification:
            out += " for Classification"
        return out
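

# --- Illustrative usage sketch (not part of the module's public API): it builds a DatasetAE from
# --- small synthetic frames and shows how `mode_train` changes the (input, target) pair returned
# --- by __getitem__. The helper name and all example values below are hypothetical.
def _demo_dataset_ae_modes() -> None:
    rng = np.random.default_rng(seed=42)
    frames = rng.standard_normal((6, 32)).astype(np.float32)                 # 6 frames, 32 samples each
    labels = np.array([0, 0, 1, 1, 2, 2], dtype=np.uint8)                    # one class id per frame
    means = np.stack([frames[labels == c].mean(axis=0) for c in range(3)])   # per-class mean waveforms
    for mode in (0, 1, 2, 3):
        sample = DatasetAE(frames, labels, means, mode_train=mode)[0]
        print(f"mode {mode}: in={sample['in'].shape}, out={np.shape(sample['out'])}, class={sample['class']}")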


def prepare_training(rawdata: dict, do_classification: bool = False,
                     mode_train_ae: int = 0, noise_std: float = 0.1,
                     print_state: bool = True) -> DatasetAE:
    """Preparing a dataset for autoencoder tasks

    Args:
        rawdata:            Dictionary with the raw training data and labels, keys ['data', 'label', 'dict'] (optionally 'mean')
        do_classification:  Decision whether the target should be the class label (classification) instead of a waveform
        mode_train_ae:      Mode for training the autoencoder (0: normal, 1: denoising with cluster mean, 2/3: denoising with additive Gaussian noise on the input)
        noise_std:          Standard deviation of the additive noise
        print_state:        Printing the state and results to the terminal
    Returns:
        DatasetAE instance for training autoencoders
    """
    frames_in = rawdata['data']
    frames_cl = rawdata['label']
    frames_dict = rawdata['dict']
    frames_me = rawdata['mean'] if 'mean' in rawdata.keys() else calculate_frame_mean(frames_in, frames_cl, False)
    # --- Output
    check = np.unique(frames_cl, return_counts=True)
    if print_state:
        print(f"... {frames_in.shape[0]} frames with {frames_in.shape[1]} points each are available for training")
        print(f"... used data points for training: {check[0].size} classes with {np.sum(check[1])} samples in total")
        for idx, class_id in enumerate(check[0]):
            addon = '' if len(frames_dict) == 0 else f' ({frames_dict[idx]})'
            print(f"\tclass {class_id}{addon} --> {check[1][idx]} samples")
    return DatasetAE(
        frames_raw=frames_in,
        cluster_id=frames_cl,
        frames_cluster_me=frames_me,
        cluster_dict=frames_dict,
        mode_train=mode_train_ae,
        do_classification=do_classification,
        noise_std=noise_std
    )
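

# --- Minimal end-to-end sketch, assuming a synthetic `rawdata` dictionary: it wraps the prepared
# --- DatasetAE in a torch DataLoader for a denoising setup (mode_train_ae=2). All values below
# --- are hypothetical examples, not defaults of this package.
if __name__ == "__main__":
    from torch.utils.data import DataLoader

    rng = np.random.default_rng(seed=0)
    data = rng.standard_normal((128, 32)).astype(np.float32)                  # 128 synthetic frames
    label = rng.integers(0, 4, size=128).astype(np.uint8)                     # four synthetic classes
    rawdata = {
        'data': data,
        'label': label,
        'dict': ['A', 'B', 'C', 'D'],
        'mean': np.stack([data[label == c].mean(axis=0) for c in range(4)]),  # per-class mean waveforms
    }
    dataset = prepare_training(rawdata, mode_train_ae=2, noise_std=0.1)
    loader = DataLoader(dataset, batch_size=16, shuffle=True)
    batch = next(iter(loader))
    print(dataset.get_topology_type, batch['in'].shape, batch['out'].shape)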