# Source code for denspp.offline.dnn.dataset.autoencoder_class
from glob import glob
from os.path import join

import numpy as np
from torch import from_numpy, is_tensor, load, no_grad
from torch.utils.data import Dataset

from denspp.offline.data_process.frame_preprocessing import calculate_frame_mean
class DatasetAE_Class(Dataset):
    """Torch dataset pairing autoencoder feature vectors with class labels.

    Each sample yields the extracted feature representation of a spike frame
    ('in') together with its cluster label ('out') for classifier training.
    """

    def __init__(self, frames_raw: np.ndarray, frames_feat: np.ndarray,
                 cluster_id: np.ndarray, frames_cluster_me: np.ndarray,
                 cluster_dict=None):
        """Dataset preparation for training autoencoder-based classifications.

        Args:
            frames_raw:        Raw spike frames (one frame per row)
            frames_feat:       Feature vectors extracted by the autoencoder
            cluster_id:        Cluster / class label per frame
            frames_cluster_me: Mean waveform per cluster
            cluster_dict:      Optional list of human-readable class names
        """
        # --- Input Parameters
        self.__frames_raw = np.array(frames_raw, dtype=np.float32)
        self.__frames_feat = np.array(frames_feat, dtype=np.float32)
        self.__cluster_id = np.array(cluster_id, dtype=np.uint8)
        self.__frames_me = np.array(frames_cluster_me, dtype=np.float32)
        # --- Parameters for Confusion Matrix for Classification
        if isinstance(cluster_dict, list):
            self.__labeled_dictionary = cluster_dict
        else:
            self.__labeled_dictionary = []

    def __len__(self) -> int:
        num_samples = self.__cluster_id.shape[0]
        return num_samples

    def __getitem__(self, idx):
        if is_tensor(idx):
            idx = idx.tolist()
        sample = {'in': self.__frames_feat[idx, :],
                  'out': self.__cluster_id[idx]}
        return sample

    @property
    def get_mean_waveforms(self) -> np.ndarray:
        """Getting the mean waveforms of the dataset (one per cluster)."""
        return self.__frames_me

    @property
    def get_cluster_num(self) -> int:
        """Number of distinct cluster / class labels in the dataset."""
        distinct_labels = np.unique(self.__cluster_id)
        return int(distinct_labels.size)

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of the labeled dataset (may be empty)."""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information of the used Autoencoder topology."""
        return "Autoencoder-based Classification"
def prepare_training(rawdata: dict, path2model: str, print_state: bool=True) -> DatasetAE_Class:
    """Preparing dataset incl. augmentation for spike-frame based training

    Args:
        rawdata:     Dict with raw data for training ['data', 'label', 'dict', 'mean']
        path2model:  Path to the folder holding the already-trained autoencoder (*.pt)
        print_state: Printing state and results into Terminal

    Returns:
        Dataloader for training autoencoder-based classifier

    Raises:
        FileNotFoundError: If no *.pt model file is found in path2model.
    """
    frames_in = rawdata['data']
    frames_cl = rawdata['label']
    frames_dict = rawdata['dict']
    # Fall back to computing the per-cluster mean waveforms when not provided
    frames_me = rawdata['mean'] if 'mean' in rawdata else calculate_frame_mean(frames_in, frames_cl, False)

    # --- PART: Calculating the features with given Autoencoder model
    overview_model = glob(join(path2model, '*.pt'))
    if not overview_model:
        # Fail with a clear message instead of an opaque IndexError on [0]
        raise FileNotFoundError(f"No trained autoencoder model (*.pt) found in: {path2model}")
    # NOTE(security): weights_only=False deserializes via pickle — only load trusted model files
    model_ae = load(overview_model[0], weights_only=False)
    model_ae = model_ae.to("cpu")
    with no_grad():  # pure inference — skip autograd bookkeeping
        feat = model_ae(from_numpy(np.array(frames_in, dtype=np.float32)))[0]
    frames_feat = feat.detach().numpy()

    # --- Output
    if print_state:
        # Class statistics are only needed for the terminal report
        class_ids, class_counts = np.unique(frames_cl, return_counts=True)
        print("... for training are", frames_feat.shape[0], "frames with each", frames_feat.shape[1], "extracted features available")
        print(f"... used data points for training: in total {class_ids.size} classes with {np.sum(class_counts)} samples")
        for idx, cls_id in enumerate(class_ids):
            # frames_dict is assumed to be ordered like the sorted unique labels — TODO confirm with callers
            addon = '' if len(frames_dict) == 0 else f' ({frames_dict[idx]})'
            print(f"\tclass {cls_id}{addon} --> {class_counts[idx]} samples")
    return DatasetAE_Class(
        frames_raw=frames_in,
        frames_feat=frames_feat,
        cluster_id=frames_cl,
        frames_cluster_me=frames_me,
        cluster_dict=frames_dict
    )