Source code for denspp.offline.dnn.dataset.autoencoder_class

import numpy as np
from os.path import join
from glob import glob
from torch import is_tensor, load, from_numpy
from torch.utils.data import Dataset
from denspp.offline.data_process.frame_preprocessing import calculate_frame_mean



[docs]
class DatasetAE_Class(Dataset):
    """Dataset for training autoencoder-based classifiers.

    Each sample pairs one row of the extracted feature array with its cluster ID.
    """

    def __init__(self, frames_raw: np.ndarray, frames_feat: np.ndarray,
                 cluster_id: np.ndarray, frames_cluster_me: np.ndarray,
                 cluster_dict=None):
        """Dataset Preparation for training autoencoder-based classifications
        Args:
            frames_raw:         Raw frames (stored as float32 copy, not returned by __getitem__)
            frames_feat:        Extracted features, one row per sample (stored as float32 copy)
            cluster_id:         Cluster ID of each sample (stored as uint8 copy)
            frames_cluster_me:  Mean waveforms (stored as float32 copy)
            cluster_dict:       List with class labels; anything that is not a list is replaced by []
        """
        # --- Input Parameters
        self.__frames_raw = np.array(frames_raw, dtype=np.float32)
        self.__frames_feat = np.array(frames_feat, dtype=np.float32)
        # NOTE: uint8 can only represent label values in the range 0..255
        self.__cluster_id = np.array(cluster_id, dtype=np.uint8)
        self.__frames_me = np.array(frames_cluster_me, dtype=np.float32)

        # --- Parameters for Confusion Matrix for Classification
        self.__labeled_dictionary = cluster_dict if isinstance(cluster_dict, list) else []

    def __len__(self) -> int:
        """Getting the number of samples (length of the cluster ID array)"""
        return self.__cluster_id.shape[0]

    def __getitem__(self, idx) -> dict:
        """Getting one sample as dict with features ('in') and cluster ID ('out')"""
        # Tensor indices are converted to plain Python values before indexing the numpy arrays
        if is_tensor(idx):
            idx = idx.tolist()

        return {'in': self.__frames_feat[idx, :],
                'out': self.__cluster_id[idx]}

    @property
    def get_mean_waveforms(self) -> np.ndarray:
        """Getting the mean waveforms of dataset"""
        return self.__frames_me

    @property
    def get_cluster_num(self) -> int:
        """Getting the number of unique cluster IDs in the dataset"""
        return int(np.unique(self.__cluster_id).size)

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of labeled dataset"""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information of used Autoencoder topology"""
        return "Autoencoder-based Classification"




[docs]
def prepare_training(rawdata: dict, path2model: str, print_state: bool=True) -> DatasetAE_Class:
    """Preparing dataset for training an autoencoder-based classifier (features from a trained autoencoder)
    Args:
        rawdata:        Dict with raw data for training ['data', 'label', 'dict', 'mean'] ('mean' is optional)
        path2model:     Path to folder with already-trained autoencoder (*.pt file)
        print_state:    Printing state and results into Terminal
    Returns:
        Dataset with extracted features and labels for training autoencoder-based classifier
    Raises:
        FileNotFoundError: If no *.pt file is available in path2model
    """
    from torch import no_grad  # local import, only needed for the inference pass below

    frames_in = rawdata['data']
    frames_cl = rawdata['label']
    frames_dict = rawdata['dict']
    frames_me = rawdata['mean'] if 'mean' in rawdata else calculate_frame_mean(frames_in, frames_cl, False)

    # --- PART: Calculating the features with given Autoencoder model
    # Sorting makes the selection deterministic if several model files are available
    overview_model = sorted(glob(join(path2model, '*.pt')))
    if not overview_model:
        raise FileNotFoundError(f"No trained autoencoder model (*.pt) found in '{path2model}'")

    # NOTE: weights_only=False unpickles arbitrary objects - only load model files from trusted sources
    model_ae = load(overview_model[0], weights_only=False)
    model_ae = model_ae.to("cpu")
    # Inference only: evaluation mode for dropout / batch-norm layers and no autograd graph
    model_ae.eval()
    with no_grad():
        # First element of the model output is taken as feature tensor
        feat = model_ae(from_numpy(np.array(frames_in, dtype=np.float32)))[0]
    frames_feat = feat.detach().numpy()

    # --- Output
    if print_state:
        classes, counts = np.unique(frames_cl, return_counts=True)
        print("... for training are", frames_feat.shape[0], "frames with each", frames_feat.shape[1], "extracted features available")
        print(f"... used data points for training: in total {classes.size} classes with {np.sum(counts)} samples")
        for idx, (class_id, num_samples) in enumerate(zip(classes, counts)):
            # NOTE(review): label name is looked up by position in the sorted unique labels,
            # not by the label value itself - confirm that this matches the layout of rawdata['dict']
            addon = '' if len(frames_dict) == 0 else f' ({frames_dict[idx]})'
            print(f"\tclass {class_id}{addon} --> {num_samples} samples")

    return DatasetAE_Class(
        frames_raw=frames_in,
        frames_feat=frames_feat,
        cluster_id=frames_cl,
        frames_cluster_me=frames_me,
        cluster_dict=frames_dict
    )