Source code for denspp.offline.dnn.dataset.torch_datasets

import numpy as np
from torch import is_tensor
from torch.utils.data import Dataset
from denspp.offline.dnn.pytorch_config_data import SettingsDataset


class DatasetTorchVision(Dataset):
    """Dataset serving image frames and their labels as dictionary samples.

    Every sample returned by ``__getitem__`` is a dict with the keys
    ``'in'`` (input frame), ``'out'`` (training target) and ``'class'`` (label).
    The target is the frame itself unless classification is enabled,
    in which case it is the label.
    """

    def __init__(self, picture: np.ndarray, label: np.ndarray, cluster_list: list=(), do_classification: bool=False) -> None:
        """Dataset Preparation for training Deep Learning Model using pre-defined datasets from torchvision.datasets
        :param picture:             Numpy data (or array-like) with images to be preprocessed, first axis = sample index
        :param label:               Numpy data with labels corresponding to the picture
        :param cluster_list:        List of cluster labels corresponding to the labels
        :param do_classification:   Boolean for doing classification (True) or autoencoder (False)
        :return:                    None
        """
        # --- Input Parameters
        self.__frames_orig = np.array(picture, dtype=np.float32)
        # Take the size from the converted array (not from the raw argument),
        # so array-likes without a ``shape`` attribute (e.g. nested lists) work too
        self.__frames_size = self.__frames_orig.shape[1]
        # Labels are stored as uint8, i.e. only values in 0..255 fit this dtype
        self.__cluster_id = np.array(label, dtype=np.uint8)
        self.__do_classification = do_classification
        # --- Parameters for Confusion Matrix for Classification
        self.__labeled_dictionary = cluster_list

    def __len__(self):
        """Return the number of samples (= number of stored labels)"""
        return self.__cluster_id.shape[0]

    def __getitem__(self, idx):
        """Return the sample at position idx
        :param idx: Index of the sample (a tensor index is converted with tolist() first)
        :return:    Dict with keys 'in' (frame), 'out' (frame or label) and 'class' (label)
        """
        if is_tensor(idx):
            idx = idx.tolist()

        cluster_id = self.__cluster_id[idx]
        frame_in = self.__frames_orig[idx, :]
        # Autoencoder mode reconstructs the input frame, classification mode predicts the label
        frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
        return {'in': frame_in, 'out': frame_out, 'class': cluster_id}

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of labeled dataset"""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information of used Autoencoder topology"""
        return "MNIST" + (" (Classification)" if self.__do_classification else " (Autoencoder)")

    @property
    def get_cluster_num(self) -> int:
        """Getting the number of classes (count of distinct label values)"""
        return int(np.unique(self.__cluster_id).size)
def prepare_training(rawdata: dict, do_classification: bool) -> DatasetTorchVision:
    """Loading and preparing any dataset for training Deep Learning models from torchvision.datasets

    Prints a short summary (number of frames, frame shape, samples per class)
    and wraps the data into a DatasetTorchVision.

    Args:
        rawdata:            Dictionary with rawdata for training with labels ['data', 'label', 'dict']
        do_classification:  Option for doing a classification, otherwise Autoencoder
    Returns:
        Getting the prepared Dataset
    Raises:
        KeyError: If one of the keys 'data', 'label' or 'dict' is missing in rawdata
    """
    data_raw = rawdata['data']
    data_dict = rawdata['dict']
    data_label = rawdata['label']

    # --- Print Output
    class_ids, class_counts = np.unique(data_label, return_counts=True)
    # Report the full per-frame shape instead of hard-coding two axes
    print(f"... for training are {data_raw.shape[0]} frames with each "
          f"{tuple(data_raw.shape[1:])} points available")
    print(f"... used data points for training: "
          f"in total {class_ids.size} classes with {np.sum(class_counts)} samples")

    # Class names are only shown if they are given as a list
    has_names = isinstance(data_dict, list)
    for idx, (class_id, num_samples) in enumerate(zip(class_ids, class_counts)):
        # Skip the name if the list is shorter than the number of classes found
        addon = f' ({data_dict[idx]})' if has_names and idx < len(data_dict) else ''
        print(f"\tclass {class_id}{addon} --> {num_samples} samples")

    return DatasetTorchVision(
        picture=data_raw,
        label=data_label,
        cluster_list=data_dict,
        do_classification=do_classification
    )