# Source code for denspp.offline.dnn.dataset.torch_datasets

import numpy as np
from torch import is_tensor
from torch.utils.data import Dataset
from denspp.offline.dnn.pytorch_config_data import SettingsDataset



class DatasetTorchVision(Dataset):
    """In-memory dataset of image frames with their class labels.

    Every item is a dictionary with the keys 'in' (input frame), 'out' (training target)
    and 'class' (class label). The target is the frame itself in autoencoder mode and
    the class label in classification mode.
    """

    def __init__(self, picture: np.ndarray, label: np.ndarray,
                 cluster_list: list = (), do_classification: bool = False) -> None:
        """Dataset Preparation for training Deep Learning Model using pre-defined datasets from torchvision.datasets
        :param picture:             Numpy data with images to be preprocessed
        :param label:               Numpy data with labels corresponding to the picture
        :param cluster_list:        List of cluster labels corresponding to the labels
        :param do_classification:   Boolean for doing classification (True) or autoencoder (False)
        :return:                    None
        """
        # --- Input Parameters
        self.__frames_orig = np.array(picture, dtype=np.float32)
        # Read the size from the converted array so array-like inputs without `.shape` also work
        self.__frames_size = self.__frames_orig.shape[1]
        # NOTE(review): labels are stored as uint8 - confirm that no dataset needs ids above 255
        self.__cluster_id = np.array(label, dtype=np.uint8)
        self.__do_classification = do_classification
        # --- Parameters for Confusion Matrix for Classification
        self.__labeled_dictionary = cluster_list

    def __len__(self) -> int:
        """Getting the number of samples, i.e. the number of stored labels"""
        return self.__cluster_id.shape[0]

    def __getitem__(self, idx) -> dict:
        """Getting one sample as dictionary with the keys 'in', 'out' and 'class'
        :param idx:     Index of the sample (tensor indices are converted via tolist())
        :return:        Dictionary with input frame, training target and class label
        """
        if is_tensor(idx):
            idx = idx.tolist()

        cluster_id = self.__cluster_id[idx]
        frame_in = self.__frames_orig[idx, :]
        # Autoencoder reconstructs the input frame, classifier predicts the class label
        frame_out = self.__frames_orig[idx, :] if not self.__do_classification else cluster_id
        return {'in': frame_in, 'out': frame_out, 'class': cluster_id}

    @property
    def get_dictionary(self) -> list:
        """Getting the dictionary of labeled dataset"""
        return self.__labeled_dictionary

    @property
    def get_topology_type(self) -> str:
        """Getting the information of used Autoencoder topology"""
        return "MNIST" + (" (Classification)" if self.__do_classification else " (Autoencoder)")

    @property
    def get_cluster_num(self) -> int:
        """Getting the number of classes"""
        return int(np.unique(self.__cluster_id).size)




def prepare_training(rawdata: dict, do_classification: bool) -> DatasetTorchVision:
    """Loading and preparing any dataset for training Deep Learning models from torchvision.datasets

    A short summary (number of frames, frame shape, samples per class) is printed
    before the dataset object is built.

    Args:
        rawdata:            Dictionary with rawdata for training with labels ['data', 'label', 'dict']
        do_classification:  Option for doing a classification, otherwise Autoencoder
    Returns:
        Getting the prepared Dataset
    Raises:
        KeyError: If one of the keys 'data', 'label' or 'dict' is missing in rawdata
    """
    data_raw = rawdata['data']
    data_dict = rawdata['dict']
    data_label = rawdata['label']

    # --- Print Output
    class_ids, class_counts = np.unique(data_label, return_counts=True)
    # Report every per-frame dimension, so data that is not exactly 3-D does not crash the printout
    frame_shape = ", ".join(str(dim) for dim in data_raw.shape[1:])
    print(f"... for training are {data_raw.shape[0]} frames with each "
          f"({frame_shape}) points available")
    print(f"... used data points for training: "
          f"in total {class_ids.size} classes with {np.sum(class_counts)} samples")

    has_names = isinstance(data_dict, (list, tuple))
    for pos, (class_id, num_samples) in enumerate(zip(class_ids, class_counts)):
        # NOTE(review): names are looked up by position in the sorted unique labels,
        # which assumes the name list is aligned with them - confirm against caller.
        # The bounds check prevents an IndexError if fewer names than classes are given.
        addon = f' ({data_dict[pos]})' if has_names and pos < len(data_dict) else ''
        print(f"\tclass {class_id}{addon} --> {num_samples} samples")

    return DatasetTorchVision(
        picture=data_raw,
        label=data_label,
        cluster_list=data_dict,
        do_classification=do_classification
    )