Source code for denspp.offline.template.call_dataset

import numpy as np
from logging import getLogger, Logger
from denspp.offline.data_format import JsonHandler
from denspp.offline.data_call import build_waveform_dataset, SettingsWaveformDataset, DefaultSettingsWaveformDataset
from denspp.offline.dnn import DatasetFromFile
from denspp.offline.dnn.data_config import SettingsDataset, ControllerDataset
from denspp.offline.dnn.data_processor import DataProcessor


class DatasetLoader(ControllerDataset):
    """Collection of custom dataset recipes (MNIST, noisy sin/cos, waveforms).

    Each dataset ``xyz`` is described by a pair of private methods,
    ``__get_xyz`` (download / configuration step) and ``__prepare_xyz``
    (builds the ``DatasetFromFile`` object and runs it through the
    ``DataProcessor``).
    NOTE(review): how the base class ``ControllerDataset`` selects and calls
    these methods is not visible in this file - confirm there before renaming.
    """
    _logger: Logger
    _settings: SettingsDataset
    _processor: DataProcessor
    _path: str

    def __init__(self, settings: SettingsDataset, temp_folder: str='') -> None:
        """Class for downloading (function name with '__get_xyz') and preparing (function name with '__prepare_xyz') custom-defined datasets to train deep learning models
        :param settings:    Object of class SettingsDataset for handling dataset used in DeepLearning
        :param temp_folder: String forwarded unchanged to the ControllerDataset constructor (presumably the folder for temporary dataset files - confirm in base class)
        :return:            None
        """
        super().__init__(settings, temp_folder)
        self._logger = getLogger(__name__)
        self._processor = DataProcessor(settings)

    def __get_mnist(self) -> None:
        """Download step for MNIST - nothing to do, the data is fetched in the prepare step
        :return:            None
        """
        pass

    def __prepare_mnist(self) -> DatasetFromFile:
        """Fetching MNIST ("mnist_784") from OpenML and preparing it as vision dataset
        :return:            Result of DataProcessor.process_vision_datasets() applied on the MNIST dataset
        """
        # Imported lazily so scikit-learn is only required when MNIST is requested
        from sklearn.datasets import fetch_openml

        data, label = fetch_openml("mnist_784", return_X_y=True, as_frame=False, parser="liac-arff")
        class_names = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']
        dataset = DatasetFromFile(
            # Flat 784-value rows are reshaped into 28 x 28 images
            data=data.reshape(-1, 28, 28),
            label=np.array(label, dtype=np.uint8),
            dict=class_names,
            # One all-zero placeholder image per class
            mean=np.zeros(shape=(len(class_names), 28, 28)),
        )
        return self._processor.process_vision_datasets(
            data=dataset
        )

    def __get_sinusoidal(self) -> None:
        """Download step for the sinusoidal dataset - nothing to do, the data is generated in the prepare step
        :return:            None
        """
        pass

    def __prepare_sinusoidal(self) -> DatasetFromFile:
        """Generating a synthetic two-class timeseries dataset with noisy sine (label 0) and cosine (label 1) periods
        :return:            Result of DataProcessor.process_timeseries_datasets() applied on the generated dataset
        """
        seq_len = 100
        n_samples = 2000
        noise_amp = 0.5
        class_names = ['sin', 'cos']

        data = []
        labels = []
        # One full period [0, 2*pi] sampled with seq_len points
        window = np.linspace(start=0, stop=2 * np.pi, num=seq_len)
        for _ in range(n_samples):
            # Draw order (class first, then noise) is kept so that runs with a
            # seeded global NumPy RNG still produce the same samples
            label = 0 if np.random.rand() > 0.5 else 1
            clean = np.sin(window) if label == 0 else np.cos(window)
            # Additive Gaussian noise; each sample has shape (seq_len,)
            data.append(clean + noise_amp * np.random.randn(seq_len))
            labels.append(label)

        dataset = DatasetFromFile(
            data=np.array(data, dtype=float),
            label=np.array(labels, dtype=int),
            dict=class_names,
            # One all-zero placeholder waveform per class, matching the
            # (n_classes, *sample_shape) layout used for the MNIST and waveform
            # datasets (previously one row per sample)
            mean=np.zeros(shape=(len(class_names), seq_len))
        )
        return self._processor.process_timeseries_datasets(data=dataset, add_noise_cluster=False)

    def __get_waveforms(self) -> SettingsWaveformDataset:
        """Getting the settings of the waveform dataset via JsonHandler (path 'config', file name 'Config_WaveformDataset')
        :return:            Object of class SettingsWaveformDataset
        """
        return JsonHandler(
            template=DefaultSettingsWaveformDataset,
            path='config',
            file_name='Config_WaveformDataset'
        ).get_class(SettingsWaveformDataset)

    def __prepare_waveforms(self) -> DatasetFromFile:
        """Building the waveform dataset from the loaded settings and preparing it as timeseries dataset
        :return:            Result of DataProcessor.process_timeseries_datasets() applied on the waveform dataset
        """
        data = build_waveform_dataset(
            settings_data=self.__get_waveforms()
        )
        dataset = DatasetFromFile(
            data=data.data,
            label=data.label,
            dict=data.dict,
            # One all-zero placeholder per class with the shape of a single sample
            mean=np.zeros(shape=(len(data.dict), *data.data.shape[1:]))
        )
        return self._processor.process_timeseries_datasets(data=dataset)