From bc9ec9bc2bc3a3f5daae8fb86805596b9ad551a7 Mon Sep 17 00:00:00 2001
From: Karchkov Denis
Date: Wed, 4 Dec 2024 19:52:31 +0300
Subject: [PATCH] Add ECG neural network tuning example (#199)

* Extended the documentation with a paragraph on using the framework to
  select two real and one discrete parameter; corrected the problem code
  for finding real and discrete parameters.
* Corrected the target score.
* Added new examples; corrected the documentation.
* Corrected the documentation of the examples.
---
 .gitignore                                    |   4 +
 .../Segmentation/Problem/Cardio2D.py          | 145 ++++++++++++++++++
 .../NeuralNetwork/Segmentation/UnetExample.py |  61 ++++++++
 .../NeuralNetwork/Segmentation/__init__.py    |   0
 .../Segmentation/scripts/__init__.py          |   0
 .../Segmentation/scripts/dataset.py           |  73 +++++++++
 .../Segmentation/scripts/metric.py            | 109 +++++++++++++
 .../Segmentation/scripts/model.py             | 141 +++++++++++++++++
 .../NeuralNetwork/__init__.py                 |   0
 9 files changed, 533 insertions(+)
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/Problem/Cardio2D.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/UnetExample.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/__init__.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/scripts/__init__.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/scripts/dataset.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/scripts/metric.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/Segmentation/scripts/model.py
 create mode 100644 examples/Machine_learning/NeuralNetwork/__init__.py

diff --git a/.gitignore b/.gitignore
index d18fa3b7..13bcfe6c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,7 @@ dmypy.json
 
 # datasets
 benchmarks/data/datasets
+examples/Machine_learning/NeuralNetwork/Segmentation/data
+examples/Machine_learning/NeuralNetwork/Segmentation/models/*
+examples/Machine_learning/NeuralNetwork/Segmentation/lightning_logs
+examples/Machine_learning/NeuralNetwork/Segmentation/data.zip
\ No newline at end of file
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/Problem/Cardio2D.py b/examples/Machine_learning/NeuralNetwork/Segmentation/Problem/Cardio2D.py
new file mode 100644
index 00000000..588d5561
--- /dev/null
+++ b/examples/Machine_learning/NeuralNetwork/Segmentation/Problem/Cardio2D.py
@@ -0,0 +1,145 @@
+import random
+
+from examples.Machine_learning.NeuralNetwork.Segmentation.scripts.dataset import SegmentationDataset
+from examples.Machine_learning.NeuralNetwork.Segmentation.scripts.metric import AllMetricTracker, SegmentationMetric
+from iOpt.trial import Point
+from iOpt.trial import FunctionValue
+from iOpt.problem import Problem
+from typing import Dict
+import os
+from sklearn.model_selection import train_test_split
+from torch.utils.data import DataLoader
+from lightning.pytorch import Trainer, LightningModule
+from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
+import torch
+import torch.nn as nn
+import numpy as np
+from examples.Machine_learning.NeuralNetwork.Segmentation.scripts.model import Encoder, Decoder, UNet
+
+
+class UnetModule(LightningModule):
+    def __init__(self, kernel_size=23, q=1.2, label_smoothing=0, p=0.75):
+        super().__init__()
+        self.save_hyperparameters()
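+        # p (dropout rate) and q (channel growth factor) are the two real
+        # hyperparameters tuned by iOpt through Cardio2D below.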
+        encoder = Encoder(12, kernel_size=kernel_size, q=q, p=p)
+        decoder = Decoder(encoder, 4)
+
+        self.model = UNet(encoder, decoder)
+        self.loss = nn.CrossEntropyLoss(ignore_index=4, label_smoothing=label_smoothing)
+
+        self.p_metric = SegmentationMetric('p', 'all', return_type='f1', samples=150)
+        self.t_metric = SegmentationMetric('t', 'all', return_type='f1', samples=150)
+        self.qrs_metric = SegmentationMetric('qrs', 'all', return_type='f1', samples=150)
+
+    def predict(self, x):
+        if isinstance(x, np.ndarray):
+            x = torch.Tensor(x)
+        x = x.unsqueeze(0) if len(x.shape) == 2 else x
+        x = x.to(self.device)
+        logits = self.model(x)
+        y_pred = logits.argmax(axis=1)
+        return y_pred.cpu().detach().numpy()
+
+    def training_step(self, batch):
+        _, x, y = batch
+        logits = self.model(x)
+        loss = self.loss(logits, y)
+        dict_ = {'train_loss': loss}
+        self.log_dict(dict_, on_epoch=True, on_step=False)
+        return loss
+
+    def validation_step(self, batch):
+        _, x, y = batch
+        logits = self.model(x)
+        loss = self.loss(logits, y)
+        dict_ = {'val_loss': loss}
+
+        metrics = self.get_metric(x, y, 'val')
+        dict_.update(metrics)
+
+        self.log_dict(dict_, on_epoch=True, on_step=False)
+
+        return loss
+
+    def get_metric(self, x, y_true, prefix):
+        y_true = y_true.cpu().detach().numpy()
+        y_pred = self.predict(x)
+        p_f1_score = self.p_metric(y_pred, y_true)
+        qrs_f1_score = self.qrs_metric(y_pred, y_true)
+        t_f1_score = self.t_metric(y_pred, y_true)
+        metrics = {f'{prefix}_p_wave': p_f1_score,
+                   f'{prefix}_qrs_wave': qrs_f1_score,
+                   f'{prefix}_t_wave': t_f1_score}
+        return metrics
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.AdamW(self.model.parameters())
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.3, patience=50)
+        return [optimizer], [{"scheduler": scheduler,
+                              "interval": "epoch",
+                              "monitor": "train_loss"}]
+
+
+def get_dataset(paths):
+    return [np.load(f'data/signals/{x}') for x in paths], \
+           [np.load(f'data/masks/{x}') for x in paths]
+
+
+class Cardio2D(Problem):
+    def __init__(self, p_bound: Dict[str, float], q_bound: Dict[str, float]):
+        super(Cardio2D, self).__init__()
+        self.dimension = 2
+        self.number_of_float_variables = 2
+        self.number_of_discrete_variables = 0
+        self.number_of_objectives = 1
+        self.number_of_constraints = 0
+
+        ecg_list = sorted(os.listdir('data/signals/'))
+        ecg_list = [x for x in ecg_list if x.split('_')[-1] != 'unsupervised.npy']
+
+        train_list, test_list = train_test_split(ecg_list, test_size=0.2, shuffle=True, random_state=42)
+
+        for x in sorted(os.listdir('data/signals/')):
+            if x.split('_')[-1] == 'unsupervised.npy':
+                train_list.append(x)
+
+        x_train, y_train = get_dataset(train_list)
+        x_test, y_test = get_dataset(test_list)
+
+        train_dataset = SegmentationDataset('cpu', train_list, x_train, y_train, common_mask=True, for_train=True)
+        val_dataset = SegmentationDataset('cpu', test_list, x_test, y_test, common_mask=True)
+
+        self.train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+        self.val_loader = DataLoader(val_dataset, batch_size=32)
+
+        self.float_variable_names = np.array(["P parameter", "Q parameter"], dtype=str)
+        self.lower_bound_of_float_variables = np.array([p_bound['low'], q_bound['low']],
+                                                       dtype=np.double)
+        self.upper_bound_of_float_variables = np.array([p_bound['up'], q_bound['up']],
+                                                       dtype=np.double)
+
+    def calculate(self, point: Point, function_value: FunctionValue) -> FunctionValue:
+        p, q = point.float_variables[0], point.float_variables[1]
+
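+        # Checkpoints are ranked by validation P-wave F1 - the same score the
+        # optimizer maximizes (negated) below; the random filename prefix
+        # keeps checkpoints from repeated runs from colliding.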
+        checkpoint = ModelCheckpoint(dirpath='models/',
+                                     filename=f"{random.uniform(1, 100):.9f}" + " " + f"{p:.9f}" + '_' + f"{q:.9f}" + '_' + '{epoch}_{val_p_wave:.6f}_{val_qrs_wave:.6f}_{val_t_wave:.6f}',
+                                     monitor='val_p_wave',
+                                     save_top_k=3,
+                                     mode='max')
+        early_stopping = EarlyStopping(monitor='val_loss',
+                                       patience=300)
+
+        cb = AllMetricTracker()
+        model = UnetModule(p=p, q=q)
+        trainer = Trainer(max_epochs=1_000_000, callbacks=[checkpoint, early_stopping, cb])
+        try:
+            trainer.fit(model, self.train_loader, self.val_loader)
+        except Exception as err:
+            print(f"Unexpected {err=}, {type(err)=}")
+
+        print('p ' + f"{p:.9f}")
+        print('q ' + f"{q:.9f}")
+        function_value.value = -cb.best_p_valscore
+        print(-cb.best_p_valscore)
+        return function_value
\ No newline at end of file
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/UnetExample.py b/examples/Machine_learning/NeuralNetwork/Segmentation/UnetExample.py
new file mode 100644
index 00000000..56a42671
--- /dev/null
+++ b/examples/Machine_learning/NeuralNetwork/Segmentation/UnetExample.py
@@ -0,0 +1,61 @@
+import shutil
+
+import numpy as np
+from examples.Machine_learning.NeuralNetwork.Segmentation.Problem.Cardio2D import Cardio2D
+from iOpt.output_system.listeners.console_outputers import ConsoleOutputListener
+from iOpt.solver import Solver
+from iOpt.solver_parametrs import SolverParameters
+import hashlib
+import os
+from pathlib import Path
+
+import requests
+from tqdm import tqdm
+
+
+def _get_hash(path: Path) -> str:
+    file_hash = hashlib.sha256()
+    with open(path, "rb") as f:
+        while chunk := f.read(8192):
+            file_hash.update(chunk)
+    return file_hash.hexdigest()
+
+
+def download(path: Path, public_key: str) -> None:
+    url = "https://cloud-api.yandex.net/v1/disk/public/resources"
+    params = {"public_key": f"https://disk.yandex.ru/d/{public_key}"}
+
+    response = requests.get(url, params=params).json()
+    download_url = response["file"]
+    file_size = response["size"]
+    sha256 = response["sha256"]
+
+    # Skip the download only if the file is already present and intact.
+    if path.is_file() and os.path.getsize(path) == file_size:
+        print(f"File already downloaded: {path}")
+        if _get_hash(path) == sha256:
+            return
+
+    response = requests.get(download_url, stream=True)
+
+    with tqdm(total=file_size, unit="B", unit_scale=True) as progress_bar:
+        with open(path, "wb") as f:
+            for data in response.iter_content(1024):
+                progress_bar.update(len(data))
+                f.write(data)
+
+
+if __name__ == "__main__":
+    if not os.path.exists('data'):
+        path = Path('data.zip')
+        download(path, 'Oqxcid6uX58kYQ')
+        shutil.unpack_archive('data.zip', 'data', format="zip")
+        os.remove('data.zip')
+
+    p_value_bound = {'low': 0.0, 'up': 1.0}
+    q_value_bound = {'low': 1.0, 'up': 1.6}
+    problem = Cardio2D(p_value_bound, q_value_bound)
+    method_params = SolverParameters(r=np.double(3.0), iters_limit=10)
+    solver = Solver(problem, parameters=method_params)
+    cfol = ConsoleOutputListener(mode='full')
+    solver.add_listener(cfol)
+    solver_info = solver.solve()
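+
+    # After the search, the best model can be restored from its checkpoint.
+    # A minimal inference sketch (paths are illustrative; load_from_checkpoint
+    # is Lightning's standard restore API and picks up the saved p and q;
+    # UnetModule lives in Problem/Cardio2D.py):
+    #   model = UnetModule.load_from_checkpoint('models/<best>.ckpt')
+    #   mask = model.predict(np.load('data/signals/<record>.npy'))
+    #   # per-sample class ids: 0 background, 1 P, 2 QRS, 3 T, 4 ignored borders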
\ No newline at end of file
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/__init__.py b/examples/Machine_learning/NeuralNetwork/Segmentation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/__init__.py b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/dataset.py b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/dataset.py
new file mode 100644
index 00000000..c3ea6339
--- /dev/null
+++ b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/dataset.py
@@ -0,0 +1,73 @@
+import torch
+import numpy as np
+
+
+class SegmentationDataset(torch.utils.data.Dataset):
+    def __init__(self, device, paths, signals, masks=None, common_mask=False, for_train=False):
+
+        self._device = device
+        self._paths = paths
+        self._signals = [torch.Tensor(x).to(device) for x in signals]
+        # masks may be None (unlabelled data); the training branch of
+        # __getitem__ checks for this before slicing.
+        self._masks = [torch.LongTensor(x).to(device) for x in masks] if masks is not None else None
+
+        self.begin_noise, self.end_noise = 1e-3, 3e-3
+        self.begin_ampl, self.end_ampl = 0, 0.3
+
+        self.begin_freq, self.end_freq = 0, 0.009
+
+        self.prob_isoline = 0.7
+        self.prob_reverse = 0.5
+        self.sub_len = 4000
+
+        self.common_mask = common_mask
+        self.for_train = for_train
+
+    def reverse_ecg(self, signal):
+        # Randomly flip the sign of each lead with probability prob_reverse.
+        result = torch.zeros_like(signal, device=self._device)
+        for i, x in enumerate(signal):
+            sign = 2 * (np.random.rand() < self.prob_reverse) - 1
+            result[i] = sign * x
+        return result
+
+    def __len__(self):
+        return len(self._signals)
+
+    def __getitem__(self, i):
+        if not self.for_train:
+            return self._paths[i], self._signals[i], self.skip_borders(self._masks[i][0])
+
+        # Records are 5000 samples long; crop a random sub_len window and add noise.
+        shift = np.random.randint(0, 5000 - self.sub_len - 1)
+        noise = self.begin_noise + (self.end_noise - self.begin_noise) * np.random.rand()
+        signal = self._signals[i][:, shift:shift + self.sub_len] + torch.normal(0, noise,
+                                                                                size=(self.sub_len,),
+                                                                                device=self._device)
+
+        signal = self.reverse_ecg(signal)
+
+        if self._masks is None:
+            return self._paths[i], signal
+
+        mask = self._masks[i][:, shift: shift + self.sub_len]
+        indexes = torch.randperm(12, device=self._device)
+
+        if self.common_mask:
+            mask = mask[0]
+        else:
+            mask = mask[indexes]
+
+        return self._paths[i], signal[indexes], self.skip_borders(mask)
+
+    def skip_borders(self, mask):
+        # Mark the truncated first and last waves with the ignore index (4)
+        # so the loss and metrics skip them.
+        wave_start = torch.logical_and(torch.roll(mask, 1) == 0, mask != 0).type(torch.uint8)
+        wave_finish = torch.logical_and(torch.roll(mask, -1) == 0, mask != 0).type(torch.uint8)
+
+        indexes_starts, = torch.where(wave_start == 1)
+        indexes_finish, = torch.where(wave_finish == 1)
+
+        left_skip = indexes_starts[indexes_starts > 500][0]
+        right_skip = indexes_finish[indexes_finish < len(mask) - 500][-1]
+
+        mask_copy = torch.clone(mask)
+        mask_copy[:left_skip] = 4
+        mask_copy[right_skip:] = 4
+
+        return mask_copy
\ No newline at end of file
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/metric.py b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/metric.py
new file mode 100644
index 00000000..3771afde
--- /dev/null
+++ b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/metric.py
@@ -0,0 +1,109 @@
+from typing import Literal
+import numpy as np
+import math
+from lightning.pytorch.callbacks import Callback
+
+
+class AllMetricTracker(Callback):
+    def __init__(self):
+        self.collection = []
+        self.best_p_valscore = -1
+        self.best_qrs_valscore = -1
+        self.best_t_valscore = -1
+        self.l2_max = 0
+
+    def on_validation_epoch_end(self, trainer, module):
+        elogs = trainer.logged_metrics  # access it here
+        self.collection.append(elogs)
+        if self.best_p_valscore < elogs['val_p_wave'].item():
+            self.best_p_valscore = elogs['val_p_wave'].item()
+            self.best_qrs_valscore = elogs['val_qrs_wave'].item()
+            self.best_t_valscore = elogs['val_t_wave'].item()
+            cur_l2 = math.sqrt(elogs['val_p_wave'].item() ** 2
+                               + elogs['val_qrs_wave'].item() ** 2
+                               + elogs['val_t_wave'].item() ** 2)
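+            # L2 norm of the (P, QRS, T) F1 vector; tracked for reporting
+            # only - the optimized objective is the P-wave score alone.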
+            self.l2_max = max(cur_l2, self.l2_max)
+            print(f'P_VAL: {self.best_p_valscore:.6f}, QRS_VAL: {self.best_qrs_valscore:.6f}, '
+                  f'T_VAL: {self.best_t_valscore:.6f}, L_VEC: {self.l2_max:.6f}')
+
+
+class SegmentationMetric:
+    def __init__(self,
+                 monitor: Literal['p', 'qrs', 't', 'all'] = 'all',
+                 orientation_type: Literal['onset', 'offset', 'all'] = 'all',
+                 return_type: Literal['precision', 'recall', 'f1', 'confusion_matrix'] = 'confusion_matrix',
+                 samples=75):
+
+        assert monitor in ['p', 'qrs', 't', 'all']
+        assert orientation_type in ['onset', 'offset', 'all']
+        assert return_type in ['precision', 'recall', 'f1', 'confusion_matrix']
+
+        self.samples = samples
+        self.monitor = monitor
+        self.orientation_type = orientation_type
+        self.return_type = return_type
+
+        self.metric_to_func = {'precision': self.__precision,
+                               'recall': self.__recall,
+                               'f1': self.__f1}
+
+    def __call__(self, y_pred, y_true):
+        assert y_pred.shape == y_true.shape
+        assert len(y_pred.shape) == 2
+
+        matrix = np.zeros((2, 2), dtype=int)
+        monitors = ['p', 'qrs', 't'] if self.monitor == 'all' else [self.monitor]
+        orientations = ['onset', 'offset'] if self.orientation_type == 'all' else [self.orientation_type]
+        for wave in monitors:
+            for orientation in orientations:
+                matrix += self.__handle(y_pred, y_true, wave, orientation)
+
+        if self.return_type == 'confusion_matrix':
+            return matrix
+
+        return self.metric_to_func[self.return_type](matrix[0, 1], matrix[1, 0], matrix[1, 1])
+
+    def __handle(self, y_pred, y_true, wave, orientation) -> np.ndarray:
+        # Returns a 2x2 confusion matrix laid out as [[0, fp], [fn, tp]].
+        index = ['p', 'qrs', 't'].index(wave) + 1
+        orientation = 2 * ['offset', 'onset'].index(orientation) - 1
+        y_pred[y_true == 4] = 0
+
+        y_true, y_pred = (y_true == index), (y_pred == index)
+
+        wave_true = np.logical_and(np.roll(y_true, orientation) != 1, y_true == 1).astype(int)
+        wave_pred = np.logical_and(np.roll(y_pred, orientation) != 1, y_pred == 1).astype(int)
+
+        true_batch, true_indexes = np.where(wave_true == 1)
+
+        tp = fn = 0
+
+        for batch, x in zip(true_batch, true_indexes):
+            # A predicted boundary within +-samples/2 of a true boundary counts
+            # as a hit; the window is then cleared so it cannot double-count.
+            wave = wave_pred[batch][x - self.samples // 2: x + self.samples // 2]
+            if wave.sum():
+                tp += 1
+            else:
+                fn += 1
+            wave[:] = -1
+
+        fp = (wave_pred[:, self.samples:-self.samples] == 1).sum()
+        return np.array([[0, fp], [fn, tp]])
+
+    @staticmethod
+    def __precision(fp, fn, tp):
+        if fp + tp == 0:
+            return 1
+        return tp / (tp + fp)
+
+    @staticmethod
+    def __recall(fp, fn, tp):
+        if fn + tp == 0:
+            return 1
+        return tp / (tp + fn)
+
+    @staticmethod
+    def __f1(fp, fn, tp):
+        precision = SegmentationMetric.__precision(fp, fn, tp)
+        recall = SegmentationMetric.__recall(fp, fn, tp)
+        if precision + recall == 0:
+            return 0
+        return 2 * (precision * recall) / (precision + recall)
+
+    def __str__(self):
+        return f'{self.monitor}_{self.orientation_type}'
\ No newline at end of file
diff --git a/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/model.py b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/model.py
new file mode 100644
index 00000000..0b07b333
--- /dev/null
+++ b/examples/Machine_learning/NeuralNetwork/Segmentation/scripts/model.py
@@ -0,0 +1,141 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class UNetConv(nn.Module):
+    def __init__(self, in_channels, out_channels, ks, p):
+        # Channel counts may arrive as floats (q ** k * in_channels).
+        in_channels = int(in_channels)
+        out_channels = int(out_channels)
+        super(UNetConv, self).__init__()
+        self._model = nn.Sequential(
+            nn.Conv1d(in_channels, out_channels, kernel_size=ks, padding=ks // 2),
+            nn.BatchNorm1d(out_channels),
+            nn.ReLU(),
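+            # Dropout1d zeroes whole channels (available since torch 1.12);
+            # the rate p is one of the tuned hyperparameters.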
+            nn.Dropout1d(p=p),
+            nn.Conv1d(out_channels, out_channels, kernel_size=ks, padding=ks // 2),
+            nn.BatchNorm1d(out_channels),
+            nn.ReLU()
+        )
+
+    def forward(self, X):
+        return self._model(X)
+
+
+class UNetDown(nn.Module):
+    def __init__(self, in_channels, out_channels, ks, p):
+        super(UNetDown, self).__init__()
+        self._model = nn.Sequential(
+            nn.MaxPool1d(2),
+            UNetConv(in_channels, out_channels, ks, p)
+        )
+
+    def forward(self, X):
+        return self._model(X)
+
+
+class UNetUp(nn.Module):
+    def __init__(self, in_channels, in_channels_skip, out_channels, ks, p):
+        super(UNetUp, self).__init__()
+        in_channels = int(in_channels)
+        in_channels_skip = int(in_channels_skip)
+        out_channels = int(out_channels)
+
+        self._up = nn.ConvTranspose1d(in_channels, in_channels,
+                                      kernel_size=ks - 1,
+                                      stride=2,
+                                      padding=(ks - 1) // 2 - 1)
+        self._model = UNetConv(in_channels + in_channels_skip, out_channels, ks, p)
+
+    def forward(self, X_skip, X):
+        X = self._up(X)
+        # Pad to the skip connection's length before concatenating.
+        diff = X_skip.size()[2] - X.size()[2]
+        X = F.pad(X, (diff // 2, diff - diff // 2))
+        return self._model(torch.cat([X_skip, X], dim=1))
+
+
+class Encoder(nn.Module):
+    def __init__(self, in_channels, channels_coeff=1, q=2, kernel_size=23, p=0.1):
+        super(Encoder, self).__init__()
+        self.in_channels = in_channels
+        self.kernel_size = kernel_size
+        self.q = q
+        self.p = p
+        self._input = UNetConv(q ** 0 * self.in_channels, q ** 1 * in_channels, kernel_size, p)
+        self._down1 = UNetDown(q ** 1 * self.in_channels, q ** 2 * self.in_channels, kernel_size, p)
+        self._down2 = UNetDown(q ** 2 * self.in_channels, q ** 3 * self.in_channels, kernel_size, p)
+        self._down3 = UNetDown(q ** 3 * self.in_channels, q ** 4 * self.in_channels, kernel_size, p)
+        self._down4 = UNetDown(q ** 4 * self.in_channels, q ** 5 * self.in_channels, kernel_size, p)
+        self._down5 = UNetDown(q ** 5 * self.in_channels, q ** 6 * self.in_channels, kernel_size, p)
+
+    def forward(self, x):
+        x1 = self._input(x)
+        x2 = self._down1(x1)
+        x3 = self._down2(x2)
+        x4 = self._down3(x3)
+        x5 = self._down4(x4)
+        return x1, x2, x3, x4, x5, self._down5(x5)
+
+
+class Decoder(nn.Module):
+    def __init__(self, encoder: Encoder, num_classes, reshape=False):
+        super(Decoder, self).__init__()
+        self.encoder = encoder
+        self._up1 = UNetUp(encoder.q ** 6 * encoder.in_channels,
+                           encoder.q ** 5 * encoder.in_channels,
+                           encoder.q ** 5 * encoder.in_channels,
+                           encoder.kernel_size,
+                           encoder.p)
+
+        self._up2 = UNetUp(encoder.q ** 5 * encoder.in_channels,
+                           encoder.q ** 4 * encoder.in_channels,
+                           encoder.q ** 4 * encoder.in_channels,
+                           encoder.kernel_size,
+                           encoder.p)
+
+        self._up3 = UNetUp(encoder.q ** 4 * encoder.in_channels,
+                           encoder.q ** 3 * encoder.in_channels,
+                           encoder.q ** 3 * encoder.in_channels,
+                           encoder.kernel_size,
+                           encoder.p)
+
+        self._up4 = UNetUp(encoder.q ** 3 * encoder.in_channels,
+                           encoder.q ** 2 * encoder.in_channels,
+                           encoder.q ** 2 * encoder.in_channels,
+                           encoder.kernel_size,
+                           encoder.p)
+
+        self._up5 = UNetUp(encoder.q ** 2 * encoder.in_channels,
+                           encoder.q ** 1 * encoder.in_channels,
+                           num_classes,
+                           encoder.kernel_size,
+                           encoder.p)
+
+        self._output = nn.Conv1d(num_classes, num_classes, kernel_size=1)
+        self.reshape = reshape
+        self.num_classes = num_classes
+
+    def forward(self, x1, x2, x3, x4, x5, x):
+        batch_size = len(x)
+        x = self._up1(x5, x)
+        x = self._up2(x4, x)
+        x = self._up3(x3, x)
+        x = self._up4(x2, x)
+        x = self._up5(x1, x)
+        x = self._output(x)
+        if self.reshape:
+            x = x.reshape(batch_size, 4, 12, -1)
+        return x
+
+
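+# Thin wrapper tying the encoder's skip outputs to the decoder inputs.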
+class UNet(nn.Module):
+    def __init__(self, encoder: Encoder, decoder: Decoder):
+        super(UNet, self).__init__()
+        self.encoder = encoder
+        self.decoder = decoder
+
+    def forward(self, x):
+        return self.decoder(*self.encoder(x))
+
+    def log(self):
+        return f"UNet(in_channels={self.encoder.in_channels}, num_classes={self.decoder.num_classes}, " \
+               f"q={self.encoder.q}, reshape={self.decoder.reshape}, kernel_size={self.encoder.kernel_size})"
\ No newline at end of file
diff --git a/examples/Machine_learning/NeuralNetwork/__init__.py b/examples/Machine_learning/NeuralNetwork/__init__.py
new file mode 100644
index 00000000..e69de29b