Source code for secmlt.adv.evasion.aggregators.ensemble

"""Ensemble metrics for getting best results across multiple attacks."""

from abc import ABC, abstractmethod
from typing import Union

import torch
from secmlt.adv.evasion.perturbation_models import LpPerturbationModels
from secmlt.models.base_model import BaseModel
from secmlt.utils.tensor_utils import atleast_kd
from torch.utils.data import DataLoader, TensorDataset


[docs] class Ensemble(ABC): """Abstract class for creating an ensemble metric."""
[docs] def __call__( self, model: BaseModel, data_loader: DataLoader, adv_loaders: list[DataLoader], ) -> DataLoader[torch.Tuple[torch.Tensor]]: """ Get the worst-case of the metric with the given implemented criterion. Parameters ---------- model : BaseModel Model to use for predictions. data_loader : DataLoader Test dataloader. adv_loaders : list[DataLoader] List of dataloaders returned by multiple attacks. Returns ------- DataLoader[torch.Tuple[torch.Tensor]] The worst-case metric computed on the multiple attacks. """ best_x_adv_data = [] original_labels = [] adv_loaders = [iter(a) for a in adv_loaders] for samples, labels in data_loader: best_x_adv = samples.clone() for adv_loader in adv_loaders: x_adv, _ = next(adv_loader) best_x_adv = self._get_best(model, samples, labels, x_adv, best_x_adv) best_x_adv_data.append(best_x_adv) original_labels.append(labels) best_x_adv_dataset = TensorDataset( torch.vstack(best_x_adv_data), torch.hstack(original_labels), ) return DataLoader( best_x_adv_dataset, batch_size=data_loader.batch_size, )
[docs] @abstractmethod def _get_best( self, model: BaseModel, samples: torch.Tensor, labels: torch.Tensor, x_adv: torch.Tensor, best_x_adv: torch.Tensor, ) -> torch.Tensor: """ Get the best result from multiple attacks. Parameters ---------- model : BaseModel Model to use to predict. samples : torch.Tensor Input samples. labels : torch.Tensor Labels for the samples. x_adv : torch.Tensor Adversarial examples. best_x_adv : torch.Tensor Best adversarial examples found so far. Returns ------- torch.Tensor Best adversarial examples between the current x_adv and the ones already tested on the given model. """ ...
[docs] class MinDistanceEnsemble(Ensemble): """Wrapper for ensembling results of multiple minimum-distance attacks."""
[docs] def __init__(self, perturbation_model: str) -> None: """ Create MinDistance Ensemble. Parameters ---------- perturbation_model : str Perturbation model to use to compute the distance. """ self.perturbation_model = perturbation_model
[docs] def _get_best( self, model: BaseModel, samples: torch.Tensor, labels: torch.Tensor, x_adv: torch.Tensor, best_x_adv: torch.Tensor, ) -> torch.Tensor: """ Get the adversarial examples with minimal perturbation. Parameters ---------- model : BaseModel Model to use to predict. samples : torch.Tensor Input samples. labels : torch.Tensor Labels for the samples. x_adv : torch.Tensor Adversarial examples. best_x_adv : torch.Tensor Best adversarial examples found so far. Returns ------- torch.Tensor The minimum-distance adversarial examples found so far. """ preds = model(x_adv).argmax(dim=1) is_adv = preds.type(labels.dtype) == labels norms = ( (samples - x_adv) .flatten(start_dim=1) .norm(LpPerturbationModels.get_p(self.perturbation_model), dim=-1) ) best_adv_norms = ( (samples - best_x_adv) .flatten(start_dim=1) .norm(LpPerturbationModels.get_p(self.perturbation_model)) ) is_best = torch.logical_and(norms < best_adv_norms, is_adv) return torch.where( atleast_kd(is_best, len(x_adv.shape)), x_adv, best_x_adv, )
[docs] class FixedEpsilonEnsemble(Ensemble): """Wrapper for ensembling results of multiple fixed-epsilon attacks."""
[docs] def __init__( self, loss_fn: torch.nn.Module, maximize: bool = True, y_target: Union[torch.Tensor, None] = None, ) -> None: """ Create fixed epsilon ensemble. Parameters ---------- loss_fn : torch.nn.Module Loss function to maximize (or minimize). maximize : bool, optional If True maximizes the loss otherwise it minimizes it, by default True. y_target : torch.Tensor | None, optional Target label for targeted attacks, None for untargeted, by default None. """ self.maximize = maximize self.loss_fn = loss_fn self.y_target = y_target
[docs] def _get_best( self, model: BaseModel, samples: torch.Tensor, labels: torch.Tensor, x_adv: torch.Tensor, best_x_adv: torch.Tensor, ) -> torch.Tensor: """ Get the adversarial examples with maximum (or minimum) loss. Parameters ---------- model : BaseModel Model to use to predict. samples : torch.Tensor Input samples. labels : torch.Tensor Labels for the samples. x_adv : torch.Tensor Adversarial examples. best_x_adv : torch.Tensor Best adversarial examples found so far. Returns ------- torch.Tensor The maximum-loss adversarial examples found so far. """ if self.y_target is None: targets = labels else: targets = torch.ones_like(labels) * self.y_target loss = self.loss_fn(model(x_adv), targets) best_adv_loss = self.loss_fn(model(best_x_adv), targets) if self.maximize is True: is_best = loss > best_adv_loss else: is_best = loss < best_adv_loss return torch.where( atleast_kd(is_best, len(x_adv.shape)), x_adv, best_x_adv, )