ModelWrapper

ModelWrapper is an object similar to keras.Model that trains, tests, and predicts on datasets.

Using our wrapper makes it easy to do Monte-Carlo sampling with the `iterations` parameter. As an optimization, instead of running a for-loop to perform MC sampling, we stack the examples in memory and run a single forward pass.
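
To illustrate the stacking trick, here is a minimal sketch with a toy model (a simplified illustration, not the exact baal internals):

import torch

# Toy model; in practice this would be your MC-Dropout model.
model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 28 * 28, 10))
batch_size, iterations = 32, 20
data = torch.randn(batch_size, 3, 28, 28)

# Repeat the batch along dim 0: one large forward pass instead of 20 small ones.
stacked = torch.cat([data] * iterations, dim=0)  # [batch_size * iterations, 3, 28, 28]
out = model(stacked)                             # [batch_size * iterations, n_classes]

# Unfold back into a per-iteration axis: [batch_size, n_classes, iterations].
out = out.view(iterations, batch_size, -1).permute(1, 2, 0)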

Example

from baal.modelwrapper import ModelWrapper
from baal.active.dataset import ActiveLearningDataset
from torch.utils.data import Dataset

# You define ModelWrapper with a PyTorch model and a criterion.
wrapper = ModelWrapper(model=your_model, criterion=your_criterion)

# Assuming you have your ActiveLearningDataset ready,
al_dataset: ActiveLearningDataset = ...
test_dataset: Dataset = ...

train_history = wrapper.train_on_dataset(al_dataset, optimizer=your_optimizer, batch_size=32, epoch=10, use_cuda=True)
# We can also use Bayesian model averaging (BMA) at test time via `average_predictions`.
test_values = wrapper.test_on_dataset(test_dataset, average_predictions=20, **kwargs)

# We perform Monte-Carlo sampling using the `iterations` argument.
predictions = wrapper.predict_on_dataset(al_dataset.pool, iterations=20, **kwargs)
predictions.shape
# > [len(al_dataset.pool), num_outputs, 20]
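
These predictions plug directly into an acquisition heuristic. A sketch, assuming baal's BALD heuristic and a labelling budget of 100 (both illustrative choices):

from baal.active.heuristics import BALD

heuristic = BALD()
# Rank pool indices from most to least uncertain.
ranks = heuristic(predictions)
# Label the 100 most uncertain pool examples.
al_dataset.label(ranks[:100])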

API

baal.ModelWrapper

Bases: MetricMixin

Wrapper created to ease the training/testing/loading.

Parameters:

model (Module): The model to optimize. Required.
criterion (Callable): A loss function. Required.
replicate_in_memory (bool): If True, stack the batch in memory so that all MC iterations run in a single forward pass (faster, but more memory-hungry). Default: True.
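
For instance, a memory-hungry model can trade speed for memory by disabling in-memory replication. A minimal sketch, where the torchvision model and hyperparameters are illustrative choices:

import torch
from torchvision.models import vgg16
from baal.modelwrapper import ModelWrapper

model = vgg16(num_classes=10)
criterion = torch.nn.CrossEntropyLoss()
# Fall back to a for-loop over MC iterations: slower, but lighter on memory.
wrapper = ModelWrapper(model=model, criterion=criterion, replicate_in_memory=False)
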
Source code in baal/modelwrapper.py
class ModelWrapper(MetricMixin):
    """
    Wrapper created to ease the training/testing/loading.

    Args:
        model (nn.Module): The model to optimize.
        criterion (Callable): A loss function.
        replicate_in_memory (bool): If True, stack the batch in memory to run all MC iterations in a single forward pass.
    """

    def __init__(self, model, criterion, replicate_in_memory=True):
        self.model = model
        self.criterion = criterion
        self.metrics = dict()
        self.active_learning_metrics = defaultdict(dict)
        self.add_metric("loss", lambda: Loss())
        self.replicate_in_memory = replicate_in_memory
        self._active_dataset_size = -1

        raise_warnings_cache_replicated(self.model, replicate_in_memory=replicate_in_memory)

    def train_on_dataset(
        self,
        dataset,
        optimizer,
        batch_size,
        epoch,
        use_cuda,
        workers=4,
        collate_fn: Optional[Callable] = None,
        regularizer: Optional[Callable] = None,
    ):
        """
        Train for `epoch` epochs on a Dataset `dataset`.

        Args:
            dataset (Dataset): PyTorch Dataset to be trained on.
            optimizer (optim.Optimizer): Optimizer to use.
            batch_size (int): The batch size used in the DataLoader.
            epoch (int): Number of epochs to train for.
            use_cuda (bool): Use cuda or not.
            workers (int): Number of workers for the multiprocessing.
            collate_fn (Optional[Callable]): The collate function to use.
            regularizer (Optional[Callable]): The loss regularization for training.

        Returns:
            The training history.
        """
        dataset_size = len(dataset)
        self.train()
        self.set_dataset_size(dataset_size)
        history = []
        log.info("Starting training", epoch=epoch, dataset=dataset_size)
        collate_fn = collate_fn or default_collate
        for _ in range(epoch):
            self._reset_metrics("train")
            for data, target, *_ in DataLoader(
                dataset, batch_size, True, num_workers=workers, collate_fn=collate_fn
            ):
                _ = self.train_on_batch(data, target, optimizer, use_cuda, regularizer)
            history.append(self.get_metrics("train")["train_loss"])

        optimizer.zero_grad()  # Ensure that the gradient is flushed.
        log.info("Training complete", train_loss=self.get_metrics("train")["train_loss"])
        self.active_step(dataset_size, self.get_metrics("train"))
        return history

    def test_on_dataset(
        self,
        dataset: Dataset,
        batch_size: int,
        use_cuda: bool,
        workers: int = 4,
        collate_fn: Optional[Callable] = None,
        average_predictions: int = 1,
    ):
        """
        Test the model on a Dataset `dataset`.

        Args:
            dataset (Dataset): Dataset to evaluate on.
            batch_size (int): Batch size used for evaluation.
            use_cuda (bool): Use Cuda or not.
            workers (int): Number of workers to use.
            collate_fn (Optional[Callable]): The collate function to use.
            average_predictions (int): The number of predictions to average to
                compute the test loss.

        Returns:
            Average loss value over the dataset.
        """
        self.eval()
        log.info("Starting evaluating", dataset=len(dataset))
        self._reset_metrics("test")

        for data, target, *_ in DataLoader(
            dataset, batch_size, False, num_workers=workers, collate_fn=collate_fn
        ):
            _ = self.test_on_batch(
                data, target, cuda=use_cuda, average_predictions=average_predictions
            )

        log.info("Evaluation complete", test_loss=self.get_metrics("test")["test_loss"])
        self.active_step(None, self.get_metrics("test"))
        return self.get_metrics("test")["test_loss"]

    def train_and_test_on_datasets(
        self,
        train_dataset: Dataset,
        test_dataset: Dataset,
        optimizer: Optimizer,
        batch_size: int,
        epoch: int,
        use_cuda: bool,
        workers: int = 4,
        collate_fn: Optional[Callable] = None,
        regularizer: Optional[Callable] = None,
        return_best_weights=False,
        patience=None,
        min_epoch_for_es=0,
        skip_epochs=1,
    ):
        """
        Train and test the model on both `train_dataset` and `test_dataset`.

        Args:
            train_dataset (Dataset): Dataset to train on.
            test_dataset (Dataset): Dataset to evaluate on.
            optimizer (Optimizer): Optimizer to use during training.
            batch_size (int): Batch size used.
            epoch (int): Number of epochs to train for.
            use_cuda (bool): Use Cuda or not.
            workers (int): Number of workers to use.
            collate_fn (Optional[Callable]): The collate function to use.
            regularizer (Optional[Callable]): The loss regularization for training.
            return_best_weights (bool): If True, will keep the best weights and return them.
            patience (Optional[int]): If provided, will use early stopping to stop after
                                        `patience` epoch without improvement.
            min_epoch_for_es (int): Epoch at which the early stopping starts.
            skip_epochs (int): Number of epochs to skip between `test_on_dataset` calls.

        Returns:
            History and best weights if required.
        """
        best_weight = None
        best_loss = 1e10
        best_epoch = 0
        hist = []
        for e in range(epoch):
            _ = self.train_on_dataset(
                train_dataset, optimizer, batch_size, 1, use_cuda, workers, collate_fn, regularizer
            )
            if e % skip_epochs == 0:
                te_loss = self.test_on_dataset(
                    test_dataset, batch_size, use_cuda, workers, collate_fn
                )
                hist.append(self.get_metrics())
                if te_loss < best_loss:
                    best_epoch = e
                    best_loss = te_loss
                    if return_best_weights:
                        best_weight = deepcopy(self.state_dict())

                if patience is not None and (e - best_epoch) > patience and (e > min_epoch_for_es):
                    # Early stopping
                    break
            else:
                hist.append(self.get_metrics("train"))

        if return_best_weights:
            return hist, best_weight
        else:
            return hist

    def predict_on_dataset_generator(
        self,
        dataset: Dataset,
        batch_size: int,
        iterations: int,
        use_cuda: bool,
        workers: int = 4,
        collate_fn: Optional[Callable] = None,
        half=False,
        verbose=True,
    ):
        """
        Use the model to predict on a dataset `iterations` times.

        Args:
            dataset (Dataset): Dataset to predict on.
            batch_size (int):  Batch size to use during prediction.
            iterations (int): Number of iterations per sample.
            use_cuda (bool): Use CUDA or not.
            workers (int): Number of workers to use.
            collate_fn (Optional[Callable]): The collate function to use.
            half (bool): If True use half precision.
            verbose (bool): If True use tqdm to display progress

        Notes:
            The "batch" is made of `batch_size` * `iterations` samples.

        Returns:
            A generator of arrays [batch_size, n_classes, ..., n_iterations].
        """
        self.eval()
        if len(dataset) == 0:
            return None

        log.info("Start Predict", dataset=len(dataset))
        collate_fn = collate_fn or default_collate
        loader = DataLoader(dataset, batch_size, False, num_workers=workers, collate_fn=collate_fn)
        if verbose:
            loader = tqdm(loader, total=len(loader), file=sys.stdout)
        for data, *_ in loader:
            pred = self.predict_on_batch(data, iterations, use_cuda)
            pred = map_on_tensor(lambda x: x.detach(), pred)
            if half:
                pred = map_on_tensor(lambda x: x.half(), pred)
            yield map_on_tensor(lambda x: x.cpu().numpy(), pred)

    def predict_on_dataset(
        self,
        dataset: Dataset,
        batch_size: int,
        iterations: int,
        use_cuda: bool,
        workers: int = 4,
        collate_fn: Optional[Callable] = None,
        half=False,
        verbose=True,
    ):
        """
        Use the model to predict on a dataset `iterations` times.

        Args:
            dataset (Dataset): Dataset to predict on.
            batch_size (int):  Batch size to use during prediction.
            iterations (int): Number of iterations per sample.
            use_cuda (bool): Use CUDA or not.
            workers (int): Number of workers to use.
            collate_fn (Optional[Callable]): The collate function to use.
            half (bool): If True use half precision.
            verbose (bool): If True use tqdm to show progress.

        Notes:
            The "batch" is made of `batch_size` * `iterations` samples.

        Returns:
            Array [n_samples, n_outputs, ..., n_iterations].
        """
        preds = list(
            self.predict_on_dataset_generator(
                dataset=dataset,
                batch_size=batch_size,
                iterations=iterations,
                use_cuda=use_cuda,
                workers=workers,
                collate_fn=collate_fn,
                half=half,
                verbose=verbose,
            )
        )

        if len(preds) > 0 and not isinstance(preds[0], Sequence):
            # Is an Array or a Tensor
            return np.vstack(preds)
        return [np.vstack(pr) for pr in zip(*preds)]

    def train_on_batch(
        self, data, target, optimizer, cuda=False, regularizer: Optional[Callable] = None
    ):
        """
        Train the current model on a batch using `optimizer`.

        Args:
            data (Tensor): The model input.
            target (Tensor): The ground truth.
            optimizer (optim.Optimizer): An optimizer.
            cuda (bool): Use CUDA or not.
            regularizer (Optional[Callable]): The loss regularization for training.


        Returns:
            Tensor, the loss computed from the criterion.
        """

        if cuda:
            data, target = to_cuda(data), to_cuda(target)
        optimizer.zero_grad()
        output = self.model(data)
        loss = self.criterion(output, target)

        if regularizer:
            regularized_loss = loss + regularizer()
            regularized_loss.backward()
        else:
            loss.backward()

        optimizer.step()
        self._update_metrics(output, target, loss, filter="train")
        return loss

    def test_on_batch(
        self,
        data: torch.Tensor,
        target: torch.Tensor,
        cuda: bool = False,
        average_predictions: int = 1,
    ):
        """
        Test the current model on a batch.

        Args:
            data (Tensor): The model input.
            target (Tensor): The ground truth.
            cuda (bool): Use CUDA or not.
            average_predictions (int): The number of predictions to average to
                compute the test loss.

        Returns:
            Tensor, the loss computed from the criterion.
        """
        with torch.no_grad():
            if cuda:
                data, target = to_cuda(data), to_cuda(target)

            preds = map_on_tensor(
                lambda p: p.mean(-1),
                self.predict_on_batch(data, iterations=average_predictions, cuda=cuda),
            )
            loss = self.criterion(preds, target)
            self._update_metrics(preds, target, loss, "test")
            return loss

    def predict_on_batch(self, data, iterations=1, cuda=False):
        """
        Get the model's prediction on a batch.

        Args:
            data (Tensor): The model input.
            iterations (int): Number of predictions to perform.
            cuda (bool): Use CUDA or not.

        Returns:
            Tensor, the model predictions,
                    shape = [batch_size, n_classes, ..., n_iterations].

        Raises:
            RuntimeError if CUDA runs out of memory during data replication.
        """
        with torch.no_grad():
            if cuda:
                data = to_cuda(data)
            if self.replicate_in_memory:
                data = map_on_tensor(lambda d: stack_in_memory(d, iterations), data)
                try:
                    out = self.model(data)
                except RuntimeError as e:
                    raise RuntimeError(
                        """CUDA ran out of memory while BaaL tried to replicate data. See the exception above.
                    Use `replicate_in_memory=False` in order to reduce the memory requirements.
                    Note that there will be some speed trade-offs"""
                    ) from e
                out = map_on_tensor(lambda o: o.view([iterations, -1, *o.size()[1:]]), out)
                out = map_on_tensor(lambda o: o.permute(1, 2, *range(3, o.ndimension()), 0), out)
            else:
                out = [self.model(data) for _ in range(iterations)]
                out = _stack_preds(out)
            return out

    def get_params(self):
        """
        Return the parameters to optimize.

        Returns:
            Config for parameters.
        """
        return self.model.parameters()

    def state_dict(self):
        """Get the state dict(s)."""
        return self.model.state_dict()

    def load_state_dict(self, state_dict, strict=True):
        """Load the model with `state_dict`."""
        self.model.load_state_dict(state_dict, strict=strict)

    def train(self):
        """Set the model in `train` mode."""
        self.model.train()

    def eval(self):
        """Set the model in `eval mode`."""
        self.model.eval()

    def reset_fcs(self):
        """Reset all torch.nn.Linear layers."""

        def reset(m):
            if isinstance(m, torch.nn.Linear):
                m.reset_parameters()

        self.model.apply(reset)

    def reset_all(self):
        """Reset all *resetable* layers."""

        def reset(m):
            # `apply` already visits every sub-module, so reset each one once.
            getattr(m, "reset_parameters", lambda: None)()

        self.model.apply(reset)

    def set_dataset_size(self, dataset_size: int):
        """
        Set state for dataset size. Useful for tracking.

        Args:
            dataset_size: Current dataset size, stored for tracking.
        """
        self._active_dataset_size = dataset_size

eval()

Set the model in eval mode.

Source code in baal/modelwrapper.py
def eval(self):
    """Set the model in `eval mode`."""
    self.model.eval()

get_params()

Return the parameters to optimize.

Returns:

Config for parameters.

Source code in baal/modelwrapper.py
def get_params(self):
    """
    Return the parameters to optimize.

    Returns:
        Config for parameters.
    """
    return self.model.parameters()
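
These parameters are typically handed to a fresh optimizer. A minimal sketch, reusing `wrapper` from the example above:

import torch.optim as optim

optimizer = optim.SGD(wrapper.get_params(), lr=0.001, momentum=0.9)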

load_state_dict(state_dict, strict=True)

Load the model with state_dict.

Source code in baal/modelwrapper.py
def load_state_dict(self, state_dict, strict=True):
    """Load the model with `state_dict`."""
    self.model.load_state_dict(state_dict, strict=strict)

predict_on_batch(data, iterations=1, cuda=False)

Get the model's prediction on a batch.

Parameters:

data (Tensor): The model input. Required.
iterations (int): Number of predictions to perform. Default: 1.
cuda (bool): Use CUDA or not. Default: False.

Returns:

Tensor, the model predictions, shape [batch_size, n_classes, ..., n_iterations].

Source code in baal/modelwrapper.py
def predict_on_batch(self, data, iterations=1, cuda=False):
    """
    Get the model's prediction on a batch.

    Args:
        data (Tensor): The model input.
        iterations (int): Number of predictions to perform.
        cuda (bool): Use CUDA or not.

    Returns:
        Tensor, the model predictions,
                shape = [batch_size, n_classes, ..., n_iterations].

    Raises:
        RuntimeError if CUDA runs out of memory during data replication.
    """
    with torch.no_grad():
        if cuda:
            data = to_cuda(data)
        if self.replicate_in_memory:
            data = map_on_tensor(lambda d: stack_in_memory(d, iterations), data)
            try:
                out = self.model(data)
            except RuntimeError as e:
                raise RuntimeError(
                    """CUDA ran out of memory while BaaL tried to replicate data. See the exception above.
                Use `replicate_in_memory=False` in order to reduce the memory requirements.
                Note that there will be some speed trade-offs"""
                ) from e
            out = map_on_tensor(lambda o: o.view([iterations, -1, *o.size()[1:]]), out)
            out = map_on_tensor(lambda o: o.permute(1, 2, *range(3, o.ndimension()), 0), out)
        else:
            out = [self.model(data) for _ in range(iterations)]
            out = _stack_preds(out)
        return out
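
A minimal sketch, assuming `wrapper` from the example above and a single input batch `data`:

# 20 stochastic forward passes over one batch.
out = wrapper.predict_on_batch(data, iterations=20, cuda=False)
# out: [batch_size, n_classes, ..., 20]; averaging the last axis
# gives a Bayesian model average.
bma = out.mean(-1)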

predict_on_dataset(dataset, batch_size, iterations, use_cuda, workers=4, collate_fn=None, half=False, verbose=True)

Use the model to predict on a dataset `iterations` times.

Parameters:

dataset (Dataset): Dataset to predict on. Required.
batch_size (int): Batch size to use during prediction. Required.
iterations (int): Number of iterations per sample. Required.
use_cuda (bool): Use CUDA or not. Required.
workers (int): Number of workers to use. Default: 4.
collate_fn (Optional[Callable]): The collate function to use. Default: None.
half (bool): If True, use half precision. Default: False.
verbose (bool): If True, use tqdm to show progress. Default: True.

Notes:

The "batch" is made of batch_size * iterations samples.

Returns:

Array [n_samples, n_outputs, ..., n_iterations].

Source code in baal/modelwrapper.py
def predict_on_dataset(
    self,
    dataset: Dataset,
    batch_size: int,
    iterations: int,
    use_cuda: bool,
    workers: int = 4,
    collate_fn: Optional[Callable] = None,
    half=False,
    verbose=True,
):
    """
    Use the model to predict on a dataset `iterations` times.

    Args:
        dataset (Dataset): Dataset to predict on.
        batch_size (int):  Batch size to use during prediction.
        iterations (int): Number of iterations per sample.
        use_cuda (bool): Use CUDA or not.
        workers (int): Number of workers to use.
        collate_fn (Optional[Callable]): The collate function to use.
        half (bool): If True use half precision.
        verbose (bool): If True use tqdm to show progress.

    Notes:
        The "batch" is made of `batch_size` * `iterations` samples.

    Returns:
        Array [n_samples, n_outputs, ..., n_iterations].
    """
    preds = list(
        self.predict_on_dataset_generator(
            dataset=dataset,
            batch_size=batch_size,
            iterations=iterations,
            use_cuda=use_cuda,
            workers=workers,
            collate_fn=collate_fn,
            half=half,
            verbose=verbose,
        )
    )

    if len(preds) > 0 and not isinstance(preds[0], Sequence):
        # Is an Array or a Tensor
        return np.vstack(preds)
    return [np.vstack(pr) for pr in zip(*preds)]
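
A sketch for a classification model, reusing `wrapper` and `al_dataset` from the example above:

predictions = wrapper.predict_on_dataset(
    al_dataset.pool, batch_size=32, iterations=20, use_cuda=True
)
# predictions: numpy array [n_samples, n_classes, 20]
bma = predictions.mean(-1)             # Bayesian model average
spread = predictions.std(-1).mean(-1)  # rough per-sample disagreement across MC samples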

predict_on_dataset_generator(dataset, batch_size, iterations, use_cuda, workers=4, collate_fn=None, half=False, verbose=True)

Use the model to predict on a dataset `iterations` times.

Parameters:

dataset (Dataset): Dataset to predict on. Required.
batch_size (int): Batch size to use during prediction. Required.
iterations (int): Number of iterations per sample. Required.
use_cuda (bool): Use CUDA or not. Required.
workers (int): Number of workers to use. Default: 4.
collate_fn (Optional[Callable]): The collate function to use. Default: None.
half (bool): If True, use half precision. Default: False.
verbose (bool): If True, use tqdm to display progress. Default: True.

Notes:

The "batch" is made of batch_size * iterations samples.

Returns:

A generator of arrays [batch_size, n_classes, ..., n_iterations].

Source code in baal/modelwrapper.py
def predict_on_dataset_generator(
    self,
    dataset: Dataset,
    batch_size: int,
    iterations: int,
    use_cuda: bool,
    workers: int = 4,
    collate_fn: Optional[Callable] = None,
    half=False,
    verbose=True,
):
    """
    Use the model to predict on a dataset `iterations` times.

    Args:
        dataset (Dataset): Dataset to predict on.
        batch_size (int):  Batch size to use during prediction.
        iterations (int): Number of iterations per sample.
        use_cuda (bool): Use CUDA or not.
        workers (int): Number of workers to use.
        collate_fn (Optional[Callable]): The collate function to use.
        half (bool): If True use half precision.
        verbose (bool): If True use tqdm to display progress

    Notes:
        The "batch" is made of `batch_size` * `iterations` samples.

    Returns:
        A generator of arrays [batch_size, n_classes, ..., n_iterations].
    """
    self.eval()
    if len(dataset) == 0:
        return None

    log.info("Start Predict", dataset=len(dataset))
    collate_fn = collate_fn or default_collate
    loader = DataLoader(dataset, batch_size, False, num_workers=workers, collate_fn=collate_fn)
    if verbose:
        loader = tqdm(loader, total=len(loader), file=sys.stdout)
    for data, *_ in loader:
        pred = self.predict_on_batch(data, iterations, use_cuda)
        pred = map_on_tensor(lambda x: x.detach(), pred)
        if half:
            pred = map_on_tensor(lambda x: x.half(), pred)
        yield map_on_tensor(lambda x: x.cpu().numpy(), pred)
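
Because batches are yielded one at a time, the full prediction array never has to fit in memory. A sketch, where `save_batch` stands in for any hypothetical downstream step:

generator = wrapper.predict_on_dataset_generator(
    dataset=al_dataset.pool, batch_size=32, iterations=20, use_cuda=True
)
for batch_preds in generator:
    # batch_preds: numpy array [batch_size, n_classes, ..., 20]
    save_batch(batch_preds)  # hypothetical: stream to disk or into a heuristic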

reset_all()

Reset all resettable layers.

Source code in baal/modelwrapper.py
def reset_all(self):
    """Reset all *resetable* layers."""

    def reset(m):
        # `apply` already visits every sub-module, so reset each one once.
        getattr(m, "reset_parameters", lambda: None)()

    self.model.apply(reset)
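
This is handy in active learning, where the model is often retrained from scratch after each labelling step. A sketch (hyperparameters are illustrative; depending on the optimizer, its state may need re-creating as well):

# Re-initialize weights so this active learning step trains from scratch.
wrapper.reset_all()
wrapper.train_on_dataset(al_dataset, optimizer, batch_size=32, epoch=10, use_cuda=True)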

reset_fcs()

Reset all torch.nn.Linear layers.

Source code in baal/modelwrapper.py
def reset_fcs(self):
    """Reset all torch.nn.Linear layers."""

    def reset(m):
        if isinstance(m, torch.nn.Linear):
            m.reset_parameters()

    self.model.apply(reset)

set_dataset_size(dataset_size)

Set state for dataset size. Useful for tracking.

Parameters:

dataset_size (int): Current dataset size, stored for tracking. Required.

Source code in baal/modelwrapper.py
def set_dataset_size(self, dataset_size: int):
    """
    Set state for dataset size. Useful for tracking.

    Args:
        dataset_size: Current dataset size, stored for tracking.
    """
    self._active_dataset_size = dataset_size

state_dict()

Get the state dict(s).

Source code in baal/modelwrapper.py
def state_dict(self):
    """Get the state dict(s)."""
    return self.model.state_dict()

test_on_batch(data, target, cuda=False, average_predictions=1)

Test the current model on a batch.

Parameters:

data (Tensor): The model input. Required.
target (Tensor): The ground truth. Required.
cuda (bool): Use CUDA or not. Default: False.
average_predictions (int): The number of predictions to average to compute the test loss. Default: 1.

Returns:

Tensor, the loss computed from the criterion.

Source code in baal/modelwrapper.py
def test_on_batch(
    self,
    data: torch.Tensor,
    target: torch.Tensor,
    cuda: bool = False,
    average_predictions: int = 1,
):
    """
    Test the current model on a batch.

    Args:
        data (Tensor): The model input.
        target (Tensor): The ground truth.
        cuda (bool): Use CUDA or not.
        average_predictions (int): The number of predictions to average to
            compute the test loss.

    Returns:
        Tensor, the loss computed from the criterion.
    """
    with torch.no_grad():
        if cuda:
            data, target = to_cuda(data), to_cuda(target)

        preds = map_on_tensor(
            lambda p: p.mean(-1),
            self.predict_on_batch(data, iterations=average_predictions, cuda=cuda),
        )
        loss = self.criterion(preds, target)
        self._update_metrics(preds, target, loss, "test")
        return loss

test_on_dataset(dataset, batch_size, use_cuda, workers=4, collate_fn=None, average_predictions=1)

Test the model on a Dataset `dataset`.

Parameters:

dataset (Dataset): Dataset to evaluate on. Required.
batch_size (int): Batch size used for evaluation. Required.
use_cuda (bool): Use CUDA or not. Required.
workers (int): Number of workers to use. Default: 4.
collate_fn (Optional[Callable]): The collate function to use. Default: None.
average_predictions (int): The number of predictions to average to compute the test loss. Default: 1.

Returns:

Average loss value over the dataset.

Source code in baal/modelwrapper.py
def test_on_dataset(
    self,
    dataset: Dataset,
    batch_size: int,
    use_cuda: bool,
    workers: int = 4,
    collate_fn: Optional[Callable] = None,
    average_predictions: int = 1,
):
    """
    Test the model on a Dataset `dataset`.

    Args:
        dataset (Dataset): Dataset to evaluate on.
        batch_size (int): Batch size used for evaluation.
        use_cuda (bool): Use Cuda or not.
        workers (int): Number of workers to use.
        collate_fn (Optional[Callable]): The collate function to use.
        average_predictions (int): The number of predictions to average to
            compute the test loss.

    Returns:
        Average loss value over the dataset.
    """
    self.eval()
    log.info("Starting evaluating", dataset=len(dataset))
    self._reset_metrics("test")

    for data, target, *_ in DataLoader(
        dataset, batch_size, False, num_workers=workers, collate_fn=collate_fn
    ):
        _ = self.test_on_batch(
            data, target, cuda=use_cuda, average_predictions=average_predictions
        )

    log.info("Evaluation complete", test_loss=self.get_metrics("test")["test_loss"])
    self.active_step(None, self.get_metrics("test"))
    return self.get_metrics("test")["test_loss"]
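
A minimal call, averaging 20 MC predictions per input before computing the loss:

test_loss = wrapper.test_on_dataset(
    test_dataset, batch_size=32, use_cuda=True, average_predictions=20
)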

train()

Set the model in train mode.

Source code in baal/modelwrapper.py
def train(self):
    """Set the model in `train` mode."""
    self.model.train()

train_and_test_on_datasets(train_dataset, test_dataset, optimizer, batch_size, epoch, use_cuda, workers=4, collate_fn=None, regularizer=None, return_best_weights=False, patience=None, min_epoch_for_es=0, skip_epochs=1)

Train and test the model on both `train_dataset` and `test_dataset`.

Parameters:

train_dataset (Dataset): Dataset to train on. Required.
test_dataset (Dataset): Dataset to evaluate on. Required.
optimizer (Optimizer): Optimizer to use during training. Required.
batch_size (int): Batch size used. Required.
epoch (int): Number of epochs to train for. Required.
use_cuda (bool): Use CUDA or not. Required.
workers (int): Number of workers to use. Default: 4.
collate_fn (Optional[Callable]): The collate function to use. Default: None.
regularizer (Optional[Callable]): The loss regularization for training. Default: None.
return_best_weights (bool): If True, keep the best weights and return them. Default: False.
patience (Optional[int]): If provided, use early stopping to stop after `patience` epochs without improvement. Default: None.
min_epoch_for_es (int): Epoch at which early stopping starts. Default: 0.
skip_epochs (int): Number of epochs to skip between `test_on_dataset` calls. Default: 1.

Returns:

History and best weights if required.

Source code in baal/modelwrapper.py
def train_and_test_on_datasets(
    self,
    train_dataset: Dataset,
    test_dataset: Dataset,
    optimizer: Optimizer,
    batch_size: int,
    epoch: int,
    use_cuda: bool,
    workers: int = 4,
    collate_fn: Optional[Callable] = None,
    regularizer: Optional[Callable] = None,
    return_best_weights=False,
    patience=None,
    min_epoch_for_es=0,
    skip_epochs=1,
):
    """
    Train and test the model on both `train_dataset` and `test_dataset`.

    Args:
        train_dataset (Dataset): Dataset to train on.
        test_dataset (Dataset): Dataset to evaluate on.
        optimizer (Optimizer): Optimizer to use during training.
        batch_size (int): Batch size used.
        epoch (int): Number of epochs to train for.
        use_cuda (bool): Use Cuda or not.
        workers (int): Number of workers to use.
        collate_fn (Optional[Callable]): The collate function to use.
        regularizer (Optional[Callable]): The loss regularization for training.
        return_best_weights (bool): If True, will keep the best weights and return them.
        patience (Optional[int]): If provided, will use early stopping to stop after
                                    `patience` epoch without improvement.
        min_epoch_for_es (int): Epoch at which the early stopping starts.
        skip_epochs (int): Number of epochs to skip between `test_on_dataset` calls.

    Returns:
        History and best weights if required.
    """
    best_weight = None
    best_loss = 1e10
    best_epoch = 0
    hist = []
    for e in range(epoch):
        _ = self.train_on_dataset(
            train_dataset, optimizer, batch_size, 1, use_cuda, workers, collate_fn, regularizer
        )
        if e % skip_epochs == 0:
            te_loss = self.test_on_dataset(
                test_dataset, batch_size, use_cuda, workers, collate_fn
            )
            hist.append(self.get_metrics())
            if te_loss < best_loss:
                best_epoch = e
                best_loss = te_loss
                if return_best_weights:
                    best_weight = deepcopy(self.state_dict())

            if patience is not None and (e - best_epoch) > patience and (e > min_epoch_for_es):
                # Early stopping
                break
        else:
            hist.append(self.get_metrics("train"))

    if return_best_weights:
        return hist, best_weight
    else:
        return hist
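
A sketch combining early stopping with best-weight restoration (hyperparameters are illustrative):

hist, best_weights = wrapper.train_and_test_on_datasets(
    train_dataset=al_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    batch_size=32,
    epoch=100,
    use_cuda=True,
    return_best_weights=True,
    patience=5,
)
# Roll back to the checkpoint with the lowest test loss.
wrapper.load_state_dict(best_weights)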

train_on_batch(data, target, optimizer, cuda=False, regularizer=None)

Train the current model on a batch using `optimizer`.

Parameters:

data (Tensor): The model input. Required.
target (Tensor): The ground truth. Required.
optimizer (Optimizer): An optimizer. Required.
cuda (bool): Use CUDA or not. Default: False.
regularizer (Optional[Callable]): The loss regularization for training. Default: None.

Returns:

Tensor, the loss computed from the criterion.

Source code in baal/modelwrapper.py
def train_on_batch(
    self, data, target, optimizer, cuda=False, regularizer: Optional[Callable] = None
):
    """
    Train the current model on a batch using `optimizer`.

    Args:
        data (Tensor): The model input.
        target (Tensor): The ground truth.
        optimizer (optim.Optimizer): An optimizer.
        cuda (bool): Use CUDA or not.
        regularizer (Optional[Callable]): The loss regularization for training.


    Returns:
        Tensor, the loss computed from the criterion.
    """

    if cuda:
        data, target = to_cuda(data), to_cuda(target)
    optimizer.zero_grad()
    output = self.model(data)
    loss = self.criterion(output, target)

    if regularizer:
        regularized_loss = loss + regularizer()
        regularized_loss.backward()
    else:
        loss.backward()

    optimizer.step()
    self._update_metrics(output, target, loss, filter="train")
    return loss
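
For a custom training loop, `train_on_batch` can be driven directly. A sketch, where `train_dataset` is any PyTorch Dataset:

from torch.utils.data import DataLoader

wrapper.train()  # ensure train mode so dropout stays active
for data, target in DataLoader(train_dataset, batch_size=32, shuffle=True):
    loss = wrapper.train_on_batch(data, target, optimizer, cuda=True)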

train_on_dataset(dataset, optimizer, batch_size, epoch, use_cuda, workers=4, collate_fn=None, regularizer=None)

Train for `epoch` epochs on a Dataset `dataset`.

Parameters:

dataset (Dataset): PyTorch Dataset to be trained on. Required.
optimizer (Optimizer): Optimizer to use. Required.
batch_size (int): The batch size used in the DataLoader. Required.
epoch (int): Number of epochs to train for. Required.
use_cuda (bool): Use CUDA or not. Required.
workers (int): Number of workers for the multiprocessing. Default: 4.
collate_fn (Optional[Callable]): The collate function to use. Default: None.
regularizer (Optional[Callable]): The loss regularization for training. Default: None.

Returns:

The training history.

Source code in baal/modelwrapper.py
def train_on_dataset(
    self,
    dataset,
    optimizer,
    batch_size,
    epoch,
    use_cuda,
    workers=4,
    collate_fn: Optional[Callable] = None,
    regularizer: Optional[Callable] = None,
):
    """
    Train for `epoch` epochs on a Dataset `dataset`.

    Args:
        dataset (Dataset): PyTorch Dataset to be trained on.
        optimizer (optim.Optimizer): Optimizer to use.
        batch_size (int): The batch size used in the DataLoader.
        epoch (int): Number of epochs to train for.
        use_cuda (bool): Use cuda or not.
        workers (int): Number of workers for the multiprocessing.
        collate_fn (Optional[Callable]): The collate function to use.
        regularizer (Optional[Callable]): The loss regularization for training.

    Returns:
        The training history.
    """
    dataset_size = len(dataset)
    self.train()
    self.set_dataset_size(dataset_size)
    history = []
    log.info("Starting training", epoch=epoch, dataset=dataset_size)
    collate_fn = collate_fn or default_collate
    for _ in range(epoch):
        self._reset_metrics("train")
        for data, target, *_ in DataLoader(
            dataset, batch_size, True, num_workers=workers, collate_fn=collate_fn
        ):
            _ = self.train_on_batch(data, target, optimizer, use_cuda, regularizer)
        history.append(self.get_metrics("train")["train_loss"])

    optimizer.zero_grad()  # Ensure that the gradient is flushed.
    log.info("Training complete", train_loss=self.get_metrics("train")["train_loss"])
    self.active_step(dataset_size, self.get_metrics("train"))
    return history
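
Putting it all together, a typical active learning loop built on ModelWrapper looks like this sketch (the heuristic, budget, and hyperparameters are illustrative):

from baal.active.heuristics import BALD

heuristic = BALD()
for step in range(15):  # 15 labelling steps, illustrative
    wrapper.train_on_dataset(al_dataset, optimizer, batch_size=32, epoch=10, use_cuda=True)
    test_loss = wrapper.test_on_dataset(test_dataset, batch_size=32, use_cuda=True)
    predictions = wrapper.predict_on_dataset(
        al_dataset.pool, batch_size=32, iterations=20, use_cuda=True
    )
    # Label the 100 most uncertain pool examples and continue.
    al_dataset.label(heuristic(predictions)[:100])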