Heuristics

Heuristics take a set of predictions and output an uncertainty score for each sample. They are agnostic to how the predictions were produced, so they work with both MC sampling and ensembles.

Example

Using BALD, we can compute the uncertainty of many predictions.

import numpy as np
from baal.active.heuristics import BALD

# output from ModelWrapper.predict_on_dataset with shape [1000, num_classes, 20]
predictions: np.ndarray = ... 

# To get the full uncertainty score
uncertainty = BALD().compute_score(predictions)

# To get ranks
most_uncertain = BALD()(predictions)

# If you wish to mix BALD and uniform sampling, you can set
# the `shuffle_prop` parameter (deprecated; see the API section below).
BALD(shuffle_prop=0.1)

# When working with Sequence or Segmentation models, you can specify how to aggregate
# values using the "reduction" parameter.
BALD(reduction="mean")
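
To experiment without a trained model, you can fabricate predictions of the expected shape. A minimal sketch, assuming random probabilities as a stand-in for real ModelWrapper output:

import numpy as np
from baal.active.heuristics import BALD

# Fabricated stand-in for ModelWrapper.predict_on_dataset:
# 1000 samples, 10 classes, 20 MC iterations.
rng = np.random.RandomState(1337)
predictions = rng.rand(1000, 10, 20)
predictions /= predictions.sum(axis=1, keepdims=True)  # normalize to probabilities

uncertainty = BALD().compute_score(predictions)  # shape [1000]
most_uncertain = BALD()(predictions)             # indices, most uncertain first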

API

baal.active.heuristics.AbstractHeuristic

Abstract class that defines a Heuristic.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `shuffle_prop` | `float` | Shuffle proportion. | `DEPRECATED` |
| `reverse` | `bool` | `True` if the most uncertain sample has the highest value. | `False` |
| `reduction` | `Union[str, Callable]` | Reduction used after computing the score. | `'none'` |

Source code in baal/active/heuristics/heuristics.py
class AbstractHeuristic:
    """
    Abstract class that defines a Heuristic.

    Args:
        shuffle_prop (float): shuffle proportion.
        reverse (bool): True if the most uncertain sample has the highest value.
        reduction (Union[str, Callable]): Reduction used after computing the score.
    """

    def __init__(self, shuffle_prop=DEPRECATED, reverse=False, reduction="none"):
        if shuffle_prop != DEPRECATED and shuffle_prop < 1.0:
            warnings.warn(SHUFFLE_PROP_DEPRECATION_NOTICE, DeprecationWarning)
        else:
            shuffle_prop = 0.0
        self.shuffle_prop = shuffle_prop
        self.reversed = reverse
        assert reduction in available_reductions or callable(reduction)
        self._reduction_name = reduction
        self.reduction = reduction if callable(reduction) else available_reductions[reduction]

    def compute_score(self, predictions):
        """
        Compute the score according to the heuristic.

        Args:
            predictions (ndarray): Array of predictions

        Returns:
            Array of scores.
        """
        raise NotImplementedError

    def get_uncertainties_generator(self, predictions):
        """
        Compute the score according to the heuristic.

        Args:
            predictions (Iterable): Generator of predictions

        Raises:
            ValueError if the generator is empty.

        Returns:
            Array of scores.
        """
        acc = []
        for pred in predictions:
            acc.append(self.get_uncertainties(pred))
        if len(acc) == 0:
            raise ValueError("No prediction! Cannot order the values!")
        return np.concatenate(acc)

    def get_uncertainties(self, predictions):
        """
        Get the uncertainties.

        Args:
            predictions (ndarray): Array of predictions

        Returns:
            Array of uncertainties

        """
        if isinstance(predictions, Tensor):
            predictions = predictions.numpy()
        scores = self.compute_score(predictions)
        scores = self.reduction(scores)
        if not np.all(np.isfinite(scores)):
            fixed = 0.0 if self.reversed else 10000
            warnings.warn(f"Invalid value in the score, will be set to {fixed}", UserWarning)
            scores[~np.isfinite(scores)] = fixed
        return scores

    def reorder_indices(self, scores):
        """
        Order indices given their uncertainty score.

        Args:
            scores (ndarray/ List[ndarray]): Array of uncertainties or
                list of arrays.

        Returns:
            ordered indices according to the uncertainty (highest to lowest).

        Raises:
            ValueError if `scores` is not uni-dimensional.
        """
        if isinstance(scores, Sequence):
            scores = np.concatenate(scores)

        if scores.ndim > 1:
            raise ValueError(
                (
                    f"Can't order sequence with more than 1 dimension. "
                    f"Currently {scores.ndim} dimensions. "
                    f"Is the heuristic reduction method set: {self._reduction_name}"
                )
            )
        assert scores.ndim == 1  # We want the uncertainty value per sample.
        ranks = np.argsort(scores)
        if self.reversed:
            ranks = ranks[::-1]
        ranks = _shuffle_subset(ranks, self.shuffle_prop)
        return ranks

    def get_ranks(self, predictions):
        """
        Rank the predictions according to their uncertainties.

        Args:
            predictions (ndarray): [batch_size, C, ..., Iterations]

        Returns:
            Ranked indices according to the uncertainty (highest to lowest).
            Scores for all predictions.

        """
        if isinstance(predictions, types.GeneratorType):
            scores = self.get_uncertainties_generator(predictions)
        else:
            scores = self.get_uncertainties(predictions)

        return self.reorder_indices(scores), scores

    def __call__(self, predictions):
        """Rank the predictions according to their uncertainties.

        Only returns the ranks, not the associated uncertainty scores.
        """
        return self.get_ranks(predictions)[0]
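
As a sketch of how the pieces fit together, here is a hypothetical subclass (not part of Baal) that scores each sample by the variance of the predictions across iterations; only `compute_score` needs to be implemented:

import numpy as np
from baal.active.heuristics import AbstractHeuristic

class VarianceHeuristic(AbstractHeuristic):  # hypothetical, for illustration only
    def __init__(self, reduction="none"):
        # reverse=True so that high-variance (most uncertain) samples rank first.
        super().__init__(reverse=True, reduction=reduction)

    def compute_score(self, predictions):
        # predictions: [batch_size, n_classes, ..., n_iterations]
        return np.var(predictions, axis=-1).mean(axis=1)

ranks = VarianceHeuristic()(np.random.rand(100, 10, 20))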

__call__(predictions)

Rank the predictions according to their uncertainties.

Only returns the ranks, not the associated uncertainty scores.

Source code in baal/active/heuristics/heuristics.py
def __call__(self, predictions):
    """Rank the predictions according to their uncertainties.

    Only returns the ranks, not the associated uncertainty scores.
    """
    return self.get_ranks(predictions)[0]

compute_score(predictions)

Compute the score according to the heuristic.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `predictions` | `ndarray` | Array of predictions. | *required* |

Returns:

| Type | Description |
| --- | --- |
|  | Array of scores. |

Source code in baal/active/heuristics/heuristics.py
def compute_score(self, predictions):
    """
    Compute the score according to the heuristic.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of scores.
    """
    raise NotImplementedError

get_ranks(predictions)

Rank the predictions according to their uncertainties.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `predictions` | `ndarray` | `[batch_size, C, ..., Iterations]` | *required* |

Returns:

| Type | Description |
| --- | --- |
|  | Ranked indices according to the uncertainty (highest to lowest). |
|  | Scores for all predictions. |

Source code in baal/active/heuristics/heuristics.py
def get_ranks(self, predictions):
    """
    Rank the predictions according to their uncertainties.

    Args:
        predictions (ndarray): [batch_size, C, ..., Iterations]

    Returns:
        Ranked indices according to the uncertainty (highest to lowest).
        Scores for all predictions.

    """
    if isinstance(predictions, types.GeneratorType):
        scores = self.get_uncertainties_generator(predictions)
    else:
        scores = self.get_uncertainties(predictions)

    return self.reorder_indices(scores), scores

get_uncertainties(predictions)

Get the uncertainties.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `predictions` | `ndarray` | Array of predictions. | *required* |

Returns:

| Type | Description |
| --- | --- |
|  | Array of uncertainties. |

Source code in baal/active/heuristics/heuristics.py
def get_uncertainties(self, predictions):
    """
    Get the uncertainties.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of uncertainties

    """
    if isinstance(predictions, Tensor):
        predictions = predictions.numpy()
    scores = self.compute_score(predictions)
    scores = self.reduction(scores)
    if not np.all(np.isfinite(scores)):
        fixed = 0.0 if self.reversed else 10000
        warnings.warn(f"Invalid value in the score, will be set to {fixed}", UserWarning)
        scores[~np.isfinite(scores)] = fixed
    return scores

get_uncertainties_generator(predictions)

Compute the score according to the heuristic.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `predictions` | `Iterable` | Generator of predictions. | *required* |

Returns:

| Type | Description |
| --- | --- |
|  | Array of scores. |

Source code in baal/active/heuristics/heuristics.py
def get_uncertainties_generator(self, predictions):
    """
    Compute the score according to the heuristic.

    Args:
        predictions (Iterable): Generator of predictions

    Raises:
        ValueError if the generator is empty.

    Returns:
        Array of scores.
    """
    acc = []
    for pred in predictions:
        acc.append(self.get_uncertainties(pred))
    if len(acc) == 0:
        raise ValueError("No prediction! Cannot order the values!")
    return np.concatenate(acc)
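
Because `get_ranks` detects generators, a heuristic can consume chunked predictions directly, such as the output of ModelWrapper.predict_on_dataset_generator. A sketch with fabricated chunks:

import numpy as np
from baal.active.heuristics import BALD

def prediction_chunks(n_chunks=5):  # stand-in for predict_on_dataset_generator
    rng = np.random.RandomState(0)
    for _ in range(n_chunks):
        chunk = rng.rand(100, 10, 20)
        yield chunk / chunk.sum(axis=1, keepdims=True)  # probabilities

# Each chunk is scored separately, then the scores are concatenated.
ranks = BALD()(prediction_chunks())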

reorder_indices(scores)

Order indices given their uncertainty score.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `scores` | `ndarray / List[ndarray]` | Array of uncertainties or list of arrays. | *required* |

Returns:

| Type | Description |
| --- | --- |
|  | Ordered indices according to the uncertainty (highest to lowest). |

Source code in baal/active/heuristics/heuristics.py
def reorder_indices(self, scores):
    """
    Order indices given their uncertainty score.

    Args:
        scores (ndarray/ List[ndarray]): Array of uncertainties or
            list of arrays.

    Returns:
        ordered indices according to the uncertainty (highest to lowest).

    Raises:
        ValueError if `scores` is not uni-dimensional.
    """
    if isinstance(scores, Sequence):
        scores = np.concatenate(scores)

    if scores.ndim > 1:
        raise ValueError(
            (
                f"Can't order sequence with more than 1 dimension. "
                f"Currently {scores.ndim} dimensions. "
                f"Is the heuristic reduction method set: {self._reduction_name}"
            )
        )
    assert scores.ndim == 1  # We want the uncertainty value per sample.
    ranks = np.argsort(scores)
    if self.reversed:
        ranks = ranks[::-1]
    ranks = _shuffle_subset(ranks, self.shuffle_prop)
    return ranks
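
A small illustration of the ordering, assuming a heuristic with `reverse=True` such as BALD:

import numpy as np
from baal.active.heuristics import BALD

heuristic = BALD()  # reverse=True; shuffle_prop defaults to 0.0
scores = np.array([0.1, 0.7, 0.3])
heuristic.reorder_indices(scores)
# -> array([1, 2, 0]): the most uncertain sample (index 1) comes first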

baal.active.heuristics.BALD

Bases: AbstractHeuristic

Sort by the highest acquisition function value.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `shuffle_prop` | `float` | Amount of noise to put in the ranking. Helps with selection bias (default: 0.0). | `DEPRECATED` |
| `reduction` | `Union[str, callable]` | Function that aggregates the results (default: `'none'`). | `'none'` |

References

https://arxiv.org/abs/1703.02910

Source code in baal/active/heuristics/heuristics.py
class BALD(AbstractHeuristic):
    """
    Sort by the highest acquisition function value.

    Args:
        shuffle_prop (float): Amount of noise to put in the ranking. Helps with selection bias
            (default: 0.0).
        reduction (Union[str, callable]): function that aggregates the results
            (default: 'none').

    References:
        https://arxiv.org/abs/1703.02910
    """

    def __init__(self, shuffle_prop=DEPRECATED, reduction="none"):
        super().__init__(shuffle_prop=shuffle_prop, reverse=True, reduction=reduction)

    @require_single_item
    @requireprobs
    def compute_score(self, predictions):
        """
        Compute the score according to the heuristic.

        Args:
            predictions (ndarray): Array of predictions

        Returns:
            Array of scores.
        """
        assert predictions.ndim >= 3
        # [n_sample, n_class, ..., n_iterations]

        expected_entropy = -np.mean(
            np.sum(xlogy(predictions, predictions), axis=1), axis=-1
        )  # [batch size, ...]
        expected_p = np.mean(predictions, axis=-1)  # [batch_size, n_classes, ...]
        entropy_expected_p = -np.sum(xlogy(expected_p, expected_p), axis=1)  # [batch size, ...]
        bald_acq = entropy_expected_p - expected_entropy
        return bald_acq
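
BALD is high when the stochastic forward passes are individually confident yet disagree with one another. A toy check of that behavior, with made-up probabilities:

import numpy as np
from baal.active.heuristics import BALD

confident = [0.9, 0.05, 0.05]
# Sample 0: all 5 MC iterations agree -> BALD score close to 0.
agree = np.stack([confident] * 5, axis=-1)                  # [3 classes, 5 iterations]
# Sample 1: each iteration is confident, but about a different class.
disagree = np.stack([np.roll(confident, i) for i in range(5)], axis=-1)

scores = BALD().compute_score(np.stack([agree, disagree]))  # shape [2]
assert scores[1] > scores[0]  # disagreement across iterations raises the score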

compute_score(predictions)

Compute the score according to the heuristic.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `predictions` | `ndarray` | Array of predictions. | *required* |

Returns:

| Type | Description |
| --- | --- |
|  | Array of scores. |

Source code in baal/active/heuristics/heuristics.py
@require_single_item
@requireprobs
def compute_score(self, predictions):
    """
    Compute the score according to the heuristic.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of scores.
    """
    assert predictions.ndim >= 3
    # [n_sample, n_class, ..., n_iterations]

    expected_entropy = -np.mean(
        np.sum(xlogy(predictions, predictions), axis=1), axis=-1
    )  # [batch size, ...]
    expected_p = np.mean(predictions, axis=-1)  # [batch_size, n_classes, ...]
    entropy_expected_p = -np.sum(xlogy(expected_p, expected_p), axis=1)  # [batch size, ...]
    bald_acq = entropy_expected_p - expected_entropy
    return bald_acq

baal.active.heuristics.Random

Bases: Precomputed

Random heuristic.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `shuffle_prop` | `float` | UNUSED. | `DEPRECATED` |
| `reduction` | `Union[str, callable]` | UNUSED. | `'none'` |
| `seed` | `Optional[int]` | If provided, will seed the random generator. | `None` |

Source code in baal/active/heuristics/heuristics.py
class Random(Precomputed):
    """Random heuristic.

    Args:
        shuffle_prop (float): UNUSED
        reduction (Union[str, callable]): UNUSED.
        seed (Optional[int]): If provided, will seed the random generator.
    """

    def __init__(self, shuffle_prop=DEPRECATED, reduction="none", seed=None):
        super().__init__(1.0, False)
        if seed is not None:
            self.rng = np.random.RandomState(seed)
        else:
            self.rng = np.random

    def compute_score(self, predictions):
        return self.rng.rand(predictions.shape[0])
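
`Random` ignores the prediction values and only looks at the batch size, which makes it a convenient baseline. A seeded sketch:

import numpy as np
from baal.active.heuristics import Random

predictions = np.random.rand(100, 10, 20)
ranks = Random(seed=1337)(predictions)  # reproducible random ordering of the 100 samples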

baal.active.heuristics.Entropy

Bases: AbstractHeuristic

Sort by the highest entropy.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `shuffle_prop` | `float` | Amount of noise to put in the ranking. Helps with selection bias (default: 0.0). | `DEPRECATED` |
| `reduction` | `Union[str, callable]` | Function that aggregates the results (default: `'none'`). | `'none'` |

Source code in baal/active/heuristics/heuristics.py
class Entropy(AbstractHeuristic):
    """
    Sort by the highest entropy.

    Args:
        shuffle_prop (float): Amount of noise to put in the ranking. Helps with selection bias
            (default: 0.0).
        reduction (Union[str, callable]): function that aggregates the results (default: `none`).
    """

    def __init__(self, shuffle_prop=DEPRECATED, reduction="none"):
        super().__init__(shuffle_prop=shuffle_prop, reverse=True, reduction=reduction)

    @require_single_item
    @singlepass
    @requireprobs
    def compute_score(self, predictions):
        return scipy.stats.entropy(np.swapaxes(predictions, 0, 1))
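
`Entropy` ranks by the entropy of the prediction averaged over iterations; unlike BALD, it does not separate disagreement between iterations from per-iteration ambiguity. A quick sketch with fabricated probabilities:

import numpy as np
from baal.active.heuristics import Entropy

rng = np.random.RandomState(7)
predictions = rng.rand(1000, 10, 20)
predictions /= predictions.sum(axis=1, keepdims=True)

scores = Entropy().compute_score(predictions)  # entropy per sample, shape [1000]
ranks = Entropy()(predictions)                 # most entropic samples first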