# Heuristics

Heuristics take a set of predictions and output an uncertainty value. They are agnostic to the method used for predicting, so they work with MC sampling and Ensembles.

### Example

Using BALD, we can compute the uncertainty of many predictions.

import numpy as np
from baal.active.heuristics import BALD

# Output from ModelWrapper.predict_on_dataset with shape [1000, num_classes, 20],
# i.e. [n_samples, n_classes, n_iterations].
predictions: np.ndarray = ...

# To get the raw uncertainty score (no reduction, no ranking applied).
uncertainty = BALD().compute_score(predictions)

# To get ranks: calling the heuristic returns sample indices ordered by
# decreasing BALD score.
most_uncertain = BALD()(predictions)

# If you wish to mix BALD and Uniform sampling,
# you can modify the shuffle_prop parameter.
# NOTE: shuffle_prop is deprecated; passing a value below 1.0 emits a DeprecationWarning.
BALD(shuffle_prop=0.1)

# When working with Sequence or Segmentation models, you can specify how to aggregate
# values using the "reduction" parameter (a reduction name or any callable).
BALD(reduction="mean")


### baal.active.heuristics.AbstractHeuristic

Abstract class that defines a Heuristic.

Parameters:

Name Type Description Default
shuffle_prop float

shuffle proportion.

DEPRECATED
reverse bool

True if the most uncertain sample has the highest value.

False
reduction Union[str, Callable]

Reduction used after computing the score.

'none'
 class AbstractHeuristic:
    """
    Abstract class that defines a Heuristic.

    Subclasses implement ``compute_score``; this base class applies the reduction,
    replaces non-finite scores and turns the scores into a ranking.

    Args:
        shuffle_prop (float): shuffle proportion. Deprecated: providing a value below 1.0
            emits a ``DeprecationWarning``.
        reverse (bool): True if the most uncertain sample has the highest value.
        reduction (Union[str, Callable]): Reduction used after computing the score.
            Either a key of ``available_reductions`` or a callable.
    """

    def __init__(self, shuffle_prop=DEPRECATED, reverse=False, reduction="none"):
        if shuffle_prop != DEPRECATED and shuffle_prop < 1.0:
            warnings.warn(SHUFFLE_PROP_DEPRECATION_NOTICE, DeprecationWarning)
        else:
            # Argument not provided, or >= 1.0: shuffling is disabled.
            # NOTE(review): values >= 1.0 are replaced silently - confirm this is intended.
            shuffle_prop = 0.0
        self.shuffle_prop = shuffle_prop
        self.reversed = reverse
        assert reduction in available_reductions or callable(reduction)
        # Kept so that error messages can report what the user asked for.
        self._reduction_name = reduction
        self.reduction = reduction if callable(reduction) else available_reductions[reduction]

    def compute_score(self, predictions):
        """
        Compute the score according to the heuristic.

        Must be overridden by subclasses.

        Args:
            predictions (ndarray): Array of predictions

        Returns:
            Array of scores.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def get_uncertainties_generator(self, predictions):
        """
        Compute the uncertainties of every item yielded by `predictions`.

        Args:
            predictions (Iterable): Generator of predictions

        Raises:
            ValueError if the generator is empty.

        Returns:
            Array of scores, concatenated in iteration order.
        """
        acc = []
        for pred in predictions:
            acc.append(self.get_uncertainties(pred))
        if len(acc) == 0:
            raise ValueError("No prediction! Cannot order the values!")
        return np.concatenate(acc)

    def get_uncertainties(self, predictions):
        """
        Get the uncertainties.

        Scores are computed with ``compute_score`` and then passed through the
        configured reduction. Non-finite values are replaced, with a ``UserWarning``,
        by 0.0 when ``reverse`` is True and by 10000 otherwise.

        Args:
            predictions (ndarray): Array of predictions

        Returns:
            Array of uncertainties
        """
        if isinstance(predictions, Tensor):
            predictions = predictions.numpy()
        scores = self.compute_score(predictions)
        scores = self.reduction(scores)
        if not np.all(np.isfinite(scores)):
            fixed = 0.0 if self.reversed else 10000
            warnings.warn(f"Invalid value in the score, will be put to {fixed}", UserWarning)
            scores[~np.isfinite(scores)] = fixed
        return scores

    def reorder_indices(self, scores):
        """
        Order indices given their uncertainty score.

        Args:
            scores (ndarray/ List[ndarray]): Array of uncertainties or list of arrays.

        Returns:
            ordered index according to the uncertainty (highest to lowest).

        Raises:
            ValueError if scores is not uni-dimensional.
        """
        if isinstance(scores, Sequence):
            scores = np.concatenate(scores)
        if scores.ndim > 1:
            raise ValueError(
                "Can't order sequence with more than 1 dimension. "
                f"Currently {scores.ndim} dimensions. "
                f"Is the heuristic reduction method set: {self._reduction_name}"
            )
        if scores.ndim != 1:
            # We want the uncertainty value per sample. Raised explicitly (instead of
            # asserted) so the check survives `python -O` and matches the documented error.
            raise ValueError(
                "Can't order a 0-dimensional score. "
                "One uncertainty value per sample is required."
            )
        ranks = np.argsort(scores)
        if self.reversed:
            ranks = ranks[::-1]
        ranks = _shuffle_subset(ranks, self.shuffle_prop)
        return ranks

    def get_ranks(self, predictions):
        """
        Rank the predictions according to their uncertainties.

        Args:
            predictions (ndarray): [batch_size, C, ..., Iterations], or a generator
                yielding such arrays.

        Returns:
            Ranked index according to the uncertainty (highest to lowest).
            Scores for all predictions.
        """
        if isinstance(predictions, types.GeneratorType):
            scores = self.get_uncertainties_generator(predictions)
        else:
            scores = self.get_uncertainties(predictions)
        return self.reorder_indices(scores), scores

    def __call__(self, predictions):
        """Rank the predictions according to their uncertainties.

        Only return the ranks and not the associated uncertainty scores.
        """
        return self.get_ranks(predictions)[0]

## __call__(predictions)

Rank the predictions according to their uncertainties.

Only return the ranks and not the associated uncertainty scores.

 def __call__(self, predictions):
    """Rank the predictions according to their uncertainties.

    Only return the ranks (first element of ``get_ranks``) and not the
    associated uncertainty scores.
    """
    return self.get_ranks(predictions)[0]

## compute_score(predictions)

Compute the score according to the heuristic.

Parameters:

Name Type Description Default
predictions ndarray

Array of predictions

required

Returns:

Type Description

Array of scores.

 def compute_score(self, predictions):
    """
    Compute the score according to the heuristic.

    This base implementation is a placeholder that concrete heuristics override.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of scores.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError

## get_ranks(predictions)

Rank the predictions according to their uncertainties.

Parameters:

Name Type Description Default
predictions ndarray

[batch_size, C, ..., Iterations]

required

Returns:

Type Description

Ranked index according to the uncertainty (highest to lowest).

Scores for all predictions.

 def get_ranks(self, predictions):
    """
    Rank the predictions according to their uncertainties.

    Generators are scored item by item through ``get_uncertainties_generator``;
    anything else is scored in one call to ``get_uncertainties``.

    Args:
        predictions (ndarray): [batch_size, C, ..., Iterations]

    Returns:
        Ranked index according to the uncertainty (highest to lowest).
        Scores for all predictions.
    """
    # Pick the scoring routine that matches the kind of input we received.
    scorer = (
        self.get_uncertainties_generator
        if isinstance(predictions, types.GeneratorType)
        else self.get_uncertainties
    )
    uncertainties = scorer(predictions)
    ordering = self.reorder_indices(uncertainties)
    return ordering, uncertainties

## get_uncertainties(predictions)

Get the uncertainties.

Parameters:

Name Type Description Default
predictions ndarray

Array of predictions

required

Returns:

Type Description

Array of uncertainties

 def get_uncertainties(self, predictions):
    """
    Get the uncertainties.

    The predictions are scored with ``compute_score`` and the result is passed
    through the configured reduction. Non-finite entries are overwritten, after a
    ``UserWarning``, with 0.0 when ``self.reversed`` is set and 10000 otherwise.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of uncertainties
    """
    # Tensors are converted so that the heuristics only deal with numpy arrays.
    as_array = predictions.numpy() if isinstance(predictions, Tensor) else predictions
    scores = self.reduction(self.compute_score(as_array))
    finite = np.isfinite(scores)
    if np.all(finite):
        return scores
    if self.reversed:
        fixed = 0.0
    else:
        fixed = 10000
    warnings.warn(f"Invalid value in the score, will be put to {fixed}", UserWarning)
    scores[~finite] = fixed
    return scores

## get_uncertainties_generator(predictions)

Compute the score according to the heuristic.

Parameters:

Name Type Description Default
predictions Iterable

Generator of predictions

required

Returns:

Type Description

Array of scores.

 def get_uncertainties_generator(self, predictions):
    """
    Compute the uncertainties of every item produced by `predictions`.

    Args:
        predictions (Iterable): Generator of predictions

    Raises:
        ValueError if the generator is empty.

    Returns:
        Array of scores, concatenated in iteration order.
    """
    per_item = [self.get_uncertainties(item) for item in predictions]
    if not per_item:
        raise ValueError("No prediction! Cannot order the values!")
    return np.concatenate(per_item)

## reorder_indices(scores)

Order indices given their uncertainty score.

Parameters:

Name Type Description Default
scores ndarray / List[ndarray]

Array of uncertainties or list of arrays.

required

Returns:

Type Description

Ordered index according to the uncertainty (highest to lowest).

 def reorder_indices(self, scores):
    """
    Order indices given their uncertainty score.

    Args:
        scores (ndarray/ List[ndarray]): Array of uncertainties or list of arrays.
            A list is concatenated before ordering.

    Returns:
        ordered index according to the uncertainty (highest to lowest).

    Raises:
        ValueError if scores is not uni-dimensional.
    """
    if isinstance(scores, Sequence):
        scores = np.concatenate(scores)
    if scores.ndim > 1:
        raise ValueError(
            "Can't order sequence with more than 1 dimension. "
            f"Currently {scores.ndim} dimensions. "
            f"Is the heuristic reduction method set: {self._reduction_name}"
        )
    if scores.ndim != 1:
        # We want the uncertainty value per sample. Raised explicitly (instead of
        # asserted) so the check survives `python -O` and matches the documented error.
        raise ValueError(
            "Can't order a 0-dimensional score. "
            "One uncertainty value per sample is required."
        )
    ranks = np.argsort(scores)
    if self.reversed:
        # argsort is ascending; flip it when a higher score means more uncertain.
        ranks = ranks[::-1]
    ranks = _shuffle_subset(ranks, self.shuffle_prop)
    return ranks

### baal.active.heuristics.BALD

Bases: AbstractHeuristic

Sort by the highest acquisition function value.

Parameters:

Name Type Description Default
shuffle_prop float

Amount of noise to put in the ranking. Helps with selection bias (default: 0.0).

DEPRECATED
reduction Union[str, callable]

Function that aggregates the results (default: 'none').

'none'
References

https://arxiv.org/abs/1703.02910

 class BALD(AbstractHeuristic):
    """
    Sort by the highest acquisition function value.

    The score is the entropy of the mean prediction minus the mean entropy of the
    individual predictions, the mean being taken over the last (iterations) axis.

    Args:
        shuffle_prop (float): Amount of noise to put in the ranking. Helps with selection bias
            (default: 0.0).
        reduction (Union[str, callable]): function that aggregates the results
            (default: 'none').

    References:
        https://arxiv.org/abs/1703.02910
    """

    def __init__(self, shuffle_prop=DEPRECATED, reduction="none"):
        # Higher BALD value means more uncertain, hence reverse=True.
        super().__init__(shuffle_prop=shuffle_prop, reverse=True, reduction=reduction)

    @require_single_item
    @requireprobs
    def compute_score(self, predictions):
        """
        Compute the BALD acquisition value of each sample.

        Args:
            predictions (ndarray): Array of predictions laid out as
                [n_sample, n_class, ..., n_iterations].

        Returns:
            Array of scores, [batch size, ...].
        """
        assert predictions.ndim >= 3
        # Sum of p*log(p) over the class axis, one value per iteration.
        plogp_per_iteration = np.sum(xlogy(predictions, predictions), axis=1)
        # Average over iterations and negate: mean entropy, [batch size, ...].
        mean_entropy = -np.mean(plogp_per_iteration, axis=-1)
        # Prediction averaged over iterations, [batch_size, n_classes, ...].
        mean_prediction = np.mean(predictions, axis=-1)
        # Entropy of that averaged prediction, [batch size, ...].
        entropy_of_mean = -np.sum(xlogy(mean_prediction, mean_prediction), axis=1)
        return entropy_of_mean - mean_entropy

## compute_score(predictions)

Compute the score according to the heuristic.

Parameters:

Name Type Description Default
predictions ndarray

Array of predictions

required

Returns:

Type Description

Array of scores.

 @require_single_item
@requireprobs
def compute_score(self, predictions):
    """
    Compute the score according to the heuristic.

    Args:
        predictions (ndarray): Array of predictions

    Returns:
        Array of scores.
    """
    assert predictions.ndim >= 3
    # [n_sample, n_class, ..., n_iterations]
    expected_entropy = -np.mean(
        np.sum(xlogy(predictions, predictions), axis=1), axis=-1
    )  # [batch size, ...]
    expected_p = np.mean(predictions, axis=-1)  # [batch_size, n_classes, ...]
    entropy_expected_p = -np.sum(xlogy(expected_p, expected_p), axis=1)  # [batch size, ...]
    bald_acq = entropy_expected_p - expected_entropy
    return bald_acq 

### baal.active.heuristics.Random

Bases: Precomputed

Random heuristic.

Parameters:

Name Type Description Default
shuffle_prop float

UNUSED

DEPRECATED
reduction Union[str, callable]

UNUSED.

'none'
seed Optional[int]

If provided, will seed the random generator.

None
 class Random(Precomputed):
    """Random heuristic.

    Assigns every sample a random score, so the resulting ranking is random.

    Args:
        shuffle_prop (float): UNUSED
        reduction (Union[str, callable]): UNUSED.
        seed (Optional[int]): If provided, will seed the random generator.
    """

    def __init__(self, shuffle_prop=DEPRECATED, reduction="none", seed=None):
        # shuffle_prop and reduction are accepted for signature parity but ignored.
        super().__init__(1.0, False)
        # A dedicated RandomState when seeded, the global numpy generator otherwise.
        self.rng = np.random if seed is None else np.random.RandomState(seed)

    def compute_score(self, predictions):
        """Return one random score per entry along the first axis of `predictions`."""
        n_samples = predictions.shape[0]
        return self.rng.rand(n_samples)

### baal.active.heuristics.Entropy

Bases: AbstractHeuristic

Sort by the highest entropy.

Parameters:

Name Type Description Default
shuffle_prop float

Amount of noise to put in the ranking. Helps with selection bias (default: 0.0).

DEPRECATED
reduction Union[str, callable]

function that aggregates the results (default: none).

'none'
 class Entropy(AbstractHeuristic):
    """
    Sort by the highest entropy.

    Args:
        shuffle_prop (float): Amount of noise to put in the ranking. Helps with selection bias
            (default: 0.0).
        reduction (Union[str, callable]): function that aggregates the results
            (default: none).
    """

    def __init__(self, shuffle_prop=DEPRECATED, reduction="none"):
        # Higher entropy means more uncertain, hence reverse=True.
        super().__init__(shuffle_prop=shuffle_prop, reverse=True, reduction=reduction)

    @require_single_item
    @singlepass
    @requireprobs
    def compute_score(self, predictions):
        """
        Compute the entropy of each prediction.

        Args:
            predictions (ndarray): Array of predictions; axis 1 is swapped to the
                front before the entropy is taken
                (presumably the class axis - TODO confirm against the decorators).

        Returns:
            Array of scores.
        """
        # scipy.stats.entropy reduces along axis 0 by default, so bring axis 1 first.
        swapped = np.swapaxes(predictions, 0, 1)
        return scipy.stats.entropy(swapped)