Source code for ignite.metrics.nlp.bleu

import math
from collections import Counter
from typing import Any, Callable, Sequence, Tuple, Union, ValuesView

import torch

from ignite.exceptions import NotComputableError
from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce
from ignite.metrics.nlp.utils import modified_precision

__all__ = ["Bleu"]


def _closest_ref_length(references: Sequence[Sequence[Any]], hyp_len: int) -> int:
    ref_lens = (len(reference) for reference in references)
    closest_ref_len = min(ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len))
    return closest_ref_len


class _Smoother:
    """
    Smoothing helper
    http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
    """

    def __init__(self, method: str):
        valid = ["no_smooth", "smooth1", "nltk_smooth2", "smooth2"]
        if method not in valid:
            raise ValueError(f"Smooth is not valid (expected: {valid}, got: {method})")
        self.smooth = method

    def __call__(self, numerators: Counter, denominators: Counter) -> Sequence[float]:
        method = getattr(self, self.smooth)
        return method(numerators, denominators)

    @staticmethod
    def smooth1(numerators: Counter, denominators: Counter) -> Sequence[float]:
        epsilon = 0.1
        denominators_ = [max(1, d) for d in denominators.values()]
        return [n / d if n != 0 else epsilon / d for n, d in zip(numerators.values(), denominators_)]

    @staticmethod
    def nltk_smooth2(numerators: Counter, denominators: Counter) -> Sequence[float]:
        denominators_ = [max(1, d) for d in denominators.values()]
        return _Smoother._smooth2(numerators.values(), denominators_)

    @staticmethod
    def smooth2(numerators: Counter, denominators: Counter) -> Sequence[float]:
        return _Smoother._smooth2(numerators.values(), denominators.values())

    @staticmethod
    def _smooth2(
        numerators: Union[ValuesView[int], Sequence[int]], denominators: Union[ValuesView[int], Sequence[int]]
    ) -> Sequence[float]:
        return [(n + 1) / (d + 1) if i != 0 else n / d for i, (n, d) in enumerate(zip(numerators, denominators))]

    @staticmethod
    def no_smooth(numerators: Counter, denominators: Counter) -> Sequence[float]:
        denominators_ = [max(1, d) for d in denominators.values()]
        return [n / d for n, d in zip(numerators.values(), denominators_)]


[docs]class Bleu(Metric):
    r"""Calculates the `BLEU score <https://en.wikipedia.org/wiki/BLEU>`_.

    .. math::
       \text{BLEU} = b_{p} \cdot \exp \left( \sum_{n=1}^{N} w_{n} \: \log p_{n} \right)

    where :math:`N` is the order of n-grams, :math:`b_{p}` is a sentence brevety penalty, :math:`w_{n}` are
    positive weights summing to one and :math:`p_{n}` are modified n-gram precisions.

    More details can be found in `Papineni et al. 2002`__.

    __ https://www.aclweb.org/anthology/P02-1040.pdf

    In addition, a review of smoothing techniques can be found in `Chen et al. 2014`__

    __ http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf

    Remark :

        This implementation is inspired by nltk

    Args:
        ngram: order of n-grams.
        smooth: enable smoothing. Valid are ``no_smooth``, ``smooth1``, ``nltk_smooth2`` or ``smooth2``.
            Default: ``no_smooth``.
        output_transform: a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. This can be useful if, for example, you have a multi-output model and
            you want to compute the metric with respect to one of the outputs.
            By default, metrics require the output as ``(y_pred, y)`` or ``{'y_pred': y_pred, 'y': y}``.
        device: specifies which device updates are accumulated on. Setting the
            metric's device to be the same as your ``update`` arguments ensures the ``update`` method is
            non-blocking. By default, CPU.

    Example:

    .. code-block:: python

        from ignite.metrics.nlp import Bleu

        m = Bleu(ngram=4, smooth="smooth1")

        y_pred = "the the the the the the the"
        y = ["the cat is on the mat", "there is a cat on the mat"]

        m.update((y_pred.split(), [y.split()]))

        print(m.compute())

    .. versionadded:: 0.4.5
    """

    def __init__(
        self,
        ngram: int = 4,
        smooth: str = "no_smooth",
        output_transform: Callable = lambda x: x,
        device: Union[str, torch.device] = torch.device("cpu"),
    ):
        if ngram <= 0:
            raise ValueError(f"ngram order must be greater than zero (got: {ngram})")
        self.ngrams_order = ngram
        self.weights = [1 / self.ngrams_order] * self.ngrams_order
        self.smoother = _Smoother(method=smooth)
        super(Bleu, self).__init__(output_transform=output_transform, device=device)

    def _corpus_bleu(self, references: Sequence[Sequence[Any]], candidates: Sequence[Sequence[Any]],) -> float:
        p_numerators: Counter = Counter()
        p_denominators: Counter = Counter()

        if len(references) != len(candidates):
            raise ValueError(
                f"nb of candidates should be equal to nb of reference lists ({len(candidates)} != "
                f"{len(references)})"
            )

        # Iterate through each hypothesis and their corresponding references.
        for refs, hyp in zip(references, candidates):
            # For each order of ngram, calculate the numerator and
            # denominator for the corpus-level modified precision.
            for i in range(1, self.ngrams_order + 1):
                numerator, denominator = modified_precision(refs, hyp, i)
                p_numerators[i] += numerator
                p_denominators[i] += denominator

        # Returns 0 if there's no matching n-grams
        # We only need to check for p_numerators[1] == 0, since if there's
        # no unigrams, there won't be any higher order ngrams.
        if p_numerators[1] == 0:
            return 0

        # If no smoother, returns 0 if there's at least one a not matching n-grams
        if self.smoother.smooth == "no_smooth" and min(p_numerators.values()) == 0:
            return 0

        # Calculate the hypothesis lengths
        hyp_lengths = [len(hyp) for hyp in candidates]

        # Calculate the closest reference lengths.
        ref_lengths = [_closest_ref_length(refs, hyp_len) for refs, hyp_len in zip(references, hyp_lengths)]

        # Sum of hypothesis and references lengths
        hyp_len = sum(hyp_lengths)
        ref_len = sum(ref_lengths)

        # Calculate corpus-level brevity penalty.
        if hyp_len < ref_len:
            bp = math.exp(1 - ref_len / hyp_len) if hyp_len > 0 else 0.0
        else:
            bp = 1.0

        # Smoothing
        p_n = self.smoother(p_numerators, p_denominators)

        # Compute the geometric mean
        s = [w_i * math.log(p_i) for w_i, p_i in zip(self.weights, p_n)]
        gm = bp * math.exp(math.fsum(s))
        return gm

[docs]    @reinit__is_reduced
    def reset(self) -> None:
        self._sum_of_bleu = torch.tensor(0.0, dtype=torch.double, device=self._device)
        self._num_sentences = 0

[docs]    @reinit__is_reduced
    def update(self, output: Tuple[Sequence[Any], Sequence[Sequence[Any]]]) -> None:
        y_pred, y = output
        self._sum_of_bleu += self._corpus_bleu(references=[y], candidates=[y_pred])
        self._num_sentences += 1

[docs]    @sync_all_reduce("_sum_of_bleu", "_num_sentences")
    def compute(self) -> torch.Tensor:
        if self._num_sentences == 0:
            raise NotComputableError("Bleu must have at least one example before it can be computed.")
        return self._sum_of_bleu / self._num_sentences