# Source code for embedded_voting.rules.singlewinner_rules.rule_mle_gaussian

import numpy as np
from embedded_voting.embeddings_from_ratings.embeddings_from_ratings_covariance import EmbeddingsFromRatingsCovariance
from embedded_voting.ratings.ratings_generator_epistemic_multivariate import RatingsGeneratorEpistemicMultivariate
from embedded_voting.rules.singlewinner_rules.rule import Rule
from embedded_voting.utils.cached import cached_property
from embedded_voting.utils.miscellaneous import clean_zeros, pseudo_inverse_scalar


class RuleMLEGaussian(Rule):
    """
    A rule that computes the scores of the candidates, assuming that the embeddings of the voters correspond to a
    covariance matrix.

    For this rule, the embeddings must be a matrix `n_voters` * `n_voters`.

    Parameters
    ----------
    embeddings_from_ratings
        Forwarded to the parent `Rule` constructor. If `None`, an `EmbeddingsFromRatingsCovariance` instance
        is created and used instead.
    tol : float
        Tolerance passed to `clean_zeros`, both for the singular values of the embeddings matrix and for the
        entries of its pseudo-inverse.

    Notes
    -----
    The score of a candidate is the dot product of its ratings with the column sums of the pseudo-inverse of
    the embeddings matrix (see `weights_`). The weights are not divided by their sum, so the scores are not on
    the same scale as the ground truth: compare `ground_truth_` and `scores_` in the example below.

    Examples
    --------
    Consider a generating epistemic model, where the true value of each candidate is uniformly drawn in a given
    interval, and where the voters add a noise which is multivariate Gaussian.

    >>> np.random.seed(42)
    >>> covariance_matrix = np.array([
    ...     [2.02, 1.96, 0.86, 0.81, 1.67],
    ...     [1.96, 3.01, 1.46, 0.69, 1.59],
    ...     [0.86, 1.46, 0.94, 0.39, 0.7 ],
    ...     [0.81, 0.69, 0.39, 0.51, 0.9 ],
    ...     [1.67, 1.59, 0.7 , 0.9 , 1.78]
    ... ])
    >>> ratings_generator = RatingsGeneratorEpistemicMultivariate(covariance_matrix=covariance_matrix)
    >>> ratings = ratings_generator(n_candidates=2)
    >>> ratings_generator.ground_truth_
    array([17.73956049, 14.3887844 ])
    >>> ratings
    Ratings([[17.56232759, 14.51592899],
             [16.82544972, 15.78818081],
             [17.51952581, 14.44449175],
             [17.34964888, 14.4010885 ],
             [16.69480298, 14.9281998 ]])

    If we know the covariance matrix of the noises, then `RuleMLEGaussian` weights the ratings as the maximum
    likelihood estimator of the ground truth does (up to the missing normalization described in the Notes):

    >>> election = RuleMLEGaussian()(ratings, embeddings=covariance_matrix)
    >>> election.scores_ # doctest: +ELLIPSIS
    [268.6683142..., 221.5083075...]
    """

    def __init__(self, embeddings_from_ratings=None, tol=1e-6):
        self.tol = tol
        if embeddings_from_ratings is None:
            # Default: build the embeddings with `EmbeddingsFromRatingsCovariance`.
            embeddings_from_ratings = EmbeddingsFromRatingsCovariance()
        super().__init__(score_components=1, embeddings_from_ratings=embeddings_from_ratings)

    @cached_property
    def pinv_covariance_(self):
        """
        Pseudo-inverse of `self.embeddings_`, computed from its singular value decomposition.

        The singular values are cleaned with `clean_zeros`, inverted one by one with `pseudo_inverse_scalar`,
        and the reassembled matrix is cleaned with `clean_zeros` again before being returned.

        Returns
        -------
        np.ndarray
            Matrix of shape `(m, n)` where `(n, m)` is the shape of `self.embeddings_`.
        """
        tol = self.tol
        n, m = self.embeddings_.shape
        min_d = min(n, m)
        # `np.linalg.svd` returns the last factor already transposed: embeddings = u @ diag(s) @ vh.
        u, s, vh = np.linalg.svd(self.embeddings_)
        # The return value is ignored, so this relies on `clean_zeros` modifying `s` in place
        # (presumably zeroing entries smaller than `tol`) -- TODO confirm against the helper.
        clean_zeros(s, tol=tol)
        # Rectangular "diagonal" matrix holding the inverted singular values.
        dia = np.zeros((m, n))
        dia[:min_d, :min_d] = np.diag([pseudo_inverse_scalar(e) for e in s])
        inverse = vh.T @ dia @ u.T
        clean_zeros(inverse, tol=tol)
        return inverse

    @cached_property
    def weights_(self):
        """
        Weight given to each voter: the column sums of `pinv_covariance_`.

        Returns
        -------
        np.ndarray
            One-dimensional array with one entry per column of `pinv_covariance_`.
        """
        return self.pinv_covariance_.sum(axis=0)

    def _score_(self, candidate):
        """
        Return the score of `candidate`: the dot product of its ratings with `weights_`, as a Python float.
        """
        return float(self.ratings_.candidate_ratings(candidate) @ self.weights_)


"""
    #>>> np.linalg.norm(ratings_generator.ground_truth_ - election.scores_)  # Error estimation
    #0.4783006898563199

    As a baseline, consider the error for the naive arithmetic mean:

    #>>> scores_average = np.mean(ratings, axis=0)
    #>>> np.linalg.norm(ratings_generator.ground_truth_ - scores_average, 2)
    #0.6911799682033576

    However, in practice, we often do not know the covariance matrix of the noise. A workaround can be to
    use the covariance matrix of the ratings:

    #>>> embeddings = EmbeddingsFromRatingsCovariance()(ratings)
    #>>> election = RuleMLEGaussian()(ratings, embeddings)
    #>>> election.scores_  # doctest: +ELLIPSIS
    #[12.90546983325..., 19.502265626617...]

    Actually, this is the default behavior of `RuleMLEGaussian` when no embeddings are given:

    #>>> election = RuleMLEGaussian()(ratings)
    #>>> election.scores_  # doctest: +ELLIPSIS
    #[12.90546983325..., 19.502265626617...]

    Unfortunately, this approximation is relevant only if there is a large number of candidates (to have a good
    estimation of the covariance matrix) and if the noise is large compared to the differences between true values (so
    that the covariance of ratings approximates well the covariance of noises), which is not a common case. In our
    example, the assumptions are not met, and the result is not even as good as the naive arithmetic mean:

    #>>> np.linalg.norm(ratings_generator.ground_truth_ - election.scores_)  # Error estimation
    #0.839945516610...
    
"""