Source code for embedded_voting.experiments.aggregation

import numpy as np
import embedded_voting as ev
from tqdm import tqdm
from multiprocess.pool import Pool


def evaluate(list_agg, truth, testing, training, pool=None):
    """
    Run a simulation.

    Parameters
    ----------
    list_agg: :class:`list`
        Rules to test.
    truth: :class:`~numpy.ndarray`
        Ground truth of testing values (n_tries X n_candidates).
    testing: :class:`~numpy.ndarray`
        Estimated scores (n_agents X n_tries X n_candidates).
    training: :class:`~numpy.ndarray`
        Training scores (n_agents X training_size).
    pool: :class:`~multiprocess.pool.Pool`, optional
        Use parallelism.

    Returns
    -------
    :class:`~numpy.ndarray`
        Efficiency of each algorithm.

    Examples
    --------
    >>> np.random.seed(42)
    >>> n_training = 10
    >>> n_tries = 100
    >>> n_c = 20
    >>> generator = make_generator()
    >>> training = generator(n_training)
    >>> testing = generator(n_tries*n_c).reshape(generator.n_voters, n_tries, n_c)
    >>> truth = generator.ground_truth_.reshape(n_tries, n_c)
    >>> list_agg = make_aggs(order=default_order+['Rand'])
    >>> with Pool() as p:
    ...     res = evaluate(list_agg=list_agg[:-1], truth=truth, testing=testing, training=training, pool=p)
    >>> ', '.join(f"{a.name}: {r:.2f}" for a, r in zip(list_agg, res))
    'MA: 0.94, PL+: 0.89, EV+: 0.95, EV: 0.94, AV: 0.90, PV: 0.86, RV: 0.85, Single: 0.82, PL: 0.78'
    >>> res = evaluate(list_agg=list_agg, truth=truth, testing=testing, training=training)
    >>> ', '.join(f"{a.name}: {r:.2f}" for a, r in zip(list_agg, res))
    'MA: 0.94, PL+: 0.89, EV+: 0.95, EV: 0.94, AV: 0.90, PV: 0.86, RV: 0.85, Single: 0.82, PL: 0.78, Rand: 0.49'
    """
    n_tries = testing.shape[1]
    # Pre-train the aggregators whose name ends with '+' on the training set.
    for agg in list_agg:
        if agg.name.endswith('+'):
            _ = agg(training).winner_
            agg.train()
    results = np.zeros(len(list_agg))

    def election(bundle):
        ratings_candidates, truth_candidates = bundle
        res = np.zeros(len(list_agg))
        # Reset the aggregators that are not pre-trained, so that tries stay independent.
        for agg in list_agg:
            if agg.name[-1] != '+':
                agg.reset()
        # Welfare of each candidate, computed from the ground truth.
        welfare = ev.RuleSumRatings()(ev.Ratings([truth_candidates])).welfare_
        # We run the aggregators, and we look at the welfare of the winner.
        for k, agg in enumerate(list_agg):
            w = agg(ratings_candidates).winner_
            res[k] += welfare[w]
        return res

    if pool is not None:
        chunk_size = max(1, int(n_tries / 100))
        bundles = tqdm(((testing[:, i, :], truth[i, :]) for i in range(n_tries)), total=n_tries)
        for result in pool.imap_unordered(election, bundles, chunksize=chunk_size):
            results += result
    else:
        for result in (election((testing[:, i, :], truth[i, :])) for i in tqdm(range(n_tries))):
            results += result
    return results / n_tries


def make_generator(groups=None, truth=None, features=None, feat_noise=1,
                   feat_f=None, dist_noise=.1, dist_f=None):
    """
    Craft a ratings generator.

    Parameters
    ----------
    groups: :class:`list` of `int`
        Sizes of each group.
    truth: :class:`TruthGenerator`, default=N(0, 1)
        Ground truth generator.
    features: :class:`~numpy.ndarray`
        Features correlations.
    feat_noise: :class:`float`, default=1.0
        Feature noise intensity.
    feat_f: `method`, defaults to normal law
        Feature noise distribution.
    dist_noise: :class:`float`, default=0.1
        Distinct noise intensity.
    dist_f: `method`, defaults to normal law
        Distinct noise distribution.

    Returns
    -------
    :class:`Generator`
        Provides ground truths and estimates.

    Examples
    --------
    >>> np.random.seed(42)
    >>> generator = make_generator()
    >>> ratings = generator(2)
    >>> truth = generator.ground_truth_
    >>> truth[0]
    0.4967141530112327
    >>> ratings[:, 0]
    Ratings([1.22114616, 1.09745525, 1.1986587 , 1.09806092, 1.09782972,
             1.16859892, 0.95307467, 0.97191091, 1.08817394, 1.04311958,
             1.17582742, 1.05360028, 1.00317232, 1.29096757, 1.12182506,
             1.15115551, 1.00192787, 1.08996442, 1.15549495, 1.02930333,
             2.05731381, 0.20249691, 0.23340782, 2.01575631])
    >>> truth[1]
    -0.13826430117118466
    >>> ratings[:, 1]
    Ratings([ 1.73490024,  1.51804687,  1.58119528,  1.73370001,  1.78786054,
              1.73115071,  1.70244906,  1.68390351,  1.56616168,  1.64202946,
              1.66795001,  1.81972611,  1.74837571,  1.53770987,  1.74642228,
              1.67550566,  1.64632168,  1.77518151,  1.81711384,  1.8071419 ,
             -0.23568328, -1.22689647,  0.71740695, -1.26155344])
    """
    if groups is None:
        groups = [20] + [1]*4
    if truth is None:
        truth = ev.TruthGeneratorNormal(0, 1)
    if features is None:
        features = np.eye(len(groups))
    if feat_f is None:
        feat_f = np.random.normal
    if dist_f is None:
        dist_f = np.random.normal
    generator_parameters = {
        "truth_generator": truth,
        "groups_sizes": groups,  # Number of estimators in each group
        "groups_features": features,  # Features of the groups
        "group_noise": feat_noise,  # Standard deviation of the feature noise
        "group_noise_f": feat_f,  # Distribution of the feature noise
        "independent_noise": dist_noise,  # Standard deviation of the independent (distinct) noise
        "independent_noise_f": dist_f,  # Distribution of the independent noise
    }
    return ev.RatingsGeneratorEpistemicGroupsMixFree(**generator_parameters)


def f_max(ratings_v, history_mean, history_std):
    """
    Normalize the scores and keep their positive part.

    Parameters
    ----------
    ratings_v: :class:`~numpy.ndarray`
        Score vector.
    history_mean: :class:`float`
        Observed mean.
    history_std: :class:`float`
        Observed standard deviation.

    Returns
    -------
    :class:`~numpy.ndarray`
        The positive part of the normalized scores.

    Examples
    --------
    >>> f_max(10, 5, 2)
    2.5
    >>> f_max(10, 20, 10)
    0.0
    """
    return np.maximum(0, (ratings_v - history_mean) / history_std)


def f_renorm(ratings_v, history_mean, history_std):
    """
    Normalize the scores by the observed mean and standard deviation.

    Parameters
    ----------
    ratings_v: :class:`~numpy.ndarray`
        Score vector.
    history_mean: :class:`float`
        Observed mean.
    history_std: :class:`float`
        Observed standard deviation.

    Returns
    -------
    :class:`~numpy.ndarray`
        The scores with mean and std normalized.

    Examples
    --------
    >>> f_renorm(10, 5, 2)
    2.5
    >>> f_renorm(10, 20, 10)
    -1.0
    """
    return (ratings_v - history_mean) / history_std


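# Illustrative aside (an added note, not from the library source): both helpers above
# broadcast over NumPy arrays, which is presumably how they are applied to whole score
# vectors by RuleRatingsHistory. For example:
#     f_max(np.array([1., 3., 7.]), 3., 2.)    -> array([0., 0., 2.])
#     f_renorm(np.array([1., 3., 7.]), 3., 2.) -> array([-1.,  0.,  2.])

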
class SingleEstimator:
    """
    Returns the best estimation of one given agent. Mimics a `Rule`.

    Parameters
    ----------
    i: :class:`int`
        Index of the selected agent.

    Examples
    --------
    >>> np.random.seed(42)
    >>> generator = make_generator()
    >>> ratings = generator(7)
    >>> rule = SingleEstimator(10)
    >>> ratings[10, :]
    Ratings([ 1.2709017 ,  0.03209107,  1.98196138,  1.12347711, -1.55465272,
             -0.72448238,  0.63366952])
    >>> rule(ratings).winner_
    2
    """
    def __init__(self, i):
        self.i = i
        self.winner_ = None

    def __call__(self, ratings, embeddings=None):
        # The winner is the candidate with the highest score according to agent i.
        self.winner_ = np.argmax(ratings[self.i])
        return self


class RandomWinner:
    """
    Returns a random winner. Mimics a `Rule`.

    Examples
    --------
    >>> np.random.seed(42)
    >>> generator = make_generator()
    >>> ratings = generator(7)
    >>> rule = RandomWinner()
    >>> rule(ratings).winner_
    4
    >>> rule(ratings).winner_
    3
    """
    def __init__(self):
        self.winner_ = None

    def __call__(self, ratings, embeddings=None):
        # Pick a candidate uniformly at random.
        self.winner_ = np.random.randint(ratings.shape[1])
        return self


def make_aggs(groups=None, order=None, features=None, group_noise=1, distinct_noise=.1):
    """
    Craft a list of aggregator rules.

    Parameters
    ----------
    groups: :class:`list` of `int`
        Sizes of each group (for the Model-Aware rule).
    order: :class:`list`, optional
        Short names of the aggregators to return.
    features: :class:`~numpy.ndarray`, optional
        Features correlations (for the Model-Aware rule). Defaults to independent groups.
    group_noise: :class:`float`, default=1.0
        Feature noise intensity.
    distinct_noise: :class:`float`, default=0.1
        Distinct noise intensity.

    Returns
    -------
    :class:`list`
        Aggregators.

    Examples
    --------
    >>> list_agg = make_aggs()
    >>> [agg.name for agg in list_agg]
    ['MA', 'PL+', 'EV+', 'EV', 'AV', 'PV', 'RV', 'Single', 'PL']
    """
    if groups is None:
        groups = [20] + [1]*4
    if order is None:
        order = default_order
    if features is None:
        features = np.eye(len(groups))
    dict_agg = {
        'MA': ev.Aggregator(rule=ev.RuleModelAware(groups, features, group_noise, distinct_noise), name="MA"),
        'EV': ev.Aggregator(rule=ev.RuleFastNash(), name="EV"),
        'AV': ev.Aggregator(rule=ev.RuleRatingsHistory(rule=ev.RuleApprovalProduct(), f=f_max), name="AV"),
        'PV': ev.Aggregator(rule=ev.RuleRatingsHistory(rule=ev.RuleShiftProduct(), f=f_renorm), name="PV"),
        'RV': ev.Aggregator(rule=ev.RuleRatingsHistory(rule=ev.RuleSumRatings(), f=f_renorm), name="RV"),
        'PL': ev.Aggregator(rule=ev.RuleRatingsHistory(rule=ev.RuleMLEGaussian(), f=f_renorm), name="PL"),
        'PL+': ev.Aggregator(rule=ev.RuleRatingsHistory(rule=ev.RuleMLEGaussian(), f=f_renorm), name="PL+",
                             default_train=False, default_add=False),
        'EV+': ev.Aggregator(rule=ev.RuleFastNash(), name="EV+", default_train=False, default_add=False),
        'Single': ev.Aggregator(rule=SingleEstimator(groups[0] - 1), name="Single"),
        'Rand': ev.Aggregator(rule=RandomWinner(), name="Rand"),
    }
    return [dict_agg[k] for k in order]


# Default ordering of the aggregators returned by make_aggs.
default_order = ['MA', 'PL+', 'EV+', 'EV', 'AV', 'PV', 'RV', 'Single', 'PL']

# Color associated with each aggregator (e.g. for plots).
colors = {"EV": "#de302a", "AV": "#32e62c", "RV": "#dee046", "PL": "#2488ed", "PL+": "#4540cf",
          "EV+": "#a83d3d", "Rand": "#686868", "MA": "#454545", "Single": "#808080", "PV": "#ed921a"}

# LaTeX glossary handles for each aggregator.
default_handles = {"EV": "\\gls{ev}", "AV": "\\gls{av}", "RV": "\\gls{rv}", "PL": "\\gls{ml}",
                   "PL+": "\\gls{ml+}", "EV+": "\\gls{ev+}", "Rand": "\\gls{rw}", "MA": "\\gls{ga}",
                   "Single": "\\gls{sa}", "PV": "\\gls{np}"}

# Plain-text handles for each aggregator.
handles = {"EV": "EV", "AV": "AV", "RV": "RV", "PL": "PL", "PL+": "PL+", "EV+": "EV+",
           "Rand": "RW", "MA": "MA", "Single": "SA", "PV": "NP"}
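

# Illustrative usage sketch (a hypothetical addition, not taken from the library): one
# way to combine make_generator, make_aggs and evaluate, then plot the efficiencies
# with the colors and handles defined above. It assumes matplotlib is installed; the
# sizes n_training, n_tries and n_c are arbitrary choices.
if __name__ == "__main__":  # pragma: no cover
    import matplotlib.pyplot as plt

    np.random.seed(42)
    n_training, n_tries, n_c = 10, 100, 20

    # Generate training ratings, testing ratings and the associated ground truth.
    generator = make_generator()
    training = generator(n_training)
    testing = generator(n_tries * n_c).reshape(generator.n_voters, n_tries, n_c)
    truth = generator.ground_truth_.reshape(n_tries, n_c)

    # Evaluate the default list of aggregators.
    list_agg = make_aggs()
    res = evaluate(list_agg=list_agg, truth=truth, testing=testing, training=training)

    # Bar chart of the efficiency (average welfare of the winner) of each aggregator.
    names = [handles[agg.name] for agg in list_agg]
    plt.bar(names, res, color=[colors[agg.name] for agg in list_agg])
    plt.ylabel("Efficiency (average welfare of the winner)")
    plt.show()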