from axelrod import random_choice
from axelrod.action import Action
from axelrod.player import Player
C, D = Action.C, Action.D
class BushMosteller(Player):
"""
    A player based on the Bush-Mosteller reinforcement learning algorithm. It
    decides what to play based only on its own previous payoffs.

    The probabilities of playing C and D are updated using a stimulus, a
    value derived from the payoff of the previous play that represents a win
    or a loss. The more an action is rewarded over the rounds, the more the
    player is tempted to use it.
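
    With learning rate a and stimulus s, the probability p of the action
    played in the previous round is updated as:

        p <- p + a * s * (1 - p)   if s >= 0
        p <- p + a * s * p         if s < 0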
Names:
- Bush Mosteller: [Luis2008]_
"""
name = "Bush Mosteller"
classifier = {
"memory_depth": float("inf"),
"stochastic": True,
"makes_use_of": set(["game"]),
"long_run_time": False,
"inspects_source": False,
"manipulates_source": False,
"manipulates_state": False,
}
def __init__(
self,
c_prob: float = 0.5,
d_prob: float = 0.5,
aspiration_level_divider: float = 3.0,
learning_rate: float = 0.5,
) -> None:
"""
Parameters
c_prob: float, 0.5
Probability to play C , is modified during the match
d_prob: float, 0.5
Probability to play D , is modified during the match
aspiration_level_divider: float, 3.0
Value that regulates the aspiration level,
isn't modified during match
learning rate [0 , 1]
Percentage of learning speed
Variables / Constants
stimulus (Var: [-1 , 1]): float
Value that impacts the changes of action probability
_aspiration_level: float
Value that impacts the stimulus changes, isn't modified during match
_init_c_prob , _init_d_prob : float
Values used to properly set up reset(),
set to original probabilities
"""
super().__init__()
self._c_prob, self._d_prob = c_prob, d_prob
self._init_c_prob, self._init_d_prob = c_prob, d_prob
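        # The aspiration level is a fixed fraction of the largest payoff
        # available in the game matrix (R, P, S, T).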
        self._aspiration_level = abs(
            max(self.match_attributes["game"].RPST()) / aspiration_level_divider
        )
self._stimulus = 0.0
self._learning_rate = learning_rate
    def stimulus_update(self, opponent: Player):
"""
        Updates the stimulus attribute based on the payoff of the previous
        round. Used by the strategy.
Parameters
opponent : axelrod.Player
The current opponent
"""
game = self.match_attributes["game"]
last_round = (self.history[-1], opponent.history[-1])
scores = game.score(last_round)
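        # The stimulus measures how far the last payoff sits from the
        # aspiration level, normalised by the distance between the best
        # possible payoff and the aspiration level.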
        previous_payoff = scores[0]
        self._stimulus = (previous_payoff - self._aspiration_level) / abs(
            max(game.RPST()) - self._aspiration_level
        )
        # Clamp the stimulus at -1. No upper check is needed: the numerator
        # can never exceed the normalising distance to the best possible
        # payoff.
if self._stimulus < -1:
self._stimulus = -1
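        # Worked example, assuming the default parameters and the standard
        # game matrix (R, P, S, T) = (3, 1, 0, 5): the aspiration level is
        # 5 / 3, so a payoff of 3 after playing C gives a stimulus of
        # (3 - 5/3) / (5 - 5/3) = 0.4 and raises c_prob from 0.5 to
        # 0.5 + 0.5 * 0.4 * (1 - 0.5) = 0.6.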
        # Update the probability of playing C after a previous play of C
if self.history[-1] == C:
if self._stimulus >= 0:
self._c_prob += (
self._learning_rate * self._stimulus * (1 - self._c_prob)
)
elif self._stimulus < 0:
self._c_prob += self._learning_rate * self._stimulus * self._c_prob
        # Update the probability of playing D after a previous play of D
if self.history[-1] == D:
if self._stimulus >= 0:
self._d_prob += (
self._learning_rate * self._stimulus * (1 - self._d_prob)
)
elif self._stimulus < 0:
self._d_prob += self._learning_rate * self._stimulus * self._d_prob
    def strategy(self, opponent: Player) -> Action:
        # First turn: no payoff has been observed yet, so choose according
        # to the initial probabilities.
if len(self.history) == 0:
return random_choice(self._c_prob / (self._c_prob + self._d_prob))
        # Update the stimulus from the player's own latest payoff, then
        # choose an action from the renormalised probabilities.
self.stimulus_update(opponent)
return random_choice(self._c_prob / (self._c_prob + self._d_prob))
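
# A minimal usage sketch, assuming a standard axelrod installation (Match,
# TitForTat, play() and final_score() are part of the library's public API):
#
#     import axelrod as axl
#
#     players = (axl.BushMosteller(), axl.TitForTat())
#     match = axl.Match(players, turns=10)
#     match.play()           # list of (Action, Action) pairs, one per turn
#     match.final_score()    # cumulative (player, opponent) scores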