# Source code for scml.oneshot.rl.reward

from typing import Any, Protocol, runtime_checkable

from negmas import SAOResponse

from scml.oneshot.awi import OneShotAWI

__all__ = ["RewardFunction", "DefaultRewardFunction"]


@runtime_checkable
class RewardFunction(Protocol):
    """
    Represents a reward function.

    Remarks:
        - `before_action` is called before the action is executed (for initialization) and
          should return the info to be passed to `__call__`.
        - `__call__` is called with the awi (to get the state), the action and that info,
          and should return the reward.
        - The protocol is runtime-checkable, so `isinstance(obj, RewardFunction)` only checks
          that both methods exist on `obj`; signatures are not verified at runtime.

    """

    def before_action(self, awi: OneShotAWI) -> Any:
        """
        Called before executing the action from the RL agent to save any information required
        for calculating the reward.

        Args:
            awi: `OneShotAWI` to access the agent's state before the action is executed.

        Returns:
            Any value. It will be passed as `info` to `__call__()` when it is time to calculate
            the reward.
        """
        ...

    def __call__(
        self, awi: OneShotAWI, action: dict[str, SAOResponse], info: Any
    ) -> float:
        """
        Called to calculate the reward to be given to the agent at the end of a step.

        Args:
            awi: `OneShotAWI` to access the agent's state
            action: The action (decoded) as a mapping from partner ID to responses to their last offer.
            info: Information generated from `before_action()`. You can use this to store baselines
                  for calculating the reward.

        Returns:
            The reward (a number) to be given to the agent at the end of the step.
        """
        ...





class DefaultRewardFunction(RewardFunction):
    """
    The default reward function of SCML

    Remarks:
        - The reward is the change in the agent's score over the action:
          `awi.current_score` read after the action minus `awi.current_score`
          read before it (positive when the score increased).

    """

    def before_action(self, awi: OneShotAWI) -> float:
        """
        Records the baseline used to compute the reward.

        Args:
            awi: `OneShotAWI` to access the agent's state before the action is executed.

        Returns:
            The value of `awi.current_score` at the time of the call. It is passed back
            as `info` to `__call__()`.
        """
        return awi.current_score

    def __call__(
        self, awi: OneShotAWI, action: dict[str, SAOResponse], info: float
    ) -> float:
        """
        Calculates the reward as the score difference relative to the saved baseline.

        Args:
            awi: `OneShotAWI` to access the agent's state after the action.
            action: The decoded action. It is not used by this reward function.
            info: The score returned by `before_action()`.

        Returns:
            `awi.current_score - info`.
        """
        # The action is accepted only to satisfy the RewardFunction signature.
        _ = action
        return awi.current_score - info