## -- Howto RL-001: Reward
## -- Executable code
## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.rl.examples
## -- Module : howto_rl_001_reward.py
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2021-05-30 0.0.0 DA Creation
## -- 2021-05-31 1.0.0 DA Release of first version
## -- 2021-09-11 1.0.1 MRD Change Header information to match our new library name
## -- 2022-10-13 1.0.2 SY Refactoring
## -- 2023-03-02 1.0.3 LSB Refactoring
## -------------------------------------------------------------------------------------------------
"""
Ver. 1.0.3 (2023-03-02)
This module shows how to create and interpret reward objects in your own projects.
You will learn:
1. How to use the reward class of MLPro.
2. How to use the reward class for the different reward types supported in MLPro.
"""
from mlpro.bf.various import Log
from mlpro.rl import Reward
class MyLog(Log):
    """Logger used by this howto to print the reward demo output.

    It only overrides two class-level constants of the MLPro ``Log`` base
    class; all logging behaviour is inherited unchanged.
    """

    # Presumably shown by Log as the "type" label of each log line -- confirm
    # against the mlpro.bf.various.Log documentation.
    C_TYPE = 'Reward Demo'

    # Left empty on purpose; presumably the "name" label of each log line.
    C_NAME = ''
if __name__ == "__main__":
    # Demo script: builds one Reward object per supported reward type
    # (overall, per agent, per agent and action), stores a few values in it
    # and logs what is read back through the matching getter.

    # 1 Some initial stuff
    my_log = MyLog()

    # 1.1 Unique agent ids
    C_AGENT_1 = 1
    C_AGENT_2 = 2
    C_AGENT_3 = 3

    # 1.2 Unique action ids
    C_AGENT_1_ACT_1 = 1
    C_AGENT_1_ACT_2 = 2
    C_AGENT_1_ACT_3 = 3
    C_AGENT_2_ACT_1 = 4

    # 2 Rewards as single overall scalar values (independent from agents and actions)
    my_log.log(Log.C_LOG_TYPE_I, 'Example for reward type C_TYPE_OVERALL:')
    reward = Reward(p_type=Reward.C_TYPE_OVERALL)
    reward.set_overall_reward(4.77)
    # NOTE(review): the overall value is read back through get_agent_reward()
    # with a dummy agent id of 0; presumably the id is ignored for
    # C_TYPE_OVERALL rewards -- confirm against the Reward class.
    my_log.log(Log.C_LOG_TYPE_I, 'Reward is just a scalar...', reward.get_agent_reward(0), '\n')

    # 3 Rewards as scalar values for every agent
    my_log.log(Log.C_LOG_TYPE_I, 'Example for reward type C_TYPE_EVERY_AGENT')
    reward = Reward(p_type=Reward.C_TYPE_EVERY_AGENT)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward is a list with entries for each agent...')

    reward.add_agent_reward(C_AGENT_1, 4.77)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 1 added:', reward.get_agent_reward(C_AGENT_1))

    reward.add_agent_reward(C_AGENT_2, 5.19)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 2 added:', reward.get_agent_reward(C_AGENT_2))

    reward.add_agent_reward(C_AGENT_3, 0.23)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 3 added:', reward.get_agent_reward(C_AGENT_3), '\n')

    # 4 Rewards as scalar values for every agent and its actions
    my_log.log(Log.C_LOG_TYPE_I, 'Example for reward type C_TYPE_EVERY_ACTION')
    reward = Reward(p_type=Reward.C_TYPE_EVERY_ACTION)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward is a list with entries for each agent and its action components...')

    reward.add_action_reward(C_AGENT_1, C_AGENT_1_ACT_1, 1.23)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 1, action 1 added:', reward.get_action_reward(C_AGENT_1, C_AGENT_1_ACT_1))

    reward.add_action_reward(C_AGENT_1, C_AGENT_1_ACT_2, 0.47)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 1, action 2 added:', reward.get_action_reward(C_AGENT_1, C_AGENT_1_ACT_2))

    reward.add_action_reward(C_AGENT_1, C_AGENT_1_ACT_3, 1.63)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 1, action 3 added:', reward.get_action_reward(C_AGENT_1, C_AGENT_1_ACT_3))

    # The action id of agent 2's first action is 4 (see 1.2), hence "action 4"
    # in the message below.
    reward.add_action_reward(C_AGENT_2, C_AGENT_2_ACT_1, 4.23)
    my_log.log(Log.C_LOG_TYPE_I, 'Reward for agent 2, action 4 added:', reward.get_action_reward(C_AGENT_2, C_AGENT_2_ACT_1))
## -- Cross Reference