Howto GT-DG-002: Train Multi-Player in Stackelberg Games
Prerequisites
- Please install the following packages to run this example properly: MLPro and NumPy (both are imported by the code below).
Executable code
## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.gt.examples
## -- Module : howto_gt_dg_002_train_own_multi_player_in_stackelberg_games.py
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2023-04-12 0.0.0 SY Creation
## -- 2023-04-12 1.0.0 SY Release of first version
## -- 2023-05-11 1.0.1 SY Refactoring
## -- 2021-08-22 1.0.2 SY Refactoring due to compatibility in mlpro.gt.dynamicsgames
## -- 2024-02-16 1.0.3 SY Renaming Module
## -------------------------------------------------------------------------------------------------
"""
Ver. 1.0.3 (2024-02-16)
This module shows how to train your own multi-player in Stackelberg games.
You will learn:
1) How to set up your own players' policies
2) How to set up your own game in dynamic Stackelberg games, including players and game board interaction
3) How to assign a player with a role as leader or follower
4) How to run the GT training and train your own players
"""
from mlpro.rl import *
from mlpro.gt import *
from mlpro.gt.dynamicgames.stackelberg import *
from mlpro.gt.pool.boards.bglp import BGLP_GT
import random
import numpy as np
from pathlib import Path
# 1 Implement your own player policy
class MyPolicy(Policy):
    """
    Minimal example policy: it acts randomly and never adapts.
    """

    C_NAME = 'MyPolicy'

    def compute_action(self, p_state: State, p_action_leaders=False) -> Action:
        """
        Builds an action with one random value per dimension of the action space.

        Parameters
        ----------
        p_state : State
            Current state. It is not evaluated by this policy.
        p_action_leaders
            Not evaluated by this policy.

        Returns
        -------
        Action
            Action created from the policy's id, its action space and the random values.
        """

        # One draw from random.random() per action dimension, in dimension order
        num_dims = self._action_space.get_num_dim()
        random_values = np.array([random.random() for _ in range(num_dims)])

        return Action(self._id, self._action_space, random_values)

    def _adapt(self, **p_args) -> bool:
        """
        Placeholder for the adaptation step: it only writes a log line.

        Parameters
        ----------
        p_args
            Keyword arguments of the adaptation call. They are ignored here.

        Returns
        -------
        bool
            Always False, since nothing is adapted.
        """

        self.log(self.C_LOG_TYPE_I, 'Sorry, I am a stupid agent...')
        return False
# 2 Implement your own game
class MyGame(Game):
    """
    Stackelberg game in which five players with random policies act on the BGLP_GT game board.
    """

    C_NAME = 'Matrix'

    def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
        """
        Creates the game board and a multi-player holding five single players.

        Player i (i = 0..4) observes the state dimensions i and i+1 and controls the action
        dimension i of the game board. The first and the last player are leaders, the three
        players in between are followers.

        Parameters
        ----------
        p_mode
            Not evaluated by this method.
        p_ada : bool
            Adaptivity flag, forwarded to the multi-player, the players and their policies.
        p_visualize : bool
            Visualization flag, forwarded to the multi-player, the players and their policies.
        p_logging
            Log level, forwarded to the game board, the multi-player, the players and their policies.

        Returns
        -------
        Model
            The multi-player object including all added players.
        """

        # 1 Setup the game board (BGLP_GT) that all players interact with
        self._env = BGLP_GT(p_logging=p_logging)

        # 2 Setup Multi-Player
        # 2.1 Create empty Multi-Player
        multi_player = GTMultiPlayer_SG(
            p_name='BGLP Players with Random Policies',
            p_ada=p_ada,
            p_visualize=p_visualize,
            p_logging=p_logging
        )

        # 2.2 Add Single-Players with own policy
        state_space = self._env.get_state_space()
        action_space = self._env.get_action_space()
        ss_ids = state_space.get_dim_ids()
        as_ids = action_space.get_dim_ids()

        # (name, role) per player; the position in this tuple is the player id and also
        # selects the observed state dimensions and the controlled action dimension
        player_specs = (
            ('BELT_CONVEYOR_A (Leader)', GTPlayer_SG.C_PLAYER_LEADER),
            ('VACUUM_PUMP_B (Follower)', GTPlayer_SG.C_PLAYER_FOLLOWER),
            ('VIBRATORY_CONVEYOR_B (Follower)', GTPlayer_SG.C_PLAYER_FOLLOWER),
            ('VACUUM_PUMP_C (Follower)', GTPlayer_SG.C_PLAYER_FOLLOWER),
            ('ROTARY_FEEDER_C (Leader)', GTPlayer_SG.C_PLAYER_LEADER),
        )

        for player_id, (player_name, player_role) in enumerate(player_specs):
            policy = MyPolicy(
                p_observation_space=state_space.spawn([ss_ids[player_id], ss_ids[player_id + 1]]),
                p_action_space=action_space.spawn([as_ids[player_id]]),
                p_buffer_size=1,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging
            )

            multi_player.add_player(
                p_player=GTPlayer_SG(
                    p_policy=policy,
                    p_name=player_name,
                    p_id=player_id,
                    p_ada=p_ada,
                    p_visualize=p_visualize,
                    p_logging=p_logging,
                    p_role=player_role
                ),
                p_weight=1.0
            )

        # 2.3 Return multi-player as adaptive model
        return multi_player
# 3 Create game and run some cycles
# Pick the run parameters: (cycle limit, log level, visualization, result path)
if __name__ == "__main__":
    # 3.1 Demo mode: full logging, visualization on, results stored in the home directory
    cycle_limit, logging, visualize, path = 200, Log.C_LOG_ALL, True, str(Path.home())
else:
    # 3.2 Internal unit test: short run, no logging, no visualization, no result path
    cycle_limit, logging, visualize, path = 10, Log.C_LOG_NOTHING, False, None

# 3.3 Create and run training object
training = GTTraining(p_game_cls=MyGame,
                      p_cycle_limit=cycle_limit,
                      p_path=path,
                      p_visualize=visualize,
                      p_logging=logging)

training.run()
Results
YYYY-MM-DD HH:MM:SS.SSSSSS I Training "GT Training": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Reset
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Multi-Player SG "BGLP Players with Random Policies": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "BELT_CONVEYOR_A (Leader)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7 added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_B (Follower)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VIBRATORY_CONVEYOR_B (Follower)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_C (Follower)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930 added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "ROTARY_FEEDER_C (Leader)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15 added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Training "GT Training": Training started (without hyperparameter tuning)
YYYY-MM-DD HH:MM:SS.SSSSSS I Results "RL": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": -- Training run 0 started...
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:00 : Scenario reset with seed 0
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Reset
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": -- Training episode 0 started...
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Game "Matrix": Process time 0:00:00 : Start of cycle 0
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:00 : Agent computes action...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start of action computation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": End of action computation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:00 : Env processes action...
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Start processing action
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 478c892f-f980-4f5a-b894-f6d57dd357f7 = [0.84442185]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 88354d45-5265-4ed1-b51a-acd9f9f77f15 = [0.7579544]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e = [0.42057158]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef = [0.25891675]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 2aef81a6-135c-4ce5-9b47-01576e635930 = [0.51127472]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Assessment for success...
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Assessment for breakdown...
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Action processing finished successfully
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:01 : Agent adapts policy...
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Multi-Player SG "BGLP Players with Random Policies": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start of adaptation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 478c892f-f980-4f5a-b894-f6d57dd357f7
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 88354d45-5265-4ed1-b51a-acd9f9f77f15
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 2aef81a6-135c-4ce5-9b47-01576e635930
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": End of adaptation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Game "Matrix": Process time 0:00:01 : End of cycle 0
....
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Training Results of run 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Scenario : GT Game Matrix
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Model : GT Multi-Player SG BGLP Players with Random Policies
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Start time stamp : YYYY-MM-DD HH:MM:SS.SSSSSS
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- End time stamp : YYYY-MM-DD HH:MM:SS.SSSSSS
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Duration : 0:00:02.156790
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Start cycle id : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- End cycle id : 199
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Training cycles : 200
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Evaluation cycles : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Adaptations : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- High score : None
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Training Episodes : 2
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Evaluations : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS I Training "GT Training": Training completed
Cross Reference