Howto GT-DG-002: Train Multi-Player in Stackelberg Games
Prerequisites
- Please install the following packages to run this example properly: MLPro and NumPy (both are imported by the code below).
Executable code
## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - The integrative middleware framework for standardized machine learning
## -- Package : mlpro.gt.examples
## -- Module : howto_gt_dg_002_train_own_multi_player_in_stackelberg_games.py
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2023-04-12 0.0.0 SY Creation
## -- 2023-04-12 1.0.0 SY Release of first version
## -- 2023-05-11 1.0.1 SY Refactoring
## -- 2021-08-22 1.0.2 SY Refactoring due to compatibility in mlpro.gt.dynamicgames
## -- 2024-02-16 1.0.3 SY Renaming Module
## -- 2025-07-18 1.1.0 DA Refactoring
## -------------------------------------------------------------------------------------------------
"""
Ver. 1.1.0 (2025-07-18)
This module shows how to train your own multi-player in Stackelberg games.
You will learn:
1) How to set up your own players' policies
2) How to set up your own game in dynamic Stackelberg games, including players and game board interaction
3) How to assign a player with a role as leader or follower
4) How to run the GT training and train your own players
"""
import random
from pathlib import Path
import numpy as np
from mlpro.bf import Log
from mlpro.bf.systems import State, Action
from mlpro.bf.ml import Model
from mlpro.rl import *
from mlpro.gt import *
from mlpro.gt.dynamicgames.stackelberg import *
from mlpro.gt.pool.boards.bglp import BGLP_GT
# 1 Implement your own player policy
class MyPolicy(Policy):
    """
    Minimal demo policy: every action value is an independent random.random() draw.

    The policy does not learn. Its _adapt() hook only writes a log line and
    reports that nothing was adapted.
    """

    C_NAME = 'MyPolicy'

    def compute_action(self, p_state: State, p_action_leaders=False) -> Action:
        """
        Create a random action for this policy's action space.

        Parameters
        ----------
        p_state : State
            Current state. Not evaluated by this demo policy.
        p_action_leaders
            Not evaluated by this demo policy.
            NOTE(review): presumably carries the leaders' actions for followers - confirm
            against the framework.

        Returns
        -------
        Action
            Action built from this policy's id, its action space and one random value
            per action dimension.
        """

        # One random.random() draw per dimension of the action space, in dimension order
        num_dims = self._action_space.get_num_dim()
        random_values = np.array([random.random() for _ in range(num_dims)], dtype=float)

        return Action(self._id, self._action_space, random_values)

    def _adapt(self, **p_args) -> bool:
        """
        Adaptation hook of the demo policy.

        Parameters
        ----------
        **p_args
            Accepted for interface compatibility; not evaluated here.

        Returns
        -------
        bool
            Always False, because nothing is adapted.
        """

        self.log(self.C_LOG_TYPE_I, 'Sorry, I am a stupid agent...')
        return False
# 2 Implement your own game
class MyGame(Game):
    """
    Example game that lets five players with random policies act on the BGLP_GT game board.

    Two players are registered as leaders and three as followers.
    """

    C_NAME = 'Matrix'

    def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
        """
        Create the game board and the multi-player that is returned as the game's model.

        Parameters
        ----------
        p_mode
            Not evaluated in this method.
        p_ada : bool
            Forwarded as adaptivity switch to the multi-player, all players and all policies.
        p_visualize : bool
            Forwarded as visualization switch to the multi-player, all players and all policies.
        p_logging
            Forwarded as log level to the game board, the multi-player, all players and all policies.

        Returns
        -------
        Model
            The multi-player holding the five configured players.
        """

        # 1 Setup the game board shared by all players
        self._env = BGLP_GT(p_logging=p_logging)

        # 2 Setup Multi-Player
        # 2.1 Create empty Multi-Player
        multi_player = GTMultiPlayer_SG(
            p_name='BGLP Players with Random Policies',
            p_ada=p_ada,
            p_visualize=p_visualize,
            p_logging=p_logging
        )

        # 2.2 Add Single-Players with own policy
        ss_ids = self._env.get_state_space().get_dim_ids()
        as_ids = self._env.get_action_space().get_dim_ids()

        # One (name, role) entry per player. The position in this tuple is the player id.
        player_specs = (
            ('BELT_CONVEYOR_A (Leader)', GTPlayer_SG.C_PLAYER_LEADER),
            ('VACUUM_PUMP_B (Follower)', GTPlayer_SG.C_PLAYER_FOLLOWER),
            ('VIBRATORY_CONVEYOR_B (Follower)', GTPlayer_SG.C_PLAYER_FOLLOWER),
            ('VACUUM_PUMP_C (Follower)', GTPlayer_SG.C_PLAYER_FOLLOWER),
            ('ROTARY_FEEDER_C (Leader)', GTPlayer_SG.C_PLAYER_LEADER),
        )

        for player_id, (player_name, player_role) in enumerate(player_specs):
            # Player i observes state dimensions i and i+1 and controls action dimension i
            policy = MyPolicy(
                p_observation_space=self._env.get_state_space().spawn(
                    [ss_ids[player_id], ss_ids[player_id + 1]]
                ),
                p_action_space=self._env.get_action_space().spawn([as_ids[player_id]]),
                p_buffer_size=1,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging
            )

            player = GTPlayer_SG(
                p_policy=policy,
                p_name=player_name,
                p_id=player_id,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging,
                p_role=player_role
            )

            multi_player.add_player(p_player=player, p_weight=1.0)

        # 2.3 Return multi-player as adaptive model
        return multi_player
# 3 Create game and run some cycles
if __name__ == "__main__":
    # 3.1 Demo mode: long run, full logging, visualization on, results stored in the home directory
    cycle_limit, logging, visualize, path = 200, Log.C_LOG_ALL, True, str(Path.home())
else:
    # 3.2 Internal unit test: short run, no logging, no visualization, no result path
    cycle_limit, logging, visualize, path = 10, Log.C_LOG_NOTHING, False, None

# 3.3 Create the training object for MyGame and run it
training = GTTraining(
    p_game_cls=MyGame,
    p_cycle_limit=cycle_limit,
    p_path=path,
    p_visualize=visualize,
    p_logging=logging
)

training.run()
Results
YYYY-MM-DD HH:MM:SS.SSSSSS I Training "GT Training": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Reset
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Multi-Player SG "BGLP Players with Random Policies": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "BELT_CONVEYOR_A (Leader)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7 added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_B (Follower)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VIBRATORY_CONVEYOR_B (Follower)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_C (Follower)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930 added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "ROTARY_FEEDER_C (Leader)": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptivity switched on
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": GT Player SG ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15 added.
YYYY-MM-DD HH:MM:SS.SSSSSS I Training "GT Training": Training started (without hyperparameter tuning)
YYYY-MM-DD HH:MM:SS.SSSSSS I Results "RL": Instantiated
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": -- Training run 0 started...
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:00 : Scenario reset with seed 0
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Reset
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": -- Training episode 0 started...
YYYY-MM-DD HH:MM:SS.SSSSSS W Training "GT Training": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Game "Matrix": Process time 0:00:00 : Start of cycle 0
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:00 : Agent computes action...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start of action computation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Action computation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Action computation finished
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": End of action computation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:00 : Env processes action...
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Start processing action
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 478c892f-f980-4f5a-b894-f6d57dd357f7 = [0.84442185]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 88354d45-5265-4ed1-b51a-acd9f9f77f15 = [0.7579544]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e = [0.42057158]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef = [0.25891675]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Actions of agent 2aef81a6-135c-4ce5-9b47-01576e635930 = [0.51127472]
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Assessment for success...
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Assessment for breakdown...
YYYY-MM-DD HH:MM:SS.SSSSSS I Game Board "BGLP_GT": Action processing finished successfully
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Game "Matrix": Process time 0:00:01 : Agent adapts policy...
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Multi-Player SG "BGLP Players with Random Policies": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start of adaptation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 478c892f-f980-4f5a-b894-f6d57dd357f7
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "BELT_CONVEYOR_A (Leader) 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 478c892f-f980-4f5a-b894-f6d57dd357f7": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 88354d45-5265-4ed1-b51a-acd9f9f77f15
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "ROTARY_FEEDER_C (Leader) 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 88354d45-5265-4ed1-b51a-acd9f9f77f15": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_B (Follower) 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 61ceca2b-52fc-4f34-8fbb-36bff56d9e1e": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VIBRATORY_CONVEYOR_B (Follower) 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 6b4f1ea8-9b24-48b1-b4bc-f58662dc9cef": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": Start adaption for agent 2aef81a6-135c-4ce5-9b47-01576e635930
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Player SG "VACUUM_PUMP_C (Follower) 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS S Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Adaptation started
YYYY-MM-DD HH:MM:SS.SSSSSS I Policy "MyPolicy 2aef81a6-135c-4ce5-9b47-01576e635930": Sorry, I am a stupid agent...
YYYY-MM-DD HH:MM:SS.SSSSSS I GT Multi-Player SG "BGLP Players with Random Policies": End of adaptation for all agents...
YYYY-MM-DD HH:MM:SS.SSSSSS S GT Game "Matrix": Process time 0:00:01 : End of cycle 0
....
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Training Results of run 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Scenario : GT Game Matrix
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Model : GT Multi-Player SG BGLP Players with Random Policies
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Start time stamp : YYYY-MM-DD HH:MM:SS.SSSSSS
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- End time stamp : YYYY-MM-DD HH:MM:SS.SSSSSS
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Duration : 0:00:02.156790
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Start cycle id : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- End cycle id : 199
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Training cycles : 200
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Evaluation cycles : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Adaptations : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- High score : None
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Training Episodes : 2
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": -- Evaluations : 0
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS W Results "RL": ------------------------------------------------------------------------------
YYYY-MM-DD HH:MM:SS.SSSSSS I Training "GT Training": Training completed
Cross Reference