Howto RL-HT-002: Hyperparameter Tuning using Optuna

Prerequisites

Please install the following packages to run this example properly:

- Optuna

Executable code

## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.rl.examples
## -- Module  : howto_rl_ht_002_optuna.py
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd  Ver.      Auth.    Description
## -- 2022-03-24  0.0.0     SY       Creation 
## -- 2022-03-24  1.0.0     SY       Release of first version
## -- 2022-04-05  1.0.1     SY       Add tuning recap visualization
## -- 2022-10-12  1.0.2     DA       Renaming and minor fixes
## -- 2022-10-17  1.0.3     SY       Refactoring 
## -- 2022-11-02  1.0.4     DA       Refactoring 
## -- 2022-11-09  1.1.0     DA       Refactoring 
## -- 2023-03-27  1.2.0     DA       Refactoring 
## -------------------------------------------------------------------------------------------------

"""
Ver. 1.2.0 (2023-03-27)

This module demonstrates how to utilize the wrapper class for Optuna in an RL context.

You will learn:

1) How to set up a policy and its parameters

2) How to use the Optuna wrapper

3) How to tune the parameters using Optuna

"""


from mlpro.wrappers.optuna import *
from mlpro.rl.pool.envs.bglp import BGLP
from mlpro.rl import *
import random
import numpy as np
from pathlib import Path




## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------

# 1. Create a policy and set up the hyperparameters
class myPolicy (Policy):

    C_NAME      = 'MyPolicy'
    
## -------------------------------------------------------------------------------------------------
    def __init__(self, p_observation_space:MSpace, p_action_space:MSpace, p_buffer_size=1, p_ada=True, p_logging=True):
        """
         Parameters:
            p_observation_space     Subspace of an environment that is observed by the policy
            p_action_space          Action space object
            p_buffer_size           Size of the buffer
            p_ada                   Boolean switch for adaptivity
            p_logging               Boolean switch for logging functionality
        """
        super().__init__(p_observation_space, p_action_space, p_buffer_size, p_ada, p_logging)
        self._hyperparam_space  = HyperParamSpace()
        self._hyperparam_tuple  = None
        self._init_hyperparam()
    

## -------------------------------------------------------------------------------------------------
    def set_random_seed(self, p_seed=None):
        random.seed(p_seed)
    

## -------------------------------------------------------------------------------------------------
    def _init_hyperparam(self):
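        # Each tunable hyperparameter is one dimension of the hyperparameter
        # space: 'Z' marks an integer-valued dimension, 'R' a real-valued one,
        # each restricted by its boundaries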
        self._hyperparam_space.add_dim(HyperParam('num_states','Z', p_boundaries = [1,100]))
        self._hyperparam_space.add_dim(HyperParam('smoothing','R', p_boundaries = [0.1,0.5]))
        self._hyperparam_space.add_dim(HyperParam('lr_rate','R', p_boundaries = [0.001,0.1]))
        self._hyperparam_space.add_dim(HyperParam('buffer_size','Z', p_boundaries = [10000,100000]))
        self._hyperparam_space.add_dim(HyperParam('update_rate','Z', p_boundaries = [5,20]))
        self._hyperparam_space.add_dim(HyperParam('sampling_size','Z', p_boundaries = [64,256]))
        self._hyperparam_tuple = HyperParamTuple(self._hyperparam_space)
        
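        # Assign default values; during tuning, the Optuna wrapper overwrites
        # them with the values suggested in each trial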
        ids_ = self._hyperparam_tuple.get_dim_ids()
        self._hyperparam_tuple.set_value(ids_[0], 100)
        self._hyperparam_tuple.set_value(ids_[1], 0.035)
        self._hyperparam_tuple.set_value(ids_[2], 0.001)
        self._hyperparam_tuple.set_value(ids_[3], 100000)
        self._hyperparam_tuple.set_value(ids_[4], 100)
        self._hyperparam_tuple.set_value(ids_[5], 256)
    

## -------------------------------------------------------------------------------------------------
    def compute_action(self, p_state: State) -> Action:
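        # Dummy policy: draw a uniformly random value in [0,1) for each action
        # dimension, so that the tuning mechanics can be demonstrated without
        # a real learning algorithm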
        my_action_values = np.zeros(self._action_space.get_num_dim())
        for d in range(self._action_space.get_num_dim()):
            self.set_random_seed(None)
            my_action_values[d] = random.random() 
        return Action(self._id, self._action_space, my_action_values)
    

## -------------------------------------------------------------------------------------------------
    def _adapt(self, p_sars_elem:SARSElement) -> bool:
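        # A real policy would adapt itself based on the given SARS element
        # here; this dummy one just logs and reports that nothing was adapted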
        self.log(self.C_LOG_TYPE_W, 'Sorry, I am a stupid agent...')
        return False





## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------

# 2. Create a Scenario
class BGLP_Rnd(RLScenario):

    C_NAME      = 'BGLP_Dummy'

## -------------------------------------------------------------------------------------------------
    def _setup(self, p_mode, p_ada: bool, p_visualize: bool, p_logging) -> Model:
        self._env       = BGLP(p_logging=p_logging)
        _agent          = MultiAgent(p_name='Dummy Policy', p_ada=1, p_logging=p_logging)
        state_space     = self._env.get_state_space()
        action_space    = self._env.get_action_space()
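        # The BGLP plant is controlled by five actuators; one agent is set up
        # per actuator, each observing the fill levels of its two neighboring
        # buffers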
        
        
        # Agent 1
        _name         = 'BELT_CONVEYOR_A'
        _ospace       = state_space.spawn([state_space.get_dim_ids()[0],state_space.get_dim_ids()[1]])
        _aspace       = action_space.spawn([action_space.get_dim_ids()[0]])
        _policy       = myPolicy(p_observation_space=_ospace, p_action_space=_aspace, p_buffer_size=1, p_ada=1, p_logging=False)
        _agent.add_agent(
            p_agent=Agent(
                p_policy=_policy,
                p_envmodel=None,
                p_name=_name,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging),
            p_weight=1.0
            )
        
        
        # Agent 2
        _name         = 'VACUUM_PUMP_B'
        _ospace       = state_space.spawn([state_space.get_dim_ids()[1],state_space.get_dim_ids()[2]])
        _aspace       = action_space.spawn([action_space.get_dim_ids()[1]])
        _policy       = myPolicy(p_observation_space=_ospace, p_action_space=_aspace, p_buffer_size=1, p_ada=1, p_logging=False)
        _agent.add_agent(
            p_agent=Agent(
                p_policy=_policy,
                p_envmodel=None,
                p_name=_name,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging),
            p_weight=1.0
            )
        
        
        # Agent 3
        _name         = 'VIBRATORY_CONVEYOR_B'
        _ospace       = state_space.spawn([state_space.get_dim_ids()[2],state_space.get_dim_ids()[3]])
        _aspace       = action_space.spawn([action_space.get_dim_ids()[2]])
        _policy       = myPolicy(p_observation_space=_ospace, p_action_space=_aspace, p_buffer_size=1, p_ada=1, p_logging=False)
        _agent.add_agent(
            p_agent=Agent(
                p_policy=_policy,
                p_envmodel=None,
                p_name=_name,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging),
            p_weight=1.0
            )
        
        
        # Agent 4
        _name         = 'VACUUM_PUMP_C'
        _ospace       = state_space.spawn([state_space.get_dim_ids()[3],state_space.get_dim_ids()[4]])
        _aspace       = action_space.spawn([action_space.get_dim_ids()[3]])
        _policy       = myPolicy(p_observation_space=_ospace, p_action_space=_aspace, p_buffer_size=1, p_ada=1, p_logging=False)
        _agent.add_agent(
            p_agent=Agent(
                p_policy=_policy,
                p_envmodel=None,
                p_name=_name,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging),
            p_weight=1.0
            )
        
        
        # Agent 5
        _name         = 'ROTARY_FEEDER_C'
        _ospace       = state_space.spawn([state_space.get_dim_ids()[4],state_space.get_dim_ids()[5]])
        _aspace       = action_space.spawn([action_space.get_dim_ids()[4]])
        _policy       = myPolicy(p_observation_space=_ospace, p_action_space=_aspace, p_buffer_size=1, p_ada=1, p_logging=False)
        _agent.add_agent(
            p_agent=Agent(
                p_policy=_policy,
                p_envmodel=None,
                p_name=_name,
                p_ada=p_ada,
                p_visualize=p_visualize,
                p_logging=p_logging),
            p_weight=1.0
            )
        
        return _agent





## -------------------------------------------------------------------------------------------------
## -------------------------------------------------------------------------------------------------

if __name__ == "__main__":
    # Parameters for demo mode
    logging         = Log.C_LOG_ALL
    visualize       = False
    dest_path       = str(Path.home())
    cycle_limit     = 100
    cycle_per_ep    = 10
    eval_freq       = 2
    eval_grp_size   = 5
    adapt_limit     = 0
    stagnant_limit  = 5
    score_ma_hor    = 5
 
else:
    # Parameters for internal unit test
    logging         = Log.C_LOG_NOTHING
    visualize       = False
    dest_path       = None
    cycle_limit     = 3
    cycle_per_ep    = 1
    eval_freq       = 2
    eval_grp_size   = 1
    adapt_limit     = 0
    stagnant_limit  = 0
    score_ma_hor    = 0


# 3. Instantiate an Optuna wrapper
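# Note: with p_ids=None, all hyperparameters of the model are subject to tuning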
myOptuna = WrHPTOptuna(p_logging=logging,
                       p_ids=None,
                       p_visualization=visualize)
    

# 4. Train the agents in the scenario and turn hyperparameter tuning on
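# Passing p_hpt activates tuning mode; p_hpt_trials limits the number of Optuna trials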
training        = RLTraining(
    p_scenario_cls=BGLP_Rnd,
    p_cycle_limit=cycle_limit,
    p_cycles_per_epi_limit=cycle_per_ep,
    p_eval_frequency=eval_freq,
    p_eval_grp_size=eval_grp_size,
    p_adaptation_limit=adapt_limit,
    p_stagnation_limit=stagnant_limit,
    p_score_ma_horizon=score_ma_hor,
    p_hpt=myOptuna,
    p_hpt_trials=10,
    p_collect_states=True,
    p_collect_actions=True,
    p_collect_rewards=True,
    p_path=dest_path,
    p_logging=logging
)

training.run()
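
The role of the wrapper can also be sketched in native Optuna terms. The
following is a minimal, hypothetical sketch only, not the wrapper's actual
implementation: each hyperparameter dimension defined in
myPolicy._init_hyperparam() becomes a suggestion inside an Optuna objective
function, and the score of one training run is returned as the trial value.

import optuna

def objective(trial):
    # One suggestion per hyperparameter dimension defined above; integer
    # dimensions ('Z') map to suggest_int, real-valued ones ('R') to
    # suggest_float
    num_states    = trial.suggest_int('num_states', 1, 100)
    smoothing     = trial.suggest_float('smoothing', 0.1, 0.5)
    lr_rate       = trial.suggest_float('lr_rate', 0.001, 0.1)
    buffer_size   = trial.suggest_int('buffer_size', 10000, 100000)
    update_rate   = trial.suggest_int('update_rate', 5, 20)
    sampling_size = trial.suggest_int('sampling_size', 64, 256)
    # A real objective would run one training with these values and return
    # its score; a constant placeholder stands in here
    return 0.0

study = optuna.create_study()
study.optimize(objective, n_trials=10)   # mirrors p_hpt_trials=10 above
print(study.best_params)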

Results

2023-02-12  16:50:55.790961  I  Wrapper "Optuna": Instantiated
2023-02-12  16:50:56.033963  I  Wrapper "Optuna": Wrapped package optuna installed in version 3.1.0
2023-02-12  16:50:56.033963  I  Wrapper "Optuna": Optuna configuration is successful
2023-02-12  16:50:56.033963  I  Training "RL": Instantiated
2023-02-12  16:50:56.033963  I  Training "RL": Training started (with hyperparameter tuning)
2023-02-12  16:50:56.035961  I  Environment "BGLP": Instantiated
2023-02-12  16:50:56.035961  I  Environment "BGLP": Reset
2023-02-12  16:50:56.037961  I  Policy "MyPolicy 41746e2c-045e-485e-9dbf-7cbfc1acaddb": Instantiated
2023-02-12  16:50:56.037961  I  Policy "MyPolicy 41746e2c-045e-485e-9dbf-7cbfc1acaddb": Adaptivity switched on
2023-02-12  16:50:56.037961  I  Agent "BELT_CONVEYOR_A": Instantiated
2023-02-12  16:50:56.037961  I  Agent "BELT_CONVEYOR_A": Adaptivity switched on
2023-02-12  16:50:56.037961  I  Policy "MyPolicy 41746e2c-045e-485e-9dbf-7cbfc1acaddb": Adaptivity switched on
2023-02-12  16:50:56.037961  I  Agent "BELT_CONVEYOR_A": Adaptivity switched on
2023-02-12  16:50:56.037961  I  Policy "MyPolicy 41746e2c-045e-485e-9dbf-7cbfc1acaddb": Adaptivity switched on
2023-02-12  16:50:56.038960  I  Policy "MyPolicy 4d6eff7c-d873-4806-ad4d-b02cc70bb689": Instantiated
2023-02-12  16:50:56.038960  I  Policy "MyPolicy 4d6eff7c-d873-4806-ad4d-b02cc70bb689": Adaptivity switched on
2023-02-12  16:50:56.038960  I  Agent "VACUUM_PUMP_B": Instantiated
2023-02-12  16:50:56.039961  I  Agent "VACUUM_PUMP_B": Adaptivity switched on
2023-02-12  16:50:56.039961  I  Policy "MyPolicy 4d6eff7c-d873-4806-ad4d-b02cc70bb689": Adaptivity switched on
2023-02-12  16:50:56.039961  I  Agent "VACUUM_PUMP_B": Adaptivity switched on
2023-02-12  16:50:56.039961  I  Policy "MyPolicy 4d6eff7c-d873-4806-ad4d-b02cc70bb689": Adaptivity switched on
2023-02-12  16:50:56.039961  I  Policy "MyPolicy 47697bb1-df82-412e-b11d-da9be4d71fbb": Instantiated
2023-02-12  16:50:56.039961  I  Policy "MyPolicy 47697bb1-df82-412e-b11d-da9be4d71fbb": Adaptivity switched on
2023-02-12  16:50:56.040959  I  Agent "VIBRATORY_CONVEYOR_B": Instantiated
2023-02-12  16:50:56.040959  I  Agent "VIBRATORY_CONVEYOR_B": Adaptivity switched on
2023-02-12  16:50:56.040959  I  Policy "MyPolicy 47697bb1-df82-412e-b11d-da9be4d71fbb": Adaptivity switched on
2023-02-12  16:50:56.040959  I  Agent "VIBRATORY_CONVEYOR_B": Adaptivity switched on
2023-02-12  16:50:56.040959  I  Policy "MyPolicy 47697bb1-df82-412e-b11d-da9be4d71fbb": Adaptivity switched on
2023-02-12  16:50:56.040959  I  Policy "MyPolicy 23203733-d4bc-44c4-a154-cad82824f622": Instantiated
2023-02-12  16:50:56.040959  I  Policy "MyPolicy 23203733-d4bc-44c4-a154-cad82824f622": Adaptivity switched on
2023-02-12  16:50:56.041961  I  Agent "VACUUM_PUMP_C": Instantiated
2023-02-12  16:50:56.041961  I  Agent "VACUUM_PUMP_C": Adaptivity switched on
2023-02-12  16:50:56.041961  I  Policy "MyPolicy 23203733-d4bc-44c4-a154-cad82824f622": Adaptivity switched on
2023-02-12  16:50:56.041961  I  Agent "VACUUM_PUMP_C": Adaptivity switched on
2023-02-12  16:50:56.041961  I  Policy "MyPolicy 23203733-d4bc-44c4-a154-cad82824f622": Adaptivity switched on
2023-02-12  16:50:56.041961  I  Policy "MyPolicy b821f2cc-2add-42e3-9326-7a37bef1ba3d": Instantiated
2023-02-12  16:50:56.041961  I  Policy "MyPolicy b821f2cc-2add-42e3-9326-7a37bef1ba3d": Adaptivity switched on
2023-02-12  16:50:56.042961  I  Agent "ROTARY_FEEDER_C": Instantiated
2023-02-12  16:50:56.042961  I  Agent "ROTARY_FEEDER_C": Adaptivity switched on
2023-02-12  16:50:56.042961  I  Policy "MyPolicy b821f2cc-2add-42e3-9326-7a37bef1ba3d": Adaptivity switched on
2023-02-12  16:50:56.042961  I  Agent "ROTARY_FEEDER_C": Adaptivity switched on
2023-02-12  16:50:56.042961  I  Policy "MyPolicy b821f2cc-2add-42e3-9326-7a37bef1ba3d": Adaptivity switched on
[I 2023-02-12 16:50:56,044] A new study created in memory with name: no-name-92b887d1-0699-42f7-8d74-ddc065e13b15
C:\MLPro\MLPro\src\mlpro\wrappers\optuna.py:245: FutureWarning: suggest_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.
  parameters.append(trial.suggest_uniform(hp_object.get_name_short()+'_'+str(x),hp_low,hp_high))
2023-02-12  16:50:56.046964  I  Wrapper "Optuna": Trial number 0 has started
2023-02-12  16:50:56.046964  I  Wrapper "Optuna": ------------------------------------------------------------------------------

2023-02-12  16:50:56.047961  I  Environment "BGLP": Instantiated
2023-02-12  16:50:56.047961  I  Environment "BGLP": Reset
2023-02-12  16:50:56.048960  I  Policy "MyPolicy 2343b475-8b8a-4fa9-9551-3f91751bf067": Instantiated
2023-02-12  16:50:56.048960  I  Policy "MyPolicy 2343b475-8b8a-4fa9-9551-3f91751bf067": Adaptivity switched on
2023-02-12  16:50:56.049965  I  Agent "BELT_CONVEYOR_A": Instantiated
2023-02-12  16:50:56.049965  I  Agent "BELT_CONVEYOR_A": Adaptivity switched on
2023-02-12  16:50:56.049965  I  Policy "MyPolicy 2343b475-8b8a-4fa9-9551-3f91751bf067": Adaptivity switched on
2023-02-12  16:50:56.049965  I  Agent "BELT_CONVEYOR_A": Adaptivity switched on
2023-02-12  16:50:56.049965  I  Policy "MyPolicy 2343b475-8b8a-4fa9-9551-3f91751bf067": Adaptivity switched on
2023-02-12  16:50:56.049965  I  Policy "MyPolicy 00d3b281-3e2f-459a-8b96-f5d45b0ef172": Instantiated
2023-02-12  16:50:56.049965  I  Policy "MyPolicy 00d3b281-3e2f-459a-8b96-f5d45b0ef172": Adaptivity switched on
2023-02-12  16:50:56.050961  I  Agent "VACUUM_PUMP_B": Instantiated
2023-02-12  16:50:56.050961  I  Agent "VACUUM_PUMP_B": Adaptivity switched on
2023-02-12  16:50:56.050961  I  Policy "MyPolicy 00d3b281-3e2f-459a-8b96-f5d45b0ef172": Adaptivity switched on
2023-02-12  16:50:56.050961  I  Agent "VACUUM_PUMP_B": Adaptivity switched on
2023-02-12  16:50:56.050961  I  Policy "MyPolicy 00d3b281-3e2f-459a-8b96-f5d45b0ef172": Adaptivity switched on
2023-02-12  16:50:56.050961  I  Policy "MyPolicy 50ca0db6-d3f7-435b-82db-5273a2df4798": Instantiated
2023-02-12  16:50:56.050961  I  Policy "MyPolicy 50ca0db6-d3f7-435b-82db-5273a2df4798": Adaptivity switched on
2023-02-12  16:50:56.051965  I  Agent "VIBRATORY_CONVEYOR_B": Instantiated
2023-02-12  16:50:56.051965  I  Agent "VIBRATORY_CONVEYOR_B": Adaptivity switched on
2023-02-12  16:50:56.051965  I  Policy "MyPolicy 50ca0db6-d3f7-435b-82db-5273a2df4798": Adaptivity switched on
2023-02-12  16:50:56.051965  I  Agent "VIBRATORY_CONVEYOR_B": Adaptivity switched on
2023-02-12  16:50:56.051965  I  Policy "MyPolicy 50ca0db6-d3f7-435b-82db-5273a2df4798": Adaptivity switched on
2023-02-12  16:50:56.051965  I  Policy "MyPolicy 2ce4e729-cacd-47e9-9e55-4137858c7467": Instantiated
2023-02-12  16:50:56.051965  I  Policy "MyPolicy 2ce4e729-cacd-47e9-9e55-4137858c7467": Adaptivity switched on
2023-02-12  16:50:56.052964  I  Agent "VACUUM_PUMP_C": Instantiated
2023-02-12  16:50:56.052964  I  Agent "VACUUM_PUMP_C": Adaptivity switched on
2023-02-12  16:50:56.052964  I  Policy "MyPolicy 2ce4e729-cacd-47e9-9e55-4137858c7467": Adaptivity switched on
2023-02-12  16:50:56.052964  I  Agent "VACUUM_PUMP_C": Adaptivity switched on
2023-02-12  16:50:56.052964  I  Policy "MyPolicy 2ce4e729-cacd-47e9-9e55-4137858c7467": Adaptivity switched on
2023-02-12  16:50:56.052964  I  Policy "MyPolicy 41c9f1a1-4e3f-4819-a768-fd714135f20c": Instantiated
2023-02-12  16:50:56.052964  I  Policy "MyPolicy 41c9f1a1-4e3f-4819-a768-fd714135f20c": Adaptivity switched on
2023-02-12  16:50:56.053962  I  Agent "ROTARY_FEEDER_C": Instantiated
2023-02-12  16:50:56.053962  I  Agent "ROTARY_FEEDER_C": Adaptivity switched on
2023-02-12  16:50:56.053962  I  Policy "MyPolicy 41c9f1a1-4e3f-4819-a768-fd714135f20c": Adaptivity switched on
2023-02-12  16:50:56.053962  I  Agent "ROTARY_FEEDER_C": Adaptivity switched on
2023-02-12  16:50:56.053962  I  Policy "MyPolicy 41c9f1a1-4e3f-4819-a768-fd714135f20c": Adaptivity switched on
2023-02-12  16:50:56.065960  I  Wrapper "Optuna": New parameters for optuna tuner is ready
2023-02-12  16:50:56.065960  I  Wrapper "Optuna": ------------------------------------------------------------------------------

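Note: The FutureWarning in the log above stems from Optuna's 3.0 API change:
suggest_uniform is deprecated and will be removed in Optuna 6.0. The warned
line in mlpro/wrappers/optuna.py and its drop-in replacement would look as
follows (suggest_float has identical semantics for a plain uniform range):

# deprecated since Optuna 3.0 (as warned above):
parameters.append(trial.suggest_uniform(hp_object.get_name_short()+'_'+str(x),hp_low,hp_high))
# replacement for Optuna >= 3.0:
parameters.append(trial.suggest_float(hp_object.get_name_short()+'_'+str(x),hp_low,hp_high))
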
Cross Reference