Howto RL-ENV-002: Manual Validation of Double Pendulum

Prerequisites

Please install the following packages to run this example properly:

- NumPy
- Matplotlib

Executable code

## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.rl.examples
## -- Module  : howto_rl_env_002_manual_validation_of_double_pendulum.py
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd  Ver.      Auth.    Description
## -- 2022-10-10  1.0.0     LSB      Creation/Release
## -- 2023-02-02  1.1.0     DA       Renamed and refactored
## -- 2023-02-23  1.2.0     DA       Renamed
## -------------------------------------------------------------------------------------------------


'''
Ver. 1.2.0 (2023-02-23)

This module is used to validate the double pendulum environment. This howto enables:

1. Setting up a double pendulum environment.

2. Validating the double pendulum environment for user-defined actions.

'''




from mlpro.bf.math import *
from mlpro.rl.models import *
from mlpro.rl.pool.envs.doublependulum import *
import numpy as np




# Interactive user input is only requested when this module is executed directly
if __name__ == '__main__':
    p_input = True
else:
    p_input = False




## -----------------------------------------------------------------------------------------------
## -----------------------------------------------------------------------------------------------
class ActionGenerator(Policy):
    """
    Action Generation based on user input
    """

    def set_user_action(self, p_action):
        if p_input:
            self.user_action = np.asarray([p_action])
        else:
            self.user_action = np.zeros(1)

    def compute_action(self, p_state:State):

        return Action(self._id, self._action_space, self.user_action)

    def _adapt(self, **p_kwargs) -> bool:
        self.log(self.C_LOG_TYPE_W, 'Sorry I am not adapting anything')
        return False



# 1 Implement the RL scenario with user-defined actions
class ScenarioDoublePendulum(RLScenario):

    C_NAME      = 'Double Pendulum with User Defined Actions'

    def _setup(self, p_mode, p_ada, p_visualize,  p_logging):
        self.user_action_cycles = 0

        # 1.1 Setup environment
        self._env   = DoublePendulumS7( p_init_angles='up', 
                                        p_max_torque=10, 
                                        p_visualize=p_visualize, 
                                        p_logging=p_logging )


        # 1.2 Setup and return an agent with the user action policy
        policy_user = ActionGenerator(p_observation_space=self._env.get_state_space(),
                                        p_action_space=self._env.get_action_space(),
                                        p_buffer_size=1,
                                        p_ada=1,
                                        p_logging=p_logging)

        return Agent(
            p_policy=policy_user,
            p_envmodel=None,
            p_name='Smith',
            p_ada=p_ada,
            p_logging=p_logging
        )


    def _run_cycle(self):

        # Request a new torque and cycle count from the user as soon as the
        # previously requested number of cycles has been executed
        if p_input and self.get_cycle_id() == self.user_action_cycles:
            p_torque = int(input('Enter the amount of torque in Nm:'))
            self.get_agent()._policy.set_user_action(p_torque)
            p_cycles = int(input('Enter the number of cycles to be executed:'))

        # Fallback without user input: apply zero torque
        elif not p_input:
            self.get_agent()._policy.set_user_action(0)
            p_cycles = 0

        # Otherwise keep executing the current user action
        else:
            p_cycles = 0

        self.user_action_cycles += p_cycles
        success, error, adapted, end_of_data = super()._run_cycle()
        return success, error, adapted, end_of_data


# 2 Prepare parameters depending on the execution context
if __name__ == "__main__":
    # 2.1 Parameters for demo mode
    cycle_limit         = 20000
    logging             = Log.C_LOG_ALL
    visualize           = True
    plotting            = True
else:
    # 2.2 This demo is not suitable for unit test
    exit(0)



# 3 Create your scenario and run some cycles
myscenario  = ScenarioDoublePendulum(
    p_mode=Mode.C_MODE_SIM,
    p_ada=True,
    p_cycle_limit=cycle_limit,
    p_visualize=visualize,
    p_logging=logging
)

myscenario.reset(p_seed=3)
myscenario.run()

Results

Executing the above script asks the user to input the agent's action (a torque in Nm) and the number of cycles for which that action shall be simulated.
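
For a quick, non-interactive sanity check, the same validation can also be scripted instead of entering torques at the prompt. The following minimal sketch drives the environment directly with a constant torque; it assumes the generic MLPro environment methods reset(), process_action() and get_state() and constructs the Action object in the same way as the policy above. The torque value and the number of cycles are purely illustrative.

from mlpro.rl.models import *
from mlpro.rl.pool.envs.doublependulum import *
import numpy as np


# Set up the environment without visualization and logging
env = DoublePendulumS7( p_init_angles='up',
                        p_max_torque=10,
                        p_visualize=False,
                        p_logging=Log.C_LOG_NOTHING )

env.reset(p_seed=3)

# Apply a constant torque of 5 Nm for 100 cycles (both values are illustrative)
# and read back the resulting states
for cycle in range(100):
    action = Action(0, env.get_action_space(), np.asarray([5]))
    env.process_action(action)
    state = env.get_state()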

Cross Reference