## -------------------------------------------------------------------------------------------------
## -- Project : MLPro - A Synoptic Framework for Standardized Machine Learning Tasks
## -- Package : mlpro.rl.examples
## -- Module : howto_rl_020_run_double_pendulum_with_random_actions.py
## -------------------------------------------------------------------------------------------------
## -- History :
## -- yyyy-mm-dd Ver. Auth. Description
## -- 2022-04-23 0.0.0 YI Creation
## -- 2022-04-28 0.0.0 YI Changing the Scenario and Debugging
## -- 2022-05-16 1.0.0 SY Code cleaning, remove unnecessary, release the first version
## -- 2022-06-21 1.0.1 SY Adjust the name of the module, utilize RandomGenerator class
## -- 2022-08-02 1.0.2 LSB Parameters for internal unit testing
## -- 2022-08-05 1.0.3 SY Refactoring
## -- 2022-08-23 1.0.4 DA Refactoring
## -- 2022-09-06 1.0.5 LSB/DA Refactoring
## -------------------------------------------------------------------------------------------------
"""
Ver. 1.0.5 (2022-09-06)
This module shows how to run the double pendulum environment with an agent that takes random actions.
You will learn:
1) How to set up your own agent using MLPro's built-in random actions policy
2) How to set up your own RL scenario including your agent and MLPro's double pendulum environment
3) How to reset and run your own scenario
"""
from mlpro.bf.math import *
from mlpro.rl.models import *
from mlpro.rl.pool.envs.doublependulum import *
from mlpro.rl.pool.policies.randomgenerator import RandomGenerator
from pathlib import Path
# 1 Implement the random RL scenario
class ScenarioDoublePendulum(RLScenario):
    """
    RL scenario that combines a double pendulum environment (DoublePendulumS7) with a
    single agent whose policy is the RandomGenerator.
    """

    C_NAME = 'Double Pendulum with Random Actions'

    def _setup(self, p_mode, p_ada, p_logging):
        """
        Creates the environment of the scenario and returns the agent acting in it.

        Parameters
        ----------
        p_mode
            Operation mode handed in by the caller. It is not evaluated in this method.
        p_ada
            Adaptivity switch that is forwarded to the agent.
        p_logging
            Log level that is forwarded to the environment, the policy and the agent.

        Returns
        -------
        Agent
            Agent named 'Smith' that is equipped with the random actions policy and
            no environment model.
        """

        # 1.1 Setup environment (stored on the scenario as self._env)
        self._env = DoublePendulumS7(p_init_angles='random',
                                     p_max_torque=10,
                                     p_logging=p_logging)

        # 1.2 Setup the random actions policy on the spaces provided by the environment
        state_space = self._env.get_state_space()
        action_space = self._env.get_action_space()

        # NOTE(review): the policy gets a fixed p_ada=1 instead of the p_ada parameter
        # of this method - confirm that this is intended.
        random_policy = RandomGenerator(p_observation_space=state_space,
                                        p_action_space=action_space,
                                        p_buffer_size=1,
                                        p_ada=1,
                                        p_logging=p_logging)

        # 1.3 Wrap the policy into an agent and hand it back to the caller
        agent = Agent(p_policy=random_policy,
                      p_envmodel=None,
                      p_name='Smith',
                      p_ada=p_ada,
                      p_logging=p_logging)
        return agent
# 2 Select the run parameters depending on how this module is executed
if __name__ == "__main__":
    # 2.1 Demo mode (module is run as a script): 200 cycles, full logging, visualization on
    cycle_limit, logging, visualize, plotting = 200, Log.C_LOG_ALL, True, True
else:
    # 2.2 Unit test mode (module is imported): 20 cycles, no logging, visualization off
    cycle_limit, logging, visualize, plotting = 20, Log.C_LOG_NOTHING, False, False

# NOTE(review): 'plotting' is assigned above but not used in the code visible here.

# 3 Create your scenario in simulation mode, reset it with a fixed seed and run some cycles
myscenario = ScenarioDoublePendulum(p_mode=Mode.C_MODE_SIM,
                                    p_ada=True,
                                    p_cycle_limit=cycle_limit,
                                    p_visualize=visualize,
                                    p_logging=logging)

myscenario.reset(p_seed=3)
myscenario.run()