Custom Environments Model

../../../_images/MLPro-RL-Env_class_EnvModel_commented.png

Environment Model Creation

To create environment model, the adaptive function needs to created first. In this case, our adaptive function will predict the next state of the environment based on provided action.

from mlpro.sl.pool.afct.afct_pytorch import TorchAFct, TorchBufferElement, TorchBuffer

# Create the adaptive function based on Pytorch adaptive function module.
class OurStatePredictor(TorchAFct):
    C_NAME = "Our State Predictor"
    C_BUFFER_CLS = TorchBuffer

    def _setup_model(self):
        # Setup your neural network
        # Setup your optimizer
        # Setup your loss function

    def _input_preproc(self, p_input: torch.Tensor) -> torch.Tensor:
        # Do something here for pre-processing input
        input = something
        return input

    def _output_postproc(self, p_output: torch.Tensor) -> torch.Tensor:
        # Do something here for post-processing output
        output = something
        return output

    def _adapt(self, p_input: Element, p_output: Element) -> bool:
        # Define you adaptation or how to update your neural network
        return True

After that, we need to create another class that is inherited from the actual environment module and EnvModel, in this case RobotHTM. For now, we only use the state transition model. The reward, success and broken model are taken from the original environment module.

from mlpro.rl.model_env import EnvModel
from mlpro.rl.pool.envs.robotinhtm import RobotHTM

class OurEnvModel(RobotHTM, EnvModel):
    C_NAME = "Our Env Model"

    # Put necessary input argument in initialization
    def __init__(
        self,
        p_num_joints=4,
        p_target_mode="Random",
        p_ada=True,
        p_logging=False,
    ):

        # Initialize the actual environment to get all environment functionalities, such as
        # _simulate_reaction, _reset, _compute_reward, _compute_broken and _compute_success
        RobotHTM.__init__(self, p_num_joints=p_num_joints, p_target_mode=p_target_mode)

        # Setup Adaptive Function
        afct_strans = AFctSTrans(
            OurStatePredictor,
            p_state_space=self._state_space,
            p_action_space=self._action_space,
            p_threshold=1.8,
            p_buffer_size=20000,
            p_ada=p_ada,
            p_logging=p_logging,
        )

        # In this case set only p_afct_strans, which tells the module to use
        # _simulate_reaction from the adaptive function instead of from the actual environment
        # Set to None to use function such as compute_reward, compute_broken and compute_success
        # from the actual environment
        EnvModel.__init__(
            self,
            p_observation_space=self._state_space,
            p_action_space=self._action_space,
            p_latency=timedelta(seconds=self.dt),
            p_afct_strans=afct_strans,
            p_afct_reward=None,
            p_afct_success=None,
            p_afct_broken=None,
            p_ada=p_ada,
            p_logging=p_logging,
        )

        self.reset()