-------------------------------------------------------------------------------
-- |
-- Module    :  Control.MonadEnv
-- Copyright :  (c) Sentenai 2017
-- License   :  BSD3
-- Maintainer:  sam@sentenai.com
-- Stability :  experimental
-- Portability: non-portable
--
-- User-facing API for 'MonadEnv', the typeclass used to implement an
-- environment.
-------------------------------------------------------------------------------
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE UndecidableInstances #-}
module Control.MonadEnv
  ( MonadEnv(..)
  , Obs(..)
  , Initial(..)
  , Reward
  ) where

import Reinforce.Prelude

-- * Environment Types

-- | A concrete reward signal.
type Reward = Double

-- | What an environment reports when an episode starts. This is kept as a
-- separate type so that the start of an episode is not conflated with the
-- later steps of that episode (which are reported as @Obs r o@).
data Initial o
  = Initial !o
    -- ^ The episode has started; carries a (strict) value of type @o@.
  | EmptyEpisode
    -- ^ No value is carried.
    -- NOTE(review): presumably means the episode has nothing to observe;
    -- confirm against the environments that produce it.

-- | An observation of the environment. It either shows that the environment
-- is done with the episode ('Done'), that the environment has already
-- 'Terminated', or it returns the reward of the last action performed
-- together with the next state ('Next').
--
-- TODO: return @Terminal@ (or return ()) on failure
data Obs r o
  = Next !r !o
    -- ^ Reward of the last action and the next state; both fields are strict.
  | Done !r !(Maybe o)
    -- ^ The episode is done: a reward and, optionally, a final state.
  | Terminated
    -- ^ The environment has already terminated; carries no data.
  deriving (Show, Eq)

-- * The Environment Monad

-- | The environment monad.
--
-- The functional dependency @e -> s a r@ means the monad @e@ alone determines
-- the state type @s@, the action type @a@ and the reward type @r@.
--
-- TODO: Think about two typeclasses: ContinuousMonadEnv and EpisodicMonadEnv
class (Num r, Monad e) => MonadEnv e s a r | e -> s a r where
  -- | Any environment must be initialized with 'reset', which may also be
  -- used to reset the environment at any time. Resetting is expected to begin
  -- a new episode (and can only be called once in a continuous environment).
  reset :: e (Initial s)

  -- | Step through an environment with an action: run the action in the
  -- environment, and return a reward and the new state of the environment.
  step :: a -> e (Obs r s)

  -- Unused sketch of a finer-grained interface, kept commented out:
  --
  -- -- Perform an action given to the environment by an agent and run
  -- -- all effects in the environment
  -- runAction :: a -> e ()

  -- -- Calculate how much reward is given when running an action in the
  -- -- context of the environment
  -- reward :: a -> e r

-- ** lifted instances for MTL

-- | Lifts the underlying environment's 'reset' and 'step' through 'ReaderT'.
instance MonadEnv e s a r => MonadEnv (ReaderT env e) s a r where
  reset :: ReaderT env e (Initial s)
  reset = lift reset

  step :: a -> ReaderT env e (Obs r s)
  step action = lift (step action)

-- | Lifts the underlying environment's 'reset' and 'step' through 'StateT'.
instance MonadEnv e s a r => MonadEnv (StateT st e) s a r where
  reset :: StateT st e (Initial s)
  reset = lift reset

  step :: a -> StateT st e (Obs r s)
  step action = lift (step action)

-- | Lifts the underlying environment's 'reset' and 'step' through 'WriterT'.
instance (Monoid w, MonadEnv e s a r) => MonadEnv (WriterT w e) s a r where
  reset :: WriterT w e (Initial s)
  reset = lift reset

  step :: a -> WriterT w e (Obs r s)
  step action = lift (step action)

-- | Lifts the underlying environment's 'reset' and 'step' through 'RWST'.
instance (Monoid w, MonadEnv e s a r) => MonadEnv (RWST env w st e) s a r where
  reset :: RWST env w st e (Initial s)
  reset = lift reset

  step :: a -> RWST env w st e (Obs r s)
  step action = lift (step action)