reinforce-0.0.0.1: Reinforcement learning in Haskell

Copyright: (c) Sentenai 2017
License: BSD3
Maintainer: sam@sentenai.com
Stability: experimental
Portability: non-portable
Safe Haskell: None
Language: Haskell2010

Environments.Gym.Internal

Description

Underlying implementation to run a Gym environment using the gym-http-client.

Synopsis

Documentation

newtype GymEnvironmentT s a t x Source #

The gym environment, which is shared across all openai/gym instances

Constructors

GymEnvironmentT 

Fields

Instances

(MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) State Action Reward Source # 
(MonadThrow t, MonadIO t) => MonadEnv (EnvironmentT t) State Action Reward Source # 
(MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) State Action Reward Source # 
(MonadThrow t, MonadIO t) => MonadEnv (EnvironmentT t) StateFL Action Reward Source # 
Monad t => MonadWriter (DList (Event Reward s a)) (GymEnvironmentT s a t) Source # 

Methods

writer :: (a, DList (Event Reward s a)) -> GymEnvironmentT s a t a #

tell :: DList (Event Reward s a) -> GymEnvironmentT s a t () #

listen :: GymEnvironmentT s a t a -> GymEnvironmentT s a t (a, DList (Event Reward s a)) #

pass :: GymEnvironmentT s a t (a, DList (Event Reward s a) -> DList (Event Reward s a)) -> GymEnvironmentT s a t a #

MonadTrans (GymEnvironmentT s a) Source # 

Methods

lift :: Monad m => m a -> GymEnvironmentT s a m a #

Monad t => Monad (GymEnvironmentT s a t) Source # 

Methods

(>>=) :: GymEnvironmentT s a t a -> (a -> GymEnvironmentT s a t b) -> GymEnvironmentT s a t b #

(>>) :: GymEnvironmentT s a t a -> GymEnvironmentT s a t b -> GymEnvironmentT s a t b #

return :: a -> GymEnvironmentT s a t a #

fail :: String -> GymEnvironmentT s a t a #

Functor t => Functor (GymEnvironmentT s a t) Source # 

Methods

fmap :: (a -> b) -> GymEnvironmentT s a t a -> GymEnvironmentT s a t b #

(<$) :: a -> GymEnvironmentT s a t b -> GymEnvironmentT s a t a #

Monad t => Applicative (GymEnvironmentT s a t) Source # 

Methods

pure :: a -> GymEnvironmentT s a t a #

(<*>) :: GymEnvironmentT s a t (a -> b) -> GymEnvironmentT s a t a -> GymEnvironmentT s a t b #

(*>) :: GymEnvironmentT s a t a -> GymEnvironmentT s a t b -> GymEnvironmentT s a t b #

(<*) :: GymEnvironmentT s a t a -> GymEnvironmentT s a t b -> GymEnvironmentT s a t a #

MonadIO t => MonadIO (GymEnvironmentT s a t) Source # 

Methods

liftIO :: IO a -> GymEnvironmentT s a t a #

MonadThrow t => MonadThrow (GymEnvironmentT s a t) Source # 

Methods

throwM :: Exception e => e -> GymEnvironmentT s a t a #

(MonadIO t, MonadMWCRandom t) => MonadMWCRandom (GymEnvironmentT s a t) Source # 

type RunnerT s a t x = Bool -> GymEnvironmentT s a t x -> t (Either ServantError (DList (Event Reward s a))) Source #

type alias for a runEnvironment type which executes an environment action

type Runner s a x = RunnerT s a IO x Source #

type alias of RunnerT in IO

runEnvironmentT :: forall o a t x. MonadIO t => GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a))) Source #

run an effectful gym to completion and return either an error or the history of the agent. FIXME: move this into a History monad separate from the Environment so that we don't blow up memory.

runEnvironment :: GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a))) Source #

same as runEnvironmentT, but with IO as the base monad

runDefaultEnvironmentT :: MonadIO t => GymEnv -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a))) Source #

same as runEnvironmentT, but using http-client's default manager settings

runDefaultEnvironment :: GymEnv -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a))) Source #

same as runEnvironment, but using http-client's default manager settings

_reset :: (MonadIO t, MonadThrow t, FromJSON o) => GymEnvironmentT o a t (Initial o) Source #

generic reset function which makes a call to the gym and returns the first observation

_step :: (MonadIO t, MonadThrow t, ToJSON a, r ~ Reward, FromJSON o) => a -> GymEnvironmentT o a t (Obs r o) Source #

generic step function which takes a ToJSONable action and returns a reward and a FromJSONable state