Copyright | (c) Sentenai 2017 |
---|---|
License | BSD3 |
Maintainer | sam@sentenai.com |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
Underlying implementation to run a Gym environment using the gym-http-client.
- newtype GymEnvironmentT s a t x = GymEnvironmentT {
- getEnvironmentT :: RWST GymConfigs (DList (Event Reward s a)) (LastState s) (ClientT t) x
- type RunnerT s a t x = Bool -> GymEnvironmentT s a t x -> t (Either ServantError (DList (Event Reward s a)))
- type Runner s a x = RunnerT s a IO x
- data GymException
- runEnvironmentT :: forall o a t x. MonadIO t => GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a)))
- runEnvironment :: GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a)))
- runDefaultEnvironmentT :: MonadIO t => GymEnv -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a)))
- runDefaultEnvironment :: GymEnv -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a)))
- _reset :: (MonadIO t, MonadThrow t, FromJSON o) => GymEnvironmentT o a t (Initial o)
- _step :: (MonadIO t, MonadThrow t, ToJSON a, r ~ Reward, FromJSON o) => a -> GymEnvironmentT o a t (Obs r o)
Documentation
newtype GymEnvironmentT s a t x Source #
The gym environment, which is shared through all openai/gym instances
GymEnvironmentT | |
|
(MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) State Action Reward Source # | |
(MonadThrow t, MonadIO t) => MonadEnv (EnvironmentT t) State Action Reward Source # | |
(MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) State Action Reward Source # | |
(MonadThrow t, MonadIO t) => MonadEnv (EnvironmentT t) StateFL Action Reward Source # | |
Monad t => MonadWriter (DList (Event Reward s a)) (GymEnvironmentT s a t) Source # | |
MonadTrans (GymEnvironmentT s a) Source # | |
Monad t => Monad (GymEnvironmentT s a t) Source # | |
Functor t => Functor (GymEnvironmentT s a t) Source # | |
Monad t => Applicative (GymEnvironmentT s a t) Source # | |
MonadIO t => MonadIO (GymEnvironmentT s a t) Source # | |
MonadThrow t => MonadThrow (GymEnvironmentT s a t) Source # | |
(MonadIO t, MonadMWCRandom t) => MonadMWCRandom (GymEnvironmentT s a t) Source # | |
type RunnerT s a t x = Bool -> GymEnvironmentT s a t x -> t (Either ServantError (DList (Event Reward s a))) Source #
type alias for a runEnvironment type which executes an environment action
data GymException Source #
Possible errors we might encounter while interacting with our environment
runEnvironmentT :: forall o a t x. MonadIO t => GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a))) Source #
run an effectful gym to completion and return either an error or the history of the agent. FIXME: move this into a History monad separate from the Environment so that we don't blow up memory.
runEnvironment :: GymEnv -> Manager -> BaseUrl -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a))) Source #
same as runEnvironmentT, but with IO as the base monad
runDefaultEnvironmentT :: MonadIO t => GymEnv -> Bool -> GymEnvironmentT o a t x -> t (Either ServantError (DList (Event Reward o a))) Source #
same as runEnvironmentT, but using http-client's default manager settings
runDefaultEnvironment :: GymEnv -> Bool -> GymEnvironment o a x -> IO (Either ServantError (DList (Event Reward o a))) Source #
same as runEnvironment, but using http-client's default manager settings
_reset :: (MonadIO t, MonadThrow t, FromJSON o) => GymEnvironmentT o a t (Initial o) Source #
generic reset function which makes a call to the gym and returns the first observation
_step :: (MonadIO t, MonadThrow t, ToJSON a, r ~ Reward, FromJSON o) => a -> GymEnvironmentT o a t (Obs r o) Source #
generic step function which takes a ToJSONable action and returns a reward and a FromJSONable state