Copyright	(c) Sentenai 2017
License	Proprietary
Maintainer	sam@sentenai.com
Stability	experimental
Portability	non-portable
Safe Haskell	None
Language	Haskell2010

Environments.CartPole

Description

CartPole by Sutton et al.

Taken from https://webdocs.cs.ualberta.ca/~sutton/book/code/pole.c with some added insights from the OpenAI gym

cart_and_pole: the cart and pole dynamics; given action and current state, estimates next state

cart_pole: Takes an action (0 or 1) and the current values of the four state variables and updates their values by estimating the state TAU seconds later.

Synopsis

Documentation

newtype Environment a Source #

A cartpole environment

Constructors

Environment
Fields getEnvironment :: RWST CartPoleConf (DList Event) CartPoleState IO a

Instances

Monad Environment Source #
Methods (>>=) :: Environment a -> (a -> Environment b) -> Environment b # (>>) :: Environment a -> Environment b -> Environment b # return :: a -> Environment a # fail :: String -> Environment a #
Functor Environment Source #
Methods fmap :: (a -> b) -> Environment a -> Environment b # (<$) :: a -> Environment b -> Environment a #
Applicative Environment Source #
Methods pure :: a -> Environment a # (<*>) :: Environment (a -> b) -> Environment a -> Environment b # (*>) :: Environment a -> Environment b -> Environment b # (<*) :: Environment a -> Environment b -> Environment a #
MonadIO Environment Source #
Methods liftIO :: IO a -> Environment a #
MonadThrow Environment Source #
Methods throwM :: Exception e => e -> Environment a #
MonadMWCRandom Environment Source #
Methods getGen :: Environment GenIO Source #
MonadEnv Environment StateCP Action Reward Source #
Methods reset :: Environment (Initial StateCP) Source # step :: Action -> Environment (Obs Reward StateCP) Source #
MonadWriter (DList Event) Environment Source #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #

runEnvironmentWithSeed :: Environment () -> GenIO -> IO (DList Event) Source #

run an environment with an explicit seed

runEnvironmentWithSeed_ :: Environment () -> GenIO -> IO () Source #

same as runEnvironmentWithSeed but don't return history

runEnvironment :: Environment () -> IO (DList Event) Source #

run an environment and create a new random generator for each effectful action

runEnvironment_ :: Environment () -> IO () Source #

same as runEnvironment but don't return history

data Event r o a Source #

Our primary datatype for an event in a trace. Contains the episode number, reward, state, and action taken (in that order). TODO: change the ordering to Event Integer s a r

Constructors

Event Integer r o a

Instances

MonadWriter (DList Event) Environment #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #
Monad t => MonadWriter (DList (Event Reward s a)) (GymEnvironmentT s a t) #
Methods writer :: (a, DList (Event Reward s a)) -> GymEnvironmentT s a t a # tell :: DList (Event Reward s a) -> GymEnvironmentT s a t () # listen :: GymEnvironmentT s a t a -> GymEnvironmentT s a t (a, DList (Event Reward s a)) # pass :: GymEnvironmentT s a t (a, DList (Event Reward s a) -> DList (Event Reward s a)) -> GymEnvironmentT s a t a #
(Show a, Show o, Show r) => Show (Event r o a) Source #
Methods showsPrec :: Int -> Event r o a -> ShowS # show :: Event r o a -> String # showList :: [Event r o a] -> ShowS #

data Action Source #

Cartpole can only go left or right; it has an action space of "discrete 2" containing {0..n-1}.

FIXME: Migrate this to either a more generic "directions" actions (would need things like "up", "down" versions as well) or a "discrete actions" version. I'm a fan of the former.

Instances

Bounded Action Source #
Methods minBound :: Action # maxBound :: Action #
Enum Action Source #
Methods succ :: Action -> Action # pred :: Action -> Action # toEnum :: Int -> Action # fromEnum :: Action -> Int # enumFrom :: Action -> [Action] # enumFromThen :: Action -> Action -> [Action] # enumFromTo :: Action -> Action -> [Action] # enumFromThenTo :: Action -> Action -> Action -> [Action] #
Eq Action Source #
Methods (==) :: Action -> Action -> Bool # (/=) :: Action -> Action -> Bool #
Ord Action Source #
Methods compare :: Action -> Action -> Ordering # (<) :: Action -> Action -> Bool # (<=) :: Action -> Action -> Bool # (>) :: Action -> Action -> Bool # (>=) :: Action -> Action -> Bool # max :: Action -> Action -> Action # min :: Action -> Action -> Action #
Show Action Source #
Methods showsPrec :: Int -> Action -> ShowS # show :: Action -> String # showList :: [Action] -> ShowS #
Generic Action Source #
Associated Types type Rep Action :: * -> * # Methods from :: Action -> Rep Action x # to :: Rep Action x -> Action #
Hashable Action Source #
Methods hashWithSalt :: Int -> Action -> Int # hash :: Action -> Int #
ToJSON Action Source #
Methods toJSON :: Action -> Value # toEncoding :: Action -> Encoding # toJSONList :: [Action] -> Value # toEncodingList :: [Action] -> Encoding #
DiscreteActionSpace Action Source #
Associated Types type Size Action :: Nat Source # Methods toAction :: Int -> Action Source # fromAction :: Action -> Int Source #
MonadEnv Environment StateCP Action Reward Source #
Methods reset :: Environment (Initial StateCP) Source # step :: Action -> Environment (Obs Reward StateCP) Source #
MonadWriter (DList Event) Environment #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #
type Rep Action Source #
type Rep Action = D1 (MetaData "Action" "Data.CartPole" "reinforce-0.0.0.1-BYNakn0URySEY5wecxfdnO" False) ((:+:) (C1 (MetaCons "GoLeft" PrefixI False) U1) (C1 (MetaCons "GoRight" PrefixI False) U1))
type Size Action Source #
type Size Action = 2

data StateCP Source #

The state of a cart on a pole in a CartPole environment

Instances

Eq StateCP Source #
Methods (==) :: StateCP -> StateCP -> Bool # (/=) :: StateCP -> StateCP -> Bool #
Ord StateCP Source #
Methods compare :: StateCP -> StateCP -> Ordering # (<) :: StateCP -> StateCP -> Bool # (<=) :: StateCP -> StateCP -> Bool # (>) :: StateCP -> StateCP -> Bool # (>=) :: StateCP -> StateCP -> Bool # max :: StateCP -> StateCP -> StateCP # min :: StateCP -> StateCP -> StateCP #
Show StateCP Source #
Methods showsPrec :: Int -> StateCP -> ShowS # show :: StateCP -> String # showList :: [StateCP] -> ShowS #
Generic StateCP Source #
Associated Types type Rep StateCP :: * -> * # Methods from :: StateCP -> Rep StateCP x # to :: Rep StateCP x -> StateCP #
Monoid StateCP Source #
Methods mempty :: StateCP # mappend :: StateCP -> StateCP -> StateCP # mconcat :: [StateCP] -> StateCP #
Hashable StateCP Source #
Methods hashWithSalt :: Int -> StateCP -> Int # hash :: StateCP -> Int #
FromJSON StateCP Source #
Methods parseJSON :: Value -> Parser StateCP # parseJSONList :: Value -> Parser [StateCP] #
StateSpace StateCP Source #
Methods toVector :: StateCP -> Vector Double Source # fromVector :: MonadThrow m => Vector Double -> m StateCP Source #
StateSpaceStatic StateCP Source #
Associated Types type Size StateCP :: Nat Source # Methods toR :: StateCP -> R (Size StateCP) Source #
MonadEnv Environment StateCP Action Reward Source #
Methods reset :: Environment (Initial StateCP) Source # step :: Action -> Environment (Obs Reward StateCP) Source #
MonadWriter (DList Event) Environment #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #
type Rep StateCP Source #
type Rep StateCP = D1 (MetaData "StateCP" "Data.CartPole" "reinforce-0.0.0.1-BYNakn0URySEY5wecxfdnO" False) (C1 (MetaCons "StateCP" PrefixI True) ((:*:) ((:*:) (S1 (MetaSel (Just Symbol "position") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float)) (S1 (MetaSel (Just Symbol "angle") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float))) ((:*:) (S1 (MetaSel (Just Symbol "velocity") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float)) (S1 (MetaSel (Just Symbol "angleRate") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float)))))
type Size StateCP Source #
type Size StateCP = 4