Copyright	(c) Sentenai 2017
License	BSD3
Maintainer	sam@sentenai.com
Stability	experimental
Portability	non-portable
Safe Haskell	None
Language	Haskell2010

Environments.Gym.ClassicControl.CartPoleV0

Contents

Orphan instances

Description

Environment description:

> A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The system is controlled by applying a force of +1 or -1 to the cart. The pendulum starts upright, and the goal is to prevent it from falling over. A reward of +1 is provided for every timestep that the pole remains upright. The episode ends when the pole is more than 15 degrees from vertical, or the cart moves more than 2.4 units from the center.

https://gym.openai.com/envs/CartPole-v0

Synopsis

Documentation

data Action Source #

CartPole can only go left or right, and has an action space of "discrete 2" containing {0..n-1}.

FIXME: Migrate this to either a more generic "directions" action type (which would also need constructors like "up" and "down") or a "discrete actions" version. I'm a fan of the former.

Constructors

GoLeft
GoRight

Instances

Bounded Action Source #
Methods minBound :: Action # maxBound :: Action #
Enum Action Source #
Methods succ :: Action -> Action # pred :: Action -> Action # toEnum :: Int -> Action # fromEnum :: Action -> Int # enumFrom :: Action -> [Action] # enumFromThen :: Action -> Action -> [Action] # enumFromTo :: Action -> Action -> [Action] # enumFromThenTo :: Action -> Action -> Action -> [Action] #
Eq Action Source #
Methods (==) :: Action -> Action -> Bool # (/=) :: Action -> Action -> Bool #
Ord Action Source #
Methods compare :: Action -> Action -> Ordering # (<) :: Action -> Action -> Bool # (<=) :: Action -> Action -> Bool # (>) :: Action -> Action -> Bool # (>=) :: Action -> Action -> Bool # max :: Action -> Action -> Action # min :: Action -> Action -> Action #
Show Action Source #
Methods showsPrec :: Int -> Action -> ShowS # show :: Action -> String # showList :: [Action] -> ShowS #
Generic Action Source #
Associated Types type Rep Action :: * -> * # Methods from :: Action -> Rep Action x # to :: Rep Action x -> Action #
Hashable Action Source #
Methods hashWithSalt :: Int -> Action -> Int # hash :: Action -> Int #
ToJSON Action Source #
Methods toJSON :: Action -> Value # toEncoding :: Action -> Encoding # toJSONList :: [Action] -> Value # toEncodingList :: [Action] -> Encoding #
DiscreteActionSpace Action Source #
Associated Types type Size Action :: Nat Source # Methods toAction :: Int -> Action Source # fromAction :: Action -> Int Source #
MonadEnv Environment StateCP Action Reward Source #
Methods reset :: Environment (Initial StateCP) Source # step :: Action -> Environment (Obs Reward StateCP) Source #
MonadWriter (DList Event) Environment #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #
type Rep Action Source #
type Rep Action = D1 (MetaData "Action" "Data.CartPole" "reinforce-0.0.0.1-BYNakn0URySEY5wecxfdnO" False) ((:+:) (C1 (MetaCons "GoLeft" PrefixI False) U1) (C1 (MetaCons "GoRight" PrefixI False) U1))
type Size Action Source #
type Size Action = 2

type Runner s a x = RunnerT s a IO x Source #

Type alias for RunnerT in IO

data StateCP Source #

The state of a cart on a pole in a CartPole environment

Constructors

StateCP
Fields: position :: Float — position of the cart on the track; angle :: Float — angle of the pole with the vertical; velocity :: Float — cart velocity; angleRate :: Float — rate of change of the angle

Instances

Eq StateCP Source #
Methods (==) :: StateCP -> StateCP -> Bool # (/=) :: StateCP -> StateCP -> Bool #
Ord StateCP Source #
Methods compare :: StateCP -> StateCP -> Ordering # (<) :: StateCP -> StateCP -> Bool # (<=) :: StateCP -> StateCP -> Bool # (>) :: StateCP -> StateCP -> Bool # (>=) :: StateCP -> StateCP -> Bool # max :: StateCP -> StateCP -> StateCP # min :: StateCP -> StateCP -> StateCP #
Show StateCP Source #
Methods showsPrec :: Int -> StateCP -> ShowS # show :: StateCP -> String # showList :: [StateCP] -> ShowS #
Generic StateCP Source #
Associated Types type Rep StateCP :: * -> * # Methods from :: StateCP -> Rep StateCP x # to :: Rep StateCP x -> StateCP #
Monoid StateCP Source #
Methods mempty :: StateCP # mappend :: StateCP -> StateCP -> StateCP # mconcat :: [StateCP] -> StateCP #
Hashable StateCP Source #
Methods hashWithSalt :: Int -> StateCP -> Int # hash :: StateCP -> Int #
FromJSON StateCP Source #
Methods parseJSON :: Value -> Parser StateCP # parseJSONList :: Value -> Parser [StateCP] #
StateSpace StateCP Source #
Methods toVector :: StateCP -> Vector Double Source # fromVector :: MonadThrow m => Vector Double -> m StateCP Source #
StateSpaceStatic StateCP Source #
Associated Types type Size StateCP :: Nat Source # Methods toR :: StateCP -> R (Size StateCP) Source #
MonadEnv Environment StateCP Action Reward Source #
Methods reset :: Environment (Initial StateCP) Source # step :: Action -> Environment (Obs Reward StateCP) Source #
MonadWriter (DList Event) Environment #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #
type Rep StateCP Source #
type Rep StateCP = D1 (MetaData "StateCP" "Data.CartPole" "reinforce-0.0.0.1-BYNakn0URySEY5wecxfdnO" False) (C1 (MetaCons "StateCP" PrefixI True) ((:*:) ((:*:) (S1 (MetaSel (Just Symbol "position") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float)) (S1 (MetaSel (Just Symbol "angle") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float))) ((:*:) (S1 (MetaSel (Just Symbol "velocity") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float)) (S1 (MetaSel (Just Symbol "angleRate") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Float)))))
type Size StateCP Source #
type Size StateCP = 4

type Environment = EnvironmentT IO Source #

Alias to EnvironmentT in IO

type EnvironmentT t = GymEnvironmentT StateCP Action t Source #

Alias to GymEnvironmentT with CartPoleV0 type dependencies

runEnvironment :: Manager -> BaseUrl -> RunnerT StateCP Action IO x Source #

Alias to runEnvironment in IO

runEnvironmentT :: MonadIO t => Manager -> BaseUrl -> RunnerT StateCP Action t x Source #

Alias to runEnvironmentT

runDefaultEnvironment :: RunnerT StateCP Action IO x Source #

Alias to runDefaultEnvironment in IO

runDefaultEnvironmentT :: MonadIO t => RunnerT StateCP Action t x Source #

Alias to runDefaultEnvironmentT

Orphan instances

(MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) StateCP Action Reward Source #
Methods reset :: EnvironmentT t (Initial StateCP) Source # step :: Action -> EnvironmentT t (Obs Reward StateCP) Source #