Copyright	(c) Sentenai 2017
License	Proprietary
Maintainer	sam@sentenai.com
Stability	experimental
Portability	non-portable
Safe Haskell	None
Language	Haskell2010

Environments.Bandits

Description

Implementation of an n-armed bandit environment.

FIXME: currently this is only for a 10-armed bandit. This needs to be tied to a config.

Synopsis

Documentation

newtype Environment a Source #

Monad for an n-armed bandit environment

Constructors

Environment
Fields getEnvironment :: RWST Config (DList Event) () IO a

Instances

Monad Environment Source #
Methods (>>=) :: Environment a -> (a -> Environment b) -> Environment b # (>>) :: Environment a -> Environment b -> Environment b # return :: a -> Environment a # fail :: String -> Environment a #
Functor Environment Source #
Methods fmap :: (a -> b) -> Environment a -> Environment b # (<$) :: a -> Environment b -> Environment a #
Applicative Environment Source #
Methods pure :: a -> Environment a # (<*>) :: Environment (a -> b) -> Environment a -> Environment b # (*>) :: Environment a -> Environment b -> Environment b # (<*) :: Environment a -> Environment b -> Environment a #
MonadIO Environment Source #
Methods liftIO :: IO a -> Environment a #
MonadThrow Environment Source #
Methods throwM :: Exception e => e -> Environment a #
MonadMWCRandom Environment Source #
Methods getGen :: Environment GenIO Source #
MonadState () Environment Source #
Methods get :: Environment () # put :: () -> Environment () # state :: (() -> (a, ())) -> Environment a #
MonadEnv Environment () Action Reward Source #
Methods reset :: Environment (Initial ()) Source # step :: Action -> Environment (Obs Reward ()) Source #

runEnvironment :: Config -> Environment () -> IO (DList Event) Source #

Run an n-armed bandit environment with the given config, returning the events it produced.

data Event r o a Source #

Our primary datatype for an event in a trace. Contains the episode number, reward, state, and action taken (in that order). TODO: change the ordering to Event Integer s a r

Constructors

Event Integer r o a

Instances

MonadWriter (DList Event) Environment #
Methods writer :: (a, DList Event) -> Environment a # tell :: DList Event -> Environment () # listen :: Environment a -> Environment (a, DList Event) # pass :: Environment (a, DList Event -> DList Event) -> Environment a #
Monad t => MonadWriter (DList (Event Reward s a)) (GymEnvironmentT s a t) #
Methods writer :: (a, DList (Event Reward s a)) -> GymEnvironmentT s a t a # tell :: DList (Event Reward s a) -> GymEnvironmentT s a t () # listen :: GymEnvironmentT s a t a -> GymEnvironmentT s a t (a, DList (Event Reward s a)) # pass :: GymEnvironmentT s a t (a, DList (Event Reward s a) -> DList (Event Reward s a)) -> GymEnvironmentT s a t a #
(Show a, Show o, Show r) => Show (Event r o a) Source #
Methods showsPrec :: Int -> Event r o a -> ShowS # show :: Event r o a -> String # showList :: [Event r o a] -> ShowS #

data Action Source #

The slot machine index whose arm will be pulled

Instances

Bounded Action Source #
Methods minBound :: Action # maxBound :: Action #
Enum Action Source #
Methods succ :: Action -> Action # pred :: Action -> Action # toEnum :: Int -> Action # fromEnum :: Action -> Int # enumFrom :: Action -> [Action] # enumFromThen :: Action -> Action -> [Action] # enumFromTo :: Action -> Action -> [Action] # enumFromThenTo :: Action -> Action -> Action -> [Action] #
Eq Action Source #
Methods (==) :: Action -> Action -> Bool # (/=) :: Action -> Action -> Bool #
Ord Action Source #
Methods compare :: Action -> Action -> Ordering # (<) :: Action -> Action -> Bool # (<=) :: Action -> Action -> Bool # (>) :: Action -> Action -> Bool # (>=) :: Action -> Action -> Bool # max :: Action -> Action -> Action # min :: Action -> Action -> Action #
Show Action Source #
Methods showsPrec :: Int -> Action -> ShowS # show :: Action -> String # showList :: [Action] -> ShowS #
Generic Action Source #
Associated Types type Rep Action :: * -> * # Methods from :: Action -> Rep Action x # to :: Rep Action x -> Action #
Hashable Action Source #
Methods hashWithSalt :: Int -> Action -> Int # hash :: Action -> Int #
MonadEnv Environment () Action Reward Source #
Methods reset :: Environment (Initial ()) Source # step :: Action -> Environment (Obs Reward ()) Source #
type Rep Action Source #
type Rep Action = D1 (MetaData "Action" "Environments.Bandits" "reinforce-0.0.0.1-BYNakn0URySEY5wecxfdnO" True) (C1 (MetaCons "Action" PrefixI True) (S1 (MetaSel (Just Symbol "unAction") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Int)))

mkBandits :: Int -> Int -> Float -> GenIO -> Config Source #

Helper function to build a bandits config with normally-distributed reward functions.

defaultBandits :: GenIO -> Config Source #

Give the default config of a 10-armed bandit

mkAction :: Int -> Environment Action Source #

Convert an Int to an Action in the bandit environment. Throws if the Int falls out of bounds.