reinforce-0.0.0.1: Reinforcement learning in Haskell

Copyright: (c) Sentenai 2017
License: Proprietary
Maintainer: sam@sentenai.com
Stability: experimental
Portability: non-portable
Safe Haskell: None
Language: Haskell2010

Environments.Bandits

Description

Implementation of an n-armed bandit environment.

FIXME: currently this is only for a 10-armed bandit. This needs to be tied to a config.

Synopsis

Documentation

newtype Environment a Source #

Monad for an n-armed bandit environment

Constructors

Environment 

Fields

runEnvironment :: Config -> Environment () -> IO (DList Event) Source #

Run an n-armed bandit environment.

data Event r o a Source #

Our primary datatype for an event in a trace. It contains the episode number, reward, state, and action taken (in that order). TODO: change the ordering to `Event Integer s a r`.

Constructors

Event Integer r o a 

Instances

MonadWriter (DList Event) Environment # 
Monad t => MonadWriter (DList (Event Reward s a)) (GymEnvironmentT s a t) # 

Methods

writer :: (a, DList (Event Reward s a)) -> GymEnvironmentT s a t a #

tell :: DList (Event Reward s a) -> GymEnvironmentT s a t () #

listen :: GymEnvironmentT s a t a -> GymEnvironmentT s a t (a, DList (Event Reward s a)) #

pass :: GymEnvironmentT s a t (a, DList (Event Reward s a) -> DList (Event Reward s a)) -> GymEnvironmentT s a t a #

(Show a, Show o, Show r) => Show (Event r o a) Source # 

Methods

showsPrec :: Int -> Event r o a -> ShowS #

show :: Event r o a -> String #

showList :: [Event r o a] -> ShowS #

data Action Source #

The slot machine index whose arm will be pulled

Instances

Bounded Action Source # 
Enum Action Source # 
Eq Action Source # 

Methods

(==) :: Action -> Action -> Bool #

(/=) :: Action -> Action -> Bool #

Ord Action Source # 
Show Action Source # 
Generic Action Source # 

Associated Types

type Rep Action :: * -> * #

Methods

from :: Action -> Rep Action x #

to :: Rep Action x -> Action #

Hashable Action Source # 

Methods

hashWithSalt :: Int -> Action -> Int #

hash :: Action -> Int #

MonadEnv Environment () Action Reward Source # 
type Rep Action Source # 
type Rep Action = D1 (MetaData "Action" "Environments.Bandits" "reinforce-0.0.0.1-BYNakn0URySEY5wecxfdnO" True) (C1 (MetaCons "Action" PrefixI True) (S1 (MetaSel (Just Symbol "unAction") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Int)))

mkBandits :: Int -> Int -> Float -> GenIO -> Config Source #

Helper function to build a bandit config with normally distributed reward functions.

defaultBandits :: GenIO -> Config Source #

Give the default config of a 10-armed bandit

mkAction :: Int -> Environment Action Source #

Convert an Int to an Action in the bandit environment. Throws if the Int is out of bounds.