Copyright | (c) Sentenai 2017 |
---|---|
License | Proprietary |
Maintainer | sam@sentenai.com |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
Implementation of an n-armed bandit environment.
FIXME: currently this is only for a 10-armed bandit. This needs to be tied to a config.
- newtype Environment a = Environment { getEnvironment :: RWST Config (DList Event) () IO a }
- runEnvironment :: Config -> Environment () -> IO (DList Event)
- data Event r o a = Event Integer r o a
- data Action
- mkBandits :: Int -> Int -> Float -> GenIO -> Config
- defaultBandits :: GenIO -> Config
- mkAction :: Int -> Environment Action
Documentation
newtype Environment a Source #
Monad for an n-armed bandit environment.
Environment | |
|
runEnvironment :: Config -> Environment () -> IO (DList Event) Source #
Run an n-armed bandit environment.
Our primary datatype for an event in a trace. Contains the episode number,
reward, state, and action taken (in that order).
TODO: change the ordering to Event Integer s a r
MonadWriter (DList Event) Environment # | |
Monad t => MonadWriter (DList (Event Reward s a)) (GymEnvironmentT s a t) # | |
(Show a, Show o, Show r) => Show (Event r o a) Source # | |
The index of the slot machine whose arm will be pulled.
mkBandits :: Int -> Int -> Float -> GenIO -> Config Source #
Helper function to build a bandits config with normally distributed reward functions.
defaultBandits :: GenIO -> Config Source #
Gives the default config for a 10-armed bandit.