--------------------------------------------------------------------------------
-- |
-- Module    :  Environment.Gym.ToyText.FrozenLakeV0
-- Copyright :  (c) Sentenai 2017
-- License   :  BSD3
-- Maintainer:  sam@sentenai.com
-- Stability :  experimental
--
-- The agent controls the movement of a character in a grid world. Some tiles of
-- the grid are walkable, and others lead to the agent falling into the water.
-- Additionally, the movement direction of the agent is uncertain and only
-- partially depends on the chosen direction. The agent is rewarded for
-- finding a walkable path to a goal tile.
--
-- https://gym.openai.com/envs/FrozenLake-v0
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE InstanceSigs #-}
module Environments.Gym.ToyText.FrozenLakeV0
  ( I.Runner
  , StateFL(..)
  , toVector
  , mkStateFL
  , Environment
  , EnvironmentT
  , Environments.Gym.ToyText.FrozenLakeV0.runEnvironment
  , Environments.Gym.ToyText.FrozenLakeV0.runEnvironmentT
  , Environments.Gym.ToyText.FrozenLakeV0.runDefaultEnvironment
  , Environments.Gym.ToyText.FrozenLakeV0.runDefaultEnvironmentT
  , Action(..)
  ) where

import Reinforce.Prelude
import Control.MonadEnv
import Environments.Gym.Internal hiding (runEnvironment)
import qualified Environments.Gym.Internal as I

import qualified Data.Vector as V
import Data.Aeson
import Data.Aeson.Types
import OpenAI.Gym (GymEnv(FrozenLakeV0))


-- | The current position of the agent on the frozen lake
newtype StateFL = Position { unPosition :: Int }
  deriving (Show, Eq, Generic, Ord, Hashable)

-- | Convert 'StateFL' to a computable type
toVector :: StateFL -> Vector Int
toVector (Position p) = V.generate 16 (\i -> fromEnum (i == p))

-- | Build a FrozenLakeV0 state, throwing if the position is out of bounds.
mkStateFL :: MonadThrow m => Int -> m StateFL
mkStateFL i
  | i < 16 && i >= 0 = pure $ Position i
  |        otherwise = throwString $ "no state exists for " ++ show i

instance FromJSON StateFL where
  parseJSON :: Value -> Parser StateFL
  parseJSON n@(Number _) = parseJSON n >>= pure . Position
  parseJSON invalid      = typeMismatch "StateFL" invalid

-- | Actions that can be performed in FrozenLakeV0
data Action = Left | Down | Right | Up
  deriving (Enum, Bounded, Ord, Show, Eq, Generic, Hashable)

instance ToJSON Action where
  toJSON :: Action -> Value
  toJSON = toJSON . fromEnum

-- ========================================================================= --
-- | Alias to 'Environments.Gym.Internal.GymEnvironmentT' with FrozenLakeV0 type dependencies
type EnvironmentT t = GymEnvironmentT StateFL Action t

-- | Alias to 'EnvironmentT' in IO
type Environment = EnvironmentT IO

-- | Alias to 'Environments.Gym.Internal.runEnvironmentT'
runEnvironmentT :: MonadIO t => Manager -> BaseUrl -> I.RunnerT StateFL Action t x
runEnvironmentT = I.runEnvironmentT FrozenLakeV0

-- | Alias to 'Environments.Gym.Internal.runEnvironment' in IO
runEnvironment :: Manager -> BaseUrl -> I.RunnerT StateFL Action IO x
runEnvironment = I.runEnvironmentT FrozenLakeV0

-- | Alias to 'Environments.Gym.Internal.runDefaultEnvironmentT'
runDefaultEnvironmentT :: MonadIO t => I.RunnerT StateFL Action t x
runDefaultEnvironmentT = I.runDefaultEnvironmentT FrozenLakeV0

-- | Alias to 'Environments.Gym.Internal.runDefaultEnvironment' in IO
runDefaultEnvironment :: I.RunnerT StateFL Action IO x
runDefaultEnvironment = I.runDefaultEnvironmentT FrozenLakeV0

instance (MonadThrow t, MonadIO t) => MonadEnv (EnvironmentT t) StateFL Action Reward where
  reset = I._reset
  step = I._step