--------------------------------------------------------------------------------
-- |
-- Module    :  Environment.Gym.ClassicControl.PendulumV0
-- Copyright :  (c) Sentenai 2017
-- License   :  BSD3
-- Maintainer:  sam@sentenai.com
-- Stability :  experimental
--
-- The inverted pendulum swingup problem is a classic problem in the control
-- literature. In this version of the problem, the pendulum starts in a random
-- position, and the goal is to swing it up so it stays upright.
--
-- https://gym.openai.com/envs/Pendulum-v0
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE InstanceSigs #-}
module Environments.Gym.ClassicControl.PendulumV0
  ( Action(..)
  , I.Runner
  , State(..)
  , Environment
  , EnvironmentT
  , Environments.Gym.ClassicControl.PendulumV0.runEnvironment
  , Environments.Gym.ClassicControl.PendulumV0.runEnvironmentT
  , Environments.Gym.ClassicControl.PendulumV0.runDefaultEnvironment
  , Environments.Gym.ClassicControl.PendulumV0.runDefaultEnvironmentT
  ) where

import Reinforce.Prelude hiding (State)
import Control.MonadEnv (MonadEnv(..), Reward)
import Environments.Gym.Internal (GymEnvironmentT)
import qualified Environments.Gym.Internal as I

import Data.Aeson.Types
import OpenAI.Gym (GymEnv(PendulumV0))


-- | State of a PendulumV0 environment
-- FIXME: give these semantics or move to a tuple?
data State = State
  { cosTheta  :: Float
  , sinTheta  :: Float
  , thetaDot  :: Float
  } deriving (Show, Eq, Generic, Ord, Hashable)


instance FromJSON State where
  parseJSON :: Value -> Parser State
  parseJSON arr@(Array _)= do
    (a, b, c) <- parseJSON arr :: Parser (Float, Float, Float)
    return $ State a b c
  parseJSON invalid = typeMismatch "Environment State" invalid

-- | Force to exert on the pendulum
newtype Action = Action { getAction :: Float }
  deriving (Ord, Show, Eq, Generic, Hashable)

instance ToJSON Action where
  toJSON :: Action -> Value
  toJSON = toJSON . getAction

-- ========================================================================= --
-- | Alias to 'Environments.Gym.Internal.GymEnvironmentT' with PendulumV0 type dependencies
type EnvironmentT t = GymEnvironmentT State Action t

-- | Alias to 'EnvironmentT' in IO
type Environment = EnvironmentT IO

-- | Alias to 'Environments.Gym.Internal.runEnvironmentT'
runEnvironmentT :: MonadIO t => Manager -> BaseUrl -> I.RunnerT State Action t x
runEnvironmentT = I.runEnvironmentT PendulumV0

-- | Alias to 'Environments.Gym.Internal.runEnvironment' in IO
runEnvironment :: Manager -> BaseUrl -> I.RunnerT State Action IO x
runEnvironment = I.runEnvironmentT PendulumV0

-- | Alias to 'Environments.Gym.Internal.runDefaultEnvironmentT'
runDefaultEnvironmentT :: MonadIO t => I.RunnerT State Action t x
runDefaultEnvironmentT = I.runDefaultEnvironmentT PendulumV0

-- | Alias to 'Environments.Gym.Internal.runDefaultEnvironment' in IO
runDefaultEnvironment :: I.RunnerT State Action IO x
runDefaultEnvironment = I.runDefaultEnvironmentT PendulumV0


instance (MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) State Action Reward where
  reset = I._reset
  step = I._step