--------------------------------------------------------------------------------
-- |
-- Module    :  Environment.Gym.ClassicControl.AcrobotV1
-- Copyright :  (c) Sentenai 2017
-- License   :  BSD3
-- Maintainer:  sam@sentenai.com
-- Stability :  experimental
-- Portability: non-portable
--
-- The inverted pendulum swingup problem is a classic problem in the control
-- literature. In this version of the problem, the pendulum starts in a random
-- position, and the goal is to swing it up so it stays upright.
--
-- https://gym.openai.com/envs/Acrobot-v1
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE InstanceSigs #-}
module Environments.Gym.ClassicControl.AcrobotV1
  ( Action(..)
  , I.Runner
  , State(..)
  , Environment
  , EnvironmentT
  , Environments.Gym.ClassicControl.AcrobotV1.runEnvironment
  , Environments.Gym.ClassicControl.AcrobotV1.runEnvironmentT
  , Environments.Gym.ClassicControl.AcrobotV1.runDefaultEnvironment
  , Environments.Gym.ClassicControl.AcrobotV1.runDefaultEnvironmentT
  ) where


import Reinforce.Prelude hiding (State)
import Control.MonadEnv (MonadEnv(..), Reward)
import Environments.Gym.Internal (GymEnvironmentT)
import qualified Environments.Gym.Internal as I

import Data.Aeson.Types
import OpenAI.Gym (GymEnv(AcrobotV1))


-- | The Acrobot's State
-- FIXME: keep semantics or move to a tuple?
data State = State
  { cosVel0     :: Float
  , sinVel0     :: Float
  , cosVel1     :: Float
  , sinVel1     :: Float
  , joint0Angle :: Float
  , joint1Angle :: Float
  } deriving (Show, Eq, Generic, Ord, Hashable)


instance FromJSON State where
  parseJSON :: Value -> Parser State
  parseJSON arr@(Array _)= do
    (a, b, c, d, e, f) <- parseJSON arr :: Parser (Float, Float, Float, Float, Float, Float)
    return $ State a b c d e f
  parseJSON invalid = typeMismatch "Environment State" invalid

-- | Acrobot Actions
data Action = RightTorque | NoTorque | LeftTorque
  deriving (Enum, Bounded, Ord, Show, Eq, Generic, Hashable)

instance ToJSON Action where
  toJSON :: Action -> Value
  toJSON = toJSON . subtract 1 . fromEnum

-- ========================================================================= --
-- | Alias to 'Environments.Gym.Internal.GymEnvironmentT' with AcrobotV1 type dependencies
type EnvironmentT t = GymEnvironmentT State Action t

-- | Alias to 'EnvironmentT' in IO
type Environment = EnvironmentT IO

-- | Alias to 'Environments.Gym.Internal.runEnvironmentT'
runEnvironmentT :: MonadIO t => Manager -> BaseUrl -> I.RunnerT State Action t x
runEnvironmentT = I.runEnvironmentT AcrobotV1

-- | Alias to 'Environments.Gym.Internal.runEnvironment' in IO
runEnvironment :: Manager -> BaseUrl -> I.RunnerT State Action IO x
runEnvironment = I.runEnvironmentT AcrobotV1

-- | Alias to 'Environments.Gym.Internal.runDefaultEnvironmentT'
runDefaultEnvironmentT :: MonadIO t => I.RunnerT State Action t x
runDefaultEnvironmentT = I.runDefaultEnvironmentT AcrobotV1

-- | Alias to 'Environments.Gym.Internal.runDefaultEnvironment' in IO
runDefaultEnvironment :: I.RunnerT State Action IO x
runDefaultEnvironment = I.runDefaultEnvironmentT AcrobotV1


instance (MonadIO t, MonadThrow t) => MonadEnv (EnvironmentT t) State Action Reward where
  reset = I._reset
  step = I._step