11module BlackJack
22
3- export BlackJackEnv, reset!, observe, interact!
4-
5- using ReinforcementLearningEnvironments
6- import ReinforcementLearningEnvironments: reset!, observe, interact!
3+ export BlackJackEnv
74
5+ using ReinforcementLearningCore
86using Random
97
108const ACTIONS = [:hit , :stick ]
@@ -48,10 +46,11 @@ mutable struct BlackJackEnv <: AbstractEnv
4846 reward:: Float64
4947 is_exploring_start:: Bool
5048 init:: Union{Nothing,Tuple{Hands,Hands}}
51- observation_space:: DiscreteSpace
52- action_space:: DiscreteSpace
5349end
5450
51+ RLBase. get_observation_space (env:: BlackJackEnv ) = DiscreteSpace (length (INDS))
52+ RLBase. get_action_space (env:: BlackJackEnv ) = DiscreteSpace (2 )
53+
5554function BlackJackEnv (; is_exploring_start = false , init = nothing )
5655 env = BlackJackEnv (
5756 Hands (),
@@ -60,8 +59,6 @@ function BlackJackEnv(; is_exploring_start = false, init = nothing)
6059 0.0 ,
6160 is_exploring_start,
6261 init,
63- DiscreteSpace (length (INDS)),
64- DiscreteSpace (2 ),
6562 )
6663 init_hands! (env)
6764 env
@@ -87,7 +84,7 @@ function init_hands!(env::BlackJackEnv)
8784 env. player_hands, env. dealer_hands = player_hands, dealer_hands
8885end
8986
90- function interact! (env:: BlackJackEnv , a:: Int )
87+ function (env:: BlackJackEnv )( a:: Int )
9188 if ACTIONS[a] == :hit
9289 push! (env. player_hands, deal_card ())
9390 if is_busted (env. player_hands)
@@ -117,7 +114,7 @@ function interact!(env::BlackJackEnv, a::Int)
117114 nothing
118115end
119116
120- function reset! (env:: BlackJackEnv )
117+ function RLBase . reset! (env:: BlackJackEnv )
121118 env. is_end = false
122119 env. reward = 0.0
123120
@@ -133,7 +130,7 @@ encode(env) =
133130 2 <= env. dealer_hands. sum <= 10 ? env. dealer_hands. sum : 1 ,
134131 ]
135132
136- observe (env:: BlackJackEnv ) =
137- Observation (reward = env. reward, terminal = env. is_end, state = encode (env))
133+ RLBase . observe (env:: BlackJackEnv ) =
134+ (reward = env. reward, terminal = env. is_end, state = encode (env))
138135
139136end
0 commit comments