Skip to content

Commit 88a7699

Browse files
committed
sync
1 parent ca4cddf commit 88a7699

11 files changed

+5891
-3285
lines changed

Manifest.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ version = "0.5.1"
112112

113113
[[CuArrays]]
114114
deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Printf", "Random", "Requires", "SparseArrays", "TimerOutputs"]
115-
git-tree-sha1 = "9aac17f7e09017107c84ed2657f462e86b1d56b3"
115+
git-tree-sha1 = "7c20c5a45bb245cf248f454d26966ea70255b271"
116116
uuid = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
117-
version = "1.7.1"
117+
version = "1.7.2"
118118

119119
[[DataAPI]]
120120
git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
@@ -494,7 +494,7 @@ uuid = "189a3867-3050-52da-a836-e630ba90ab69"
494494
version = "0.2.0"
495495

496496
[[ReinforcementLearningBase]]
497-
deps = ["Random"]
497+
deps = ["Distributions", "Random"]
498498
path = "/home/tj/workspace/github/ReinforcementLearningBase.jl/"
499499
uuid = "e575027e-6cd6-5018-9292-cdc6200d2b44"
500500
version = "0.5.0"
@@ -592,9 +592,9 @@ version = "1.0.0"
592592

593593
[[Tables]]
594594
deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
595-
git-tree-sha1 = "aaed7b3b00248ff6a794375ad6adf30f30ca5591"
595+
git-tree-sha1 = "22eea0f6619df63954b9404c8d47cd42c5d529f9"
596596
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
597-
version = "0.2.11"
597+
version = "1.0.1"
598598

599599
[[Test]]
600600
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ version = "0.2.0"
77
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
88
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
99
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
10+
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1011
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
1112
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
1213
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

notebooks/Chapter02_Ten_Armed_Testbed.ipynb

Lines changed: 3181 additions & 788 deletions
Large diffs are not rendered by default.

notebooks/Chapter06_Cliff_Walking.ipynb

Lines changed: 956 additions & 960 deletions
Large diffs are not rendered by default.

notebooks/Chapter06_Maximization_Bias.ipynb

Lines changed: 252 additions & 244 deletions
Large diffs are not rendered by default.

notebooks/Chapter06_Random_Walk.ipynb

Lines changed: 374 additions & 394 deletions
Large diffs are not rendered by default.

notebooks/Chapter06_Windy_Grid_World.ipynb

Lines changed: 891 additions & 886 deletions
Large diffs are not rendered by default.

notebooks/Manifest.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ deps = ["InteractiveUtils", "Markdown", "Sockets"]
480480
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
481481

482482
[[RLIntro]]
483-
deps = ["DataStructures", "Distributions", "Flux", "MacroTools", "Plots", "Random", "ReinforcementLearningBase", "ReinforcementLearningCore", "SparseArrays", "StatsBase", "StatsPlots"]
483+
deps = ["DataStructures", "Distributions", "Flux", "LinearAlgebra", "MacroTools", "Plots", "Random", "ReinforcementLearningBase", "ReinforcementLearningCore", "SparseArrays", "StatsBase", "StatsPlots"]
484484
path = ".."
485485
uuid = "02c1da58-b9a1-11e8-0212-f9611b8fe936"
486486
version = "0.2.0"

src/extensions/learners/gradient_bandit_learner.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Base.@kwdef mutable struct GradientBanditLearner{A,O,B} <: AbstractLearner
1111
baseline::B
1212
end
1313

14-
(learner::GradientBanditLearner)(obs) = obs |> get_state |> learner.approximator |> softmax
14+
# Call the learner on an integer `s`: evaluate the wrapped approximator at `s`
# and pass the result through `softmax`.
function (learner::GradientBanditLearner)(s::Int)
    out = learner.approximator(s)
    return softmax(out)
end
1515

1616
# No-op method: when the second argument is `nothing`, return `nothing`
# without touching the learner.
function RLBase.update!(learner::GradientBanditLearner, ::Nothing)
    return nothing
end
1717

src/extensions/learners/monte_carlo_learner.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ SamplingStyle(::MonteCarloLearner{T,A,R,S}) where {T,A,R,S} = S
5151

5252
# Forward the `ApproximatorStyle` query to the learner's wrapped approximator.
function RLBase.ApproximatorStyle(m::MonteCarloLearner)
    return ApproximatorStyle(m.approximator)
end
5353

54-
(learner::MonteCarloLearner)(obs) = learner.approximator(get_state(obs))
55-
(learner::MonteCarloLearner)(obs, a) = learner.approximator(get_state(s), a)
54+
# One-argument call: hand `obs` straight to the wrapped approximator and
# return whatever it produces.
function (learner::MonteCarloLearner)(obs)
    return learner.approximator(obs)
end
55+
# Two-argument call: forward `obs` and `a` to the wrapped approximator.
# The body must use the parameter `obs`; a bare `s` is not bound in this
# method and would raise `UndefVarError` (or silently read a global `s`).
(learner::MonteCarloLearner)(obs, a) = learner.approximator(obs, a)
5656

5757
# Entry point for updating a `MonteCarloLearner`: look up the learner's visit,
# approximator and sampling style traits and re-dispatch on them together with
# `experience`.
function RLBase.update!(learner::MonteCarloLearner, experience)
    return update!(
        learner,
        VisitStyle(learner),
        ApproximatorStyle(learner),
        SamplingStyle(learner),
        experience,
    )
end
5858

0 commit comments

Comments
 (0)