|
6 | 6 | "metadata": {}, |
7 | 7 | "outputs": [], |
8 | 8 | "source": [ |
9 | | - "using ReinforcementLearning, ReinforcementLearningEnvironments" |
| 9 | + "using ReinforcementLearningCore\n", |
| 10 | + "using RLIntro" |
10 | 11 | ] |
11 | 12 | }, |
12 | 13 | { |
|
51 | 52 | "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n", |
52 | 53 | "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"600\" height=\"400\" viewBox=\"0 0 2400 1600\">\n", |
53 | 54 | "<defs>\n", |
54 | | - " <clipPath id=\"clip3200\">\n", |
| 55 | + " <clipPath id=\"clip3300\">\n", |
55 | 56 | " <rect x=\"0\" y=\"0\" width=\"2400\" height=\"1600\"/>\n", |
56 | 57 | " </clipPath>\n", |
57 | 58 | "</defs>\n", |
58 | | - "<polygon clip-path=\"url(#clip3200)\" points=\"\n", |
59 | | - "0,1600 2400,1600 2400,0 0,0 \n", |
| 59 | + "<path clip-path=\"url(#clip3300)\" d=\"\n", |
| 60 | + "M0 1600 L2400 1600 L2400 0 L0 0 Z\n", |
60 | 61 | " \" fill=\"#ffffff\" fill-rule=\"evenodd\" fill-opacity=\"1\"/>\n", |
61 | 62 | "<defs>\n", |
62 | | - " <clipPath id=\"clip3201\">\n", |
| 63 | + " <clipPath id=\"clip3301\">\n", |
63 | 64 | " <rect x=\"480\" y=\"0\" width=\"1681\" height=\"1600\"/>\n", |
64 | 65 | " </clipPath>\n", |
65 | 66 | "</defs>\n", |
66 | | - "<polygon clip-path=\"url(#clip3200)\" points=\"\n", |
67 | | - "153.898,1487.47 2352.76,1487.47 2352.76,47.2441 153.898,47.2441 \n", |
| 67 | + "<path clip-path=\"url(#clip3300)\" d=\"\n", |
| 68 | + "M153.898 1487.47 L2352.76 1487.47 L2352.76 47.2441 L153.898 47.2441 Z\n", |
68 | 69 | " \" fill=\"#ffffff\" fill-rule=\"evenodd\" fill-opacity=\"1\"/>\n", |
69 | 70 | "<defs>\n", |
70 | | - " <clipPath id=\"clip3202\">\n", |
| 71 | + " <clipPath id=\"clip3302\">\n", |
71 | 72 | " <rect x=\"153\" y=\"47\" width=\"2200\" height=\"1441\"/>\n", |
72 | 73 | " </clipPath>\n", |
73 | 74 | "</defs>\n", |
74 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 75 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
75 | 76 | " 194.963,1487.47 194.963,47.2441 \n", |
76 | 77 | " \"/>\n", |
77 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 78 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
78 | 79 | " 724.145,1487.47 724.145,47.2441 \n", |
79 | 80 | " \"/>\n", |
80 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 81 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
81 | 82 | " 1253.33,1487.47 1253.33,47.2441 \n", |
82 | 83 | " \"/>\n", |
83 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 84 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
84 | 85 | " 1782.51,1487.47 1782.51,47.2441 \n", |
85 | 86 | " \"/>\n", |
86 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 87 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
87 | 88 | " 2311.69,1487.47 2311.69,47.2441 \n", |
88 | 89 | " \"/>\n", |
89 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 90 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
90 | 91 | " 153.898,1449.58 2352.76,1449.58 \n", |
91 | 92 | " \"/>\n", |
92 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 93 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
93 | 94 | " 153.898,1167.19 2352.76,1167.19 \n", |
94 | 95 | " \"/>\n", |
95 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 96 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
96 | 97 | " 153.898,884.807 2352.76,884.807 \n", |
97 | 98 | " \"/>\n", |
98 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 99 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
99 | 100 | " 153.898,602.42 2352.76,602.42 \n", |
100 | 101 | " \"/>\n", |
101 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
| 102 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#000000; stroke-width:2; stroke-opacity:0.1; fill:none\" points=\"\n", |
102 | 103 | " 153.898,320.033 2352.76,320.033 \n", |
103 | 104 | " \"/>\n", |
104 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 105 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
105 | 106 | " 153.898,1487.47 2352.76,1487.47 \n", |
106 | 107 | " \"/>\n", |
107 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 108 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
108 | 109 | " 153.898,1487.47 153.898,47.2441 \n", |
109 | 110 | " \"/>\n", |
110 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
111 | | - " 194.963,1487.47 194.963,1465.87 \n", |
| 111 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 112 | + " 194.963,1487.47 194.963,1470.19 \n", |
112 | 113 | " \"/>\n", |
113 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
114 | | - " 724.145,1487.47 724.145,1465.87 \n", |
| 114 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 115 | + " 724.145,1487.47 724.145,1470.19 \n", |
115 | 116 | " \"/>\n", |
116 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
117 | | - " 1253.33,1487.47 1253.33,1465.87 \n", |
| 117 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 118 | + " 1253.33,1487.47 1253.33,1470.19 \n", |
118 | 119 | " \"/>\n", |
119 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
120 | | - " 1782.51,1487.47 1782.51,1465.87 \n", |
| 120 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 121 | + " 1782.51,1487.47 1782.51,1470.19 \n", |
121 | 122 | " \"/>\n", |
122 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
123 | | - " 2311.69,1487.47 2311.69,1465.87 \n", |
| 123 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 124 | + " 2311.69,1487.47 2311.69,1470.19 \n", |
124 | 125 | " \"/>\n", |
125 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
126 | | - " 153.898,1449.58 186.881,1449.58 \n", |
| 126 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 127 | + " 153.898,1449.58 180.284,1449.58 \n", |
127 | 128 | " \"/>\n", |
128 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
129 | | - " 153.898,1167.19 186.881,1167.19 \n", |
| 129 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 130 | + " 153.898,1167.19 180.284,1167.19 \n", |
130 | 131 | " \"/>\n", |
131 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
132 | | - " 153.898,884.807 186.881,884.807 \n", |
| 132 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 133 | + " 153.898,884.807 180.284,884.807 \n", |
133 | 134 | " \"/>\n", |
134 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
135 | | - " 153.898,602.42 186.881,602.42 \n", |
| 135 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 136 | + " 153.898,602.42 180.284,602.42 \n", |
136 | 137 | " \"/>\n", |
137 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
138 | | - " 153.898,320.033 186.881,320.033 \n", |
| 138 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 139 | + " 153.898,320.033 180.284,320.033 \n", |
139 | 140 | " \"/>\n", |
140 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 141 | + "<g clip-path=\"url(#clip3300)\">\n", |
141 | 142 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:middle;\" transform=\"rotate(0, 194.963, 1541.47)\" x=\"194.963\" y=\"1541.47\">0</text>\n", |
142 | 143 | "</g>\n", |
143 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 144 | + "<g clip-path=\"url(#clip3300)\">\n", |
144 | 145 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:middle;\" transform=\"rotate(0, 724.145, 1541.47)\" x=\"724.145\" y=\"1541.47\">25</text>\n", |
145 | 146 | "</g>\n", |
146 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 147 | + "<g clip-path=\"url(#clip3300)\">\n", |
147 | 148 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:middle;\" transform=\"rotate(0, 1253.33, 1541.47)\" x=\"1253.33\" y=\"1541.47\">50</text>\n", |
148 | 149 | "</g>\n", |
149 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 150 | + "<g clip-path=\"url(#clip3300)\">\n", |
150 | 151 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:middle;\" transform=\"rotate(0, 1782.51, 1541.47)\" x=\"1782.51\" y=\"1541.47\">75</text>\n", |
151 | 152 | "</g>\n", |
152 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 153 | + "<g clip-path=\"url(#clip3300)\">\n", |
153 | 154 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:middle;\" transform=\"rotate(0, 2311.69, 1541.47)\" x=\"2311.69\" y=\"1541.47\">100</text>\n", |
154 | 155 | "</g>\n", |
155 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 156 | + "<g clip-path=\"url(#clip3300)\">\n", |
156 | 157 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:end;\" transform=\"rotate(0, 129.898, 1467.08)\" x=\"129.898\" y=\"1467.08\">0.0</text>\n", |
157 | 158 | "</g>\n", |
158 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 159 | + "<g clip-path=\"url(#clip3300)\">\n", |
159 | 160 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:end;\" transform=\"rotate(0, 129.898, 1184.69)\" x=\"129.898\" y=\"1184.69\">0.2</text>\n", |
160 | 161 | "</g>\n", |
161 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 162 | + "<g clip-path=\"url(#clip3300)\">\n", |
162 | 163 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:end;\" transform=\"rotate(0, 129.898, 902.307)\" x=\"129.898\" y=\"902.307\">0.4</text>\n", |
163 | 164 | "</g>\n", |
164 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 165 | + "<g clip-path=\"url(#clip3300)\">\n", |
165 | 166 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:end;\" transform=\"rotate(0, 129.898, 619.92)\" x=\"129.898\" y=\"619.92\">0.6</text>\n", |
166 | 167 | "</g>\n", |
167 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 168 | + "<g clip-path=\"url(#clip3300)\">\n", |
168 | 169 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:end;\" transform=\"rotate(0, 129.898, 337.533)\" x=\"129.898\" y=\"337.533\">0.8</text>\n", |
169 | 170 | "</g>\n", |
170 | | - "<polyline clip-path=\"url(#clip3202)\" style=\"stroke:#009af9; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 171 | + "<polyline clip-path=\"url(#clip3302)\" style=\"stroke:#009af9; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
171 | 172 | " 216.13,1446.71 237.297,1442.3 258.464,1436.56 279.632,1431.36 300.799,1425.04 321.966,1417.02 343.134,1410.31 364.301,1404.01 385.468,1396.38 406.635,1388.22 \n", |
172 | 173 | " 427.803,1378.49 448.97,1368.17 470.137,1357.5 491.305,1351.4 512.472,1344.49 533.639,1335.66 554.806,1327.3 575.974,1316.56 597.141,1303.97 618.308,1296.16 \n", |
173 | 174 | " 639.476,1285.85 660.643,1271.85 681.81,1260.98 702.977,1246.06 724.145,1223.67 745.312,1219.3 766.479,1212.74 787.647,1204.13 808.814,1196.33 829.981,1186.85 \n", |
|
179 | 180 | " 1909.51,472.673 1930.68,457.581 1951.85,443.408 1973.01,426.225 1994.18,407.866 2015.35,385.975 2036.52,362.768 2057.68,338.688 2078.85,325.039 2100.02,309.486 \n", |
180 | 181 | " 2121.19,289.607 2142.35,270.793 2163.52,246.643 2184.69,218.271 2205.85,200.75 2227.02,177.534 2248.19,146.021 2269.36,121.579 2290.52,88.0053 \n", |
181 | 182 | " \"/>\n", |
182 | | - "<polygon clip-path=\"url(#clip3200)\" points=\"\n", |
183 | | - "1989.93,251.724 2280.76,251.724 2280.76,130.764 1989.93,130.764 \n", |
| 183 | + "<path clip-path=\"url(#clip3300)\" d=\"\n", |
| 184 | + "M1989.93 251.724 L2280.76 251.724 L2280.76 130.764 L1989.93 130.764 Z\n", |
184 | 185 | " \" fill=\"#ffffff\" fill-rule=\"evenodd\" fill-opacity=\"1\"/>\n", |
185 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 186 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#000000; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
186 | 187 | " 1989.93,251.724 2280.76,251.724 2280.76,130.764 1989.93,130.764 1989.93,251.724 \n", |
187 | 188 | " \"/>\n", |
188 | | - "<polyline clip-path=\"url(#clip3200)\" style=\"stroke:#009af9; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
| 189 | + "<polyline clip-path=\"url(#clip3300)\" style=\"stroke:#009af9; stroke-width:4; stroke-opacity:1; fill:none\" points=\"\n", |
189 | 190 | " 2013.93,191.244 2157.93,191.244 \n", |
190 | 191 | " \"/>\n", |
191 | | - "<g clip-path=\"url(#clip3200)\">\n", |
| 192 | + "<g clip-path=\"url(#clip3300)\">\n", |
192 | 193 | "<text style=\"fill:#000000; fill-opacity:1; font-family:Arial,Helvetica Neue,Helvetica,sans-serif; font-size:48px; text-anchor:start;\" transform=\"rotate(0, 2181.93, 208.744)\" x=\"2181.93\" y=\"208.744\">y1</text>\n", |
193 | 194 | "</g>\n", |
194 | 195 | "</svg>\n" |
|
200 | 201 | } |
201 | 202 | ], |
202 | 203 | "source": [ |
203 | | - "V = TabularVApproximator(1+WinCapital)\n", |
| 204 | + "V = TabularApproximator(n_state=1+WinCapital)\n", |
204 | 205 | "value_iteration!(V=V, model=GamblerProblemEnvModel, γ=1.0, max_iter=1000)\n", |
205 | 206 | "plot(V.table[2:end-1])" |
206 | 207 | ] |
207 | 208 | } |
208 | 209 | ], |
209 | 210 | "metadata": { |
210 | 211 | "kernelspec": { |
211 | | - "display_name": "Julia 1.3.0-rc2", |
| 212 | + "display_name": "Julia 1.3.0", |
212 | 213 | "language": "julia", |
213 | 214 | "name": "julia-1.3" |
214 | 215 | }, |
|
0 commit comments