Learn practical skills, build real-world projects, and advance your career
Created 3 years ago
import gym
import random
# Seed Python's RNG so the epsilon-greedy exploration rolls are reproducible.
# NOTE(review): this does not seed the environment's own RNG used by
# action_space.sample() -- runs may still differ there.
random.seed(1234)
streets = gym.make("Taxi-v3").env #New versions keep getting released; if -v3 doesn't work, try -v2 or -v4
# Print an ASCII picture of the 5x5 taxi grid to the console.
streets.render()
+---------+
|R: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
# Encode a specific situation as a single state id:
# taxi at row 2, column 3; passenger at location index 2; destination index 0.
initial_state = streets.encode(2, 3, 2, 0)
# Force the environment into that state so the next render shows it.
streets.s = initial_state
streets.render()
+---------+
|R: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
import numpy as np
# Q-table: one row per state, one column per action, all values start at 0.
q_table = np.zeros([streets.observation_space.n, streets.action_space.n])
# A 2D array that represents every possible (state, action) pair in the
# environment, initialized to 0.
learning_rate = 0.1    # alpha: how strongly each new sample overwrites the old Q-value
discount_factor = 0.6  # gamma: weight given to future rewards vs. immediate reward
exploration = 0.1      # epsilon: probability of taking a random action instead of the best one
epochs = 10000         # number of complete training episodes to run
# Q-learning training loop: play `epochs` episodes, updating q_table with the
# Bellman update  Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(r + gamma*max_a' Q(s',a')).
for taxi_run in range(epochs):
    # Older gym returns the observation directly from reset(); newer
    # gym/gymnasium returns an (observation, info) tuple -- accept both.
    reset_result = streets.reset()
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result
    done = False
    while not done:
        # Epsilon-greedy selection: explore with probability `exploration`,
        # otherwise exploit the action with the highest learned Q-value.
        random_value = random.uniform(0, 1)
        if (random_value < exploration):
            action = streets.action_space.sample() # Explore a random action
        else:
            action = np.argmax(q_table[state]) # Use the action with the highest q-value
        # Older gym's step() returns (obs, reward, done, info); newer
        # gym/gymnasium returns a 5-tuple with terminated/truncated flags.
        step_result = streets.step(action)
        if len(step_result) == 5:
            next_state, reward, terminated, truncated, info = step_result
            done = terminated or truncated
        else:
            next_state, reward, done, info = step_result
        # Temporal-difference update toward reward + discounted best next value.
        prev_q = q_table[state, action]
        next_max_q = np.max(q_table[next_state])
        new_q = (1 - learning_rate) * prev_q + learning_rate * (reward + discount_factor * next_max_q)
        q_table[state, action] = new_q
        state = next_state
# Inspect the learned Q-values (one per action) for the hand-set initial state.
q_table[initial_state]
array([-2.40090669, -2.41412198, -2.41767969, -2.3639511 , -6.84836069,
-8.62169302])
# Inspect Q-values for another state: taxi at (1, 0), passenger at 2, destination 0.
q_table[streets.encode(1,0,2,0)]
array([-2.12208981, -2.23981204, -2.25062334, -2.22939021, -7.50948405,
-7.91650559])