Question: Fixing "environment has no attribute 'seed'" in a FrozenLake SARSA agent (code header pins numpy==1.18.0 and gym==0.17.2; full code follows)
#################
# DO NOT REMOVE
# Versions
# numpy==1.18.0
# gym==0.17.2
################
import gym
import numpy as np
from math import sqrt
from gym.envs import toy_text
class FrozenLakeAgent(object):
    """Tabular SARSA agent that learns a policy for a FrozenLake map.

    The agent keeps a (states x actions) Q-table, follows an epsilon-greedy
    behaviour policy driven by the global ``np.random`` stream, and reports
    the greedy policy as a string with one symbol per state.
    """

    def __init__(self):
        pass

    def amap_to_gym(self, amap='FFGG'):
        """Map the `amap` string to an unwrapped FrozenLake environment.

        `amap` is reshaped into a square grid, so its length must be a
        perfect square (numpy raises ``ValueError`` on reshape otherwise).
        """
        amap = np.asarray(amap, dtype='c')
        side = int(sqrt(amap.shape[0]))
        amap = amap.reshape((side, side))
        # Instantiate the environment class directly instead of going through
        # ``gym.make(<id>).unwrapped``: the result is the same raw environment
        # and it does not depend on which ``FrozenLake-vN`` id the installed
        # gym release happens to register.
        return toy_text.FrozenLakeEnv(desc=amap)

    def action_epsilon_greedy(self, q_table, epsilon, current_state):
        """Return a random action with probability `epsilon`, else the greedy one.

        The uniform draw always happens first and the integer draw only when
        exploring; this order is kept so seeded runs stay reproducible.
        """
        if np.random.uniform(0, 1) < epsilon:
            # One column per action, so the table width is the action count.
            return np.random.randint(0, q_table.shape[1])
        return np.argmax(q_table[current_state, :])

    def calculate_td_update(self, q_table, reward, gamma, state, action, next_state, next_action):
        """Return the SARSA TD error ``r + gamma * Q(s', a') - Q(s, a)``."""
        return reward + gamma * q_table[next_state, next_action] - q_table[state, action]

    def read_out_policy(self, q_table, action_dict):
        """Return the `action_dict` entry of the greedy action for every state.

        Ties are resolved by ``np.argmax``, i.e. the lowest action index wins.
        """
        return [action_dict[best] for best in np.argmax(q_table, axis=1)]

    @staticmethod
    def _reset_env(env, seed=None):
        """Reset `env` (optionally seeding it) and return only the start state."""
        result = env.reset() if seed is None else env.reset(seed=seed)
        # Newer gym APIs return ``(observation, info)``; older ones the state.
        return result[0] if isinstance(result, tuple) else result

    @staticmethod
    def _step_env(env, action):
        """Step `env` and return ``(new_state, reward, finished)`` for either API."""
        result = env.step(action)
        if len(result) == 5:
            # Newer API: (obs, reward, terminated, truncated, info).
            new_state, reward, terminated, truncated, _ = result
            return new_state, reward, terminated or truncated
        # Older API: (obs, reward, done, info).
        new_state, reward, finished, _ = result
        return new_state, reward, finished

    def solve(self, amap, gamma, alpha, epsilon, n_episodes, seed):
        """Learn a policy for `amap` with SARSA and return it as a string.

        Args:
            amap: Square FrozenLake map given as a flat string.
            gamma: Discount factor used in the TD target.
            alpha: Learning rate applied to the TD error.
            epsilon: Exploration probability of the behaviour policy.
            n_episodes: Number of training episodes.
            seed: Seed for both ``np.random`` and the environment.

        Returns:
            One character per state ('<', 'v', '>' or '^'), the greedy action
            according to the learned Q-table.

        NOTE(review): the reference policies in the accompanying tests were
        presumably produced with the numpy/gym versions pinned in the file
        header; other versions may draw different random numbers and hence
        learn a different policy.
        """
        env = self.amap_to_gym(amap)
        np.random.seed(seed)
        # Environments without a ``seed`` method (newer gym releases) are
        # seeded through the first ``reset(seed=...)`` call instead.
        seed_on_reset = not hasattr(env, 'seed')
        if not seed_on_reset:
            env.seed(seed)
        action_dict = {0: '<', 1: 'v', 2: '>', 3: '^'}
        q_table = np.zeros((env.observation_space.n, env.action_space.n))
        try:
            for episode_no in range(n_episodes):
                first_seed = seed if (seed_on_reset and episode_no == 0) else None
                current_state = self._reset_env(env, first_seed)
                current_action = self.action_epsilon_greedy(q_table, epsilon, current_state)
                finished = False
                while not finished:
                    new_state, reward, finished = self._step_env(env, current_action)
                    # On-policy: the action used in the target is the one
                    # that will actually be executed next.
                    new_action = self.action_epsilon_greedy(q_table, epsilon, new_state)
                    q_table[current_state, current_action] += alpha * self.calculate_td_update(
                        q_table, reward, gamma,
                        current_state, current_action, new_state, new_action)
                    current_state = new_state
                    current_action = new_action
        finally:
            env.close()
        return ''.join(self.read_out_policy(q_table, action_dict))
TEST CASES:
import unittest


class TestQNotebook(unittest.TestCase):
    """Compare policies learned by ``FrozenLakeAgent.solve`` with reference strings."""

    def setUp(self):
        self.agent = FrozenLakeAgent()

    def test_case_1(self):
        example1 = self.agent.solve(
            amap='SFFFHFFFFFFFFFFG',
            gamma=1.0,
            alpha=0.25,
            epsilon=0.29,
            n_episodes=14697,
            seed=741684
        )
        self.assertEqual(example1, '^vv><>>vvv>v>>><')

    def test_case_2(self):
        example2 = self.agent.solve(
            amap='SFFFFHFFFFFFFFFFFFFFFFFFG',
            gamma=0.91,
            alpha=0.12,
            epsilon=0.13,
            n_episodes=42271,
            seed=983459
        )
        self.assertEqual(example2, '^>>>><>>>vvv>>vv>>>>v>>^<')

    def test_case_3(self):
        example3 = self.agent.solve(
            amap='SFFG',
            gamma=1.0,
            alpha=0.24,
            epsilon=0.09,
            n_episodes=49553,
            seed=20240
        )
        # NOTE(review): the reference policy was truncated after its first
        # character ('<') in the source; only the recoverable part is checked.
        # Restore the full 4-character string when it is available.
        self.assertEqual(len(example3), 4)
        self.assertTrue(example3.startswith('<'))

    def test_case_4(self):
        example4 = self.agent.solve(
            amap='SFFHHFFHHFFHHFFG',
            gamma=0.99,
            alpha=0.5,
            epsilon=0.29,
            n_episodes=23111,
            seed=44323
        )
        self.assertEqual(example4, '^><<<>^<<><<<>^<')

    def test_case_5(self):
        example5 = self.agent.solve(
            amap='SFFFFHFFFHHFFFFFFFFHHFFFG',
            gamma=0.88,
            alpha=0.15,
            epsilon=0.16,
            n_episodes=112312,
            seed=6854343
        )
        self.assertEqual(example5, '^>><^<>><<<>v<^v>v<<<>vv<')


# argv=[''] and exit=False keep unittest from parsing the notebook kernel's
# command line and from stopping the kernel when the run finishes.
unittest.main(argv=[''], verbosity=2, exit=False)

# Whenever I try to run this code with these test cases, it gives me an error
# that the environment does not have an attribute seed. Could you please help
# fix this code to where these test cases pass correctly. This was done in
# Jupyter Notebook
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
