Question: ################# # DO NOT REMOVE # Versions # numpy==1.18.0 # gym==0.17.2 ################ import gym import numpy as np from math import sqrt from gym.envs import

#################

# DO NOT REMOVE

# Versions

# numpy==1.18.0

# gym==0.17.2

################

import gym

import numpy as np

from math import sqrt

from gym.envs import toy_text

class FrozenLakeAgent(object):

def __init__(self):

pass

def amap_to_gym(self, amap='FFGG'):

"""Maps the `amap` string to a gym env"""

amap = np.asarray(amap, dtype='c')

side = int(sqrt(amap.shape[0]))

amap = amap.reshape((side, side))

return gym.make('FrozenLake-v1', desc=amap).unwrapped

def action_epsilon_greedy(self, q_table, epsilon, current_state):

return np.random.randint(0, 4) if np.random.uniform(0, 1) < epsilon else np.argmax(q_table[current_state, :])

def calculate_td_update(self, q_table, reward, gamma, state, action, next_state, next_action):

return reward + gamma * q_table[next_state, next_action] - q_table[state, action]

def read_out_policy(self, q_table, action_dict):

return [action_dict[np.argmax(q_table[i, :])] for i in range(q_table.shape[0])]

def solve(self, amap, gamma, alpha, epsilon, n_episodes, seed):

"""Implement the agent"""

env = self.amap_to_gym(amap)

np.random.seed(seed)

env.seed(seed)

action_dict = {0: '<', 1: 'v', 2: '>', 3: '^'}

no_states = env.observation_space.n

no_actions = env.action_space.n

q_table = np.zeros((no_states, no_actions))

for episode_no in range(n_episodes):

current_state = env.reset()

current_action = self.action_epsilon_greedy(q_table, epsilon, current_state)

finished = False

while not finished:

new_state, reward, finished, information = env.step(current_action)

new_action = self.action_epsilon_greedy(q_table, epsilon, new_state)

q_table[current_state, current_action] += alpha * self.calculate_td_update(q_table, reward, gamma, current_state, current_action, new_state, new_action)

current_state = new_state

current_action = new_action

env.close()

policy_list = self.read_out_policy(q_table, action_dict)

# TODO: Implement the algorithm

policy = ''.join([str(entry) for entry in policy_list])

return policy

TEST CASES:

import unittest

class TestQNotebook(unittest.TestCase):

def setUp(self):

self.agent = FrozenLakeAgent()

def test_case_1(self):

example1 = self.agent.solve(

amap='SFFFHFFFFFFFFFFG',

gamma=1.0,

alpha=0.25,

epsilon=0.29,

n_episodes=14697,

seed=741684

)

assert(example1 == '^vv><>>vvv>v>>><')

def test_case_2(self):

example2 = self.agent.solve(

amap='SFFFFHFFFFFFFFFFFFFFFFFFG',

gamma=0.91,

alpha=0.12,

epsilon=0.13,

n_episodes=42271,

seed=983459

)

assert(example2 == '^>>>><>>>vvv>>vv>>>>v>>^<')

def test_case_3(self):

example3 = self.agent.solve(

amap='SFFG',

gamma=1.0,

alpha=0.24,

epsilon=0.09,

n_episodes=49553,

seed=20240

)

assert(example3 == '<

def test_case_4(self):

example4 = self.agent.solve(

amap='SFFHHFFHHFFHHFFG',

gamma=0.99,

alpha=0.5,

epsilon=0.29,

n_episodes=23111,

seed=44323

)

assert(example4=='^><<<>^<<><<<>^<')

def test_case_5(self):

example5 = self.agent.solve(

amap='SFFFFHFFFHHFFFFFFFFHHFFFG',

gamma=0.88,

alpha=0.15,

epsilon=0.16,

n_episodes=112312,

seed=6854343

)

assert(example5 == '^>><^<>><<<>v<^v>v<<<>vv<')

unittest.main(argv=[''], verbosity=2, exit=False

Whenever I try to run this code with these test cases, it gives me an error that the environment does not have an attribute seed. Could you please help fix this code to where these test cases pass correctly. This was done in Jupyter Notebook

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!