Question: ################# # DO NOT REMOVE # Versions # numpy==1.18.0 # gym==0.17.2 ################ import gym import numpy as np from math import sqrt from gym.envs import

#################

# DO NOT REMOVE

# Versions

# numpy==1.18.0

# gym==0.17.2

################

import gym

import numpy as np

from math import sqrt

from gym.envs import toy_text

class FrozenLakeAgent(object):
    """Tabular SARSA agent for square FrozenLake maps.

    The agent learns a Q-table with on-policy TD(0) updates (SARSA) using an
    epsilon-greedy behaviour policy, then reads out the greedy action for
    every state as a string of arrow characters.

    The environment is driven through small adapter helpers so the same code
    runs against both gym API generations: the older one (``env.seed``,
    ``reset() -> obs``, ``step() -> 4-tuple``) and the newer one
    (``reset(seed=...) -> (obs, info)``, ``step() -> 5-tuple``).

    NOTE(review): seeded results depend on the exact numpy/gym versions; the
    expected policies used elsewhere in this file were presumably produced
    with the pinned versions in the header -- confirm before relying on them
    under a different gym release.
    """

    def __init__(self):
        pass

    def amap_to_gym(self, amap='FFGG'):
        """Build an unwrapped FrozenLake environment from a flat map string.

        Args:
            amap: Row-major map description (e.g. ``'SFFG'``); its length must
                be a non-zero perfect square.

        Returns:
            The unwrapped FrozenLake environment for that map.

        Raises:
            ValueError: If ``amap`` is empty or cannot form a square grid.
        """
        amap = np.asarray(amap, dtype='c')
        n_cells = amap.shape[0]
        side = int(sqrt(n_cells))
        if side == 0 or side * side != n_cells:
            raise ValueError(
                'amap must describe a non-empty square grid, got %d cells' % n_cells
            )
        amap = amap.reshape((side, side))
        try:
            env = gym.make('FrozenLake-v1', desc=amap)
        except gym.error.Error:
            # Older gym releases only register the v0 id for this environment.
            env = gym.make('FrozenLake-v0', desc=amap)
        return env.unwrapped

    def action_epsilon_greedy(self, q_table, epsilon, current_state):
        """Pick an action epsilon-greedily from ``q_table[current_state]``.

        The exploration draw happens first and the random action is sampled
        only when exploring, so the sequence of RNG calls is stable for a
        given seed. The number of actions is taken from the Q-table width
        rather than being hard-coded.
        """
        if np.random.uniform(0, 1) < epsilon:
            return np.random.randint(0, q_table.shape[1])
        return np.argmax(q_table[current_state, :])

    def calculate_td_update(self, q_table, reward, gamma, state, action, next_state, next_action):
        """Return the SARSA TD error ``r + gamma * Q(s', a') - Q(s, a)``."""
        return reward + gamma * q_table[next_state, next_action] - q_table[state, action]

    def read_out_policy(self, q_table, action_dict):
        """Return the greedy action symbol for every state, in state order.

        Ties are resolved in favour of the lowest action index (``argmax``).
        """
        return [action_dict[int(best)] for best in np.argmax(q_table, axis=1)]

    def _reset_env(self, env, seed=None):
        """Reset ``env`` and return only the initial observation.

        ``seed`` is forwarded as ``reset(seed=...)`` only when given; a tuple
        result (newer API: ``(obs, info)``) is unpacked to the observation.
        """
        result = env.reset() if seed is None else env.reset(seed=seed)
        return result[0] if isinstance(result, tuple) else result

    def _step_env(self, env, action):
        """Step ``env`` and return ``(next_state, reward, finished)``.

        A 5-tuple result (newer API) is collapsed by treating either
        ``terminated`` or ``truncated`` as the end of the episode.
        """
        result = env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, finished, _ = result
        return next_state, reward, finished

    def solve(self, amap, gamma, alpha, epsilon, n_episodes, seed):
        """Train with SARSA on the given map and return the greedy policy.

        Args:
            amap: Flat, row-major map string for a square FrozenLake grid.
            gamma: Discount factor.
            alpha: Learning rate.
            epsilon: Exploration probability of the behaviour policy.
            n_episodes: Number of training episodes.
            seed: Seed applied to both numpy's global RNG and the environment.

        Returns:
            One character per state (``<``, ``v``, ``>``, ``^``) giving the
            greedy action under the learned Q-table.
        """
        env = self.amap_to_gym(amap)
        np.random.seed(seed)

        # Older gym exposes env.seed(); newer releases removed it and expect
        # the seed to be passed to the first reset() instead.
        if callable(getattr(env, 'seed', None)):
            env.seed(seed)
            pending_seed = None
        else:
            pending_seed = seed

        action_dict = {0: '<', 1: 'v', 2: '>', 3: '^'}
        no_states = env.observation_space.n
        no_actions = env.action_space.n
        q_table = np.zeros((no_states, no_actions))

        try:
            for episode_no in range(n_episodes):
                current_state = self._reset_env(env, pending_seed)
                # Seed only once; re-seeding would replay the same episode.
                pending_seed = None
                current_action = self.action_epsilon_greedy(q_table, epsilon, current_state)
                finished = False
                while not finished:
                    new_state, reward, finished = self._step_env(env, current_action)
                    # The next action is sampled even on the final step, as
                    # SARSA bootstraps from Q(s', a').
                    new_action = self.action_epsilon_greedy(q_table, epsilon, new_state)
                    q_table[current_state, current_action] += alpha * self.calculate_td_update(
                        q_table, reward, gamma, current_state, current_action, new_state, new_action
                    )
                    current_state = new_state
                    current_action = new_action
        finally:
            env.close()

        policy_list = self.read_out_policy(q_table, action_dict)
        policy = ''.join([str(entry) for entry in policy_list])
        return policy

TEST CASES:

import unittest

class TestQNotebook(unittest.TestCase):
    """Checks FrozenLakeAgent.solve against fixed, seeded example policies."""

    def setUp(self):
        self.agent = FrozenLakeAgent()

    def test_case_1(self):
        example1 = self.agent.solve(
            amap='SFFFHFFFFFFFFFFG',
            gamma=1.0,
            alpha=0.25,
            epsilon=0.29,
            n_episodes=14697,
            seed=741684
        )
        self.assertEqual(example1, '^vv><>>vvv>v>>><')

    def test_case_2(self):
        example2 = self.agent.solve(
            amap='SFFFFHFFFFFFFFFFFFFFFFFFG',
            gamma=0.91,
            alpha=0.12,
            epsilon=0.13,
            n_episodes=42271,
            seed=983459
        )
        self.assertEqual(example2, '^>>>><>>>vvv>>vv>>>>v>>^<')

    def test_case_3(self):
        example3 = self.agent.solve(
            amap='SFFG',
            gamma=1.0,
            alpha=0.24,
            epsilon=0.09,
            n_episodes=49553,
            seed=20240
        )
        # NOTE(review): the expected policy string is cut off after '<' in the
        # original text, so only what is still visible is checked here (a
        # 4-state policy starting with '<'). Restore the full expected string
        # from the assignment handout and switch back to assertEqual.
        self.assertEqual(len(example3), 4)
        self.assertTrue(example3.startswith('<'))

    def test_case_4(self):
        example4 = self.agent.solve(
            amap='SFFHHFFHHFFHHFFG',
            gamma=0.99,
            alpha=0.5,
            epsilon=0.29,
            n_episodes=23111,
            seed=44323
        )
        self.assertEqual(example4, '^><<<>^<<><<<>^<')

    def test_case_5(self):
        example5 = self.agent.solve(
            amap='SFFFFHFFFHHFFFFFFFFHHFFFG',
            gamma=0.88,
            alpha=0.15,
            epsilon=0.16,
            n_episodes=112312,
            seed=6854343
        )
        self.assertEqual(example5, '^>><^<>><<<>v<^v>v<<<>vv<')

# Run the tests in-process; argv=[''] stops unittest from parsing the notebook
# kernel's own command-line arguments, and exit=False keeps the kernel alive.
unittest.main(argv=[''], verbosity=2, exit=False)

Whenever I try to run this code with these test cases, it raises an error saying that the environment does not have a `seed` attribute. Could you please help fix this code so that these test cases pass? This was run in a Jupyter Notebook.

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer

Step: 1 Unlock blur-text-image

Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock

Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!

How would I write these in an if-else statement inside of a do-while loop? I have already done 1 and 6. It is to be done in Java. In this lab, you will create an environment where the user can enter a...

You are currently using a cellular network plan that provides maximum speeds of up to 1 0 Gbps for your smart phone. Which of the following cellular network types are you using? 2 G 3 G 5 G 4 G A...

In Java Language. This is an intro class, so we haven't learned all that much, so if you could, try not to use too advanced formatting as we likely have not learned them. Please try and run all the...

They need to be in their certain folders (.h and .cpp) for each CLASS please!! in C++ You are writing a program to keep track of your library of video games. Your program should allow users to load...

CSE 110: Principles of Programming Languages Assignment 6 Overview In this assignment you will write a program that will model a bank with multiple patrons who each have multiple accounts. The user...

C++ plz , And please make sure all results are when apply these examples! 7.22 LAB*: Program: Online shopping cart (Part 2) This program extends the earlier "Online shopping cart program. Consider...

book.cpp file BookList Sequence Containers Homework Last updated: Friday, February 12, 2021 The following class diagrams should help you visualize the BookList interface, and to remind you what the...

Sample Run 2 1. Problem: Create a Java application that enables a user to create a favorites bar from a collection (i.e., history) of previously visited Web sites. To clarify the problem, I have...

For this project, you get to design a simple program to keep track of various video game characters. Your program will store, display and remove human characters, enemy characters and enemy "boss"...

Having a hard time getting it to work. I was wondering if you could look at it and help me figure out the problems?? ItemToPurchase.cpp #include "ItemToPurchase.h" #include using namespace std;...

Use capacitor combination circuit as shown in the figure and Calctate totulenergy sete 40 F 10 uF 10

Determine each of the following. (a) $5^{3/4} \cdot 5^{13/4}$ (b) $7^{3/5} / 7^{18/5}$ (c) $5^{1/2} \cdot 20^{1/2}$

Say you invest $ 5 0 0 today in a risk - free investment that pays 4 % interest annually. You then reinvest both interest and principal for a total investment horizon of 4 years. Determine how much...

Wingate Company, a wholesale distributor of electronic equipment, has been experiencing losses for some time, as shown by its most recent monthly contribution format income statement: In an effort to...

KEY QUESTION Using the concepts in Chapter 19's discussion of consumer behavior, explain how health care insurance results in an overallocation of resources to the health care industry. Use a demand...

KEY QUESTION Briefly discuss the major causes of income inequality. With respect to income inequality, is there any difference between inheriting property and inheriting a high IQ? Explain.

LAST WORD Go to Table 1 in the Last Word and compute the ratio of average wealth to median wealth for each of the 4 years. What trend do you find? What is your explanation for the trend? The Federal...