Question: import numpy as np import random # Define the grid world GRID_SIZE = (4, 5) START_STATE = (...

import numpy as np

import random

# Define the grid world
# GRID_SIZE is (rows, columns): index 0 bounds UP/DOWN moves and index 1
# bounds LEFT/RIGHT moves. States are (row, col) tuples.
GRID_SIZE = (4, 5)
START_STATE = (0, 0)
GOAL_STATE = (3, 4)
OBSTACLES = [(1, 1), (2, 2), (1, 3)]

# Q-learning parameters
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EPISODES = 500

# Initialize Q-table: one value per (row, col, action), all starting at zero.
Q_table = np.zeros((GRID_SIZE[0], GRID_SIZE[1], 4))  # 4 actions: up, down, left, right

# Define actions
# List position matches the integer action index (0=UP, 1=DOWN, 2=LEFT, 3=RIGHT).
ACTIONS = ["UP", "DOWN", "LEFT", "RIGHT"]

# Function to choose an action using epsilon-greedy strategy
def choose_action(state, epsilon):
    """Pick an action index for *state* with an epsilon-greedy policy.

    Args:
        state: ``(row, col)`` pair used to index the first two axes of
            ``Q_table``.
        epsilon: Exploration threshold; when a draw from
            ``random.uniform(0, 1)`` is below it, a random action is taken.

    Returns:
        An action index in ``range(4)``: a uniformly random one when
        exploring, otherwise the ``np.argmax`` of the Q-values stored for
        *state* (the first maximum wins on ties).
    """
    if random.uniform(0, 1) < epsilon:
        return random.choice(range(4))  # choose a random action
    else:
        return np.argmax(Q_table[state[0], state[1]])

# Function to perform Q-learning
def q_learning():
    """Run ``EPISODES`` training episodes, updating the Q-table in place.

    Every episode starts at ``START_STATE`` and repeats the cycle
    choose action -> take action -> compute reward -> update Q-table
    until the current state equals ``GOAL_STATE``. Actions are chosen with
    a fixed exploration rate of 0.1.

    Returns:
        None. The result of training is the mutated Q-table.
    """
    for _ in range(EPISODES):
        state = START_STATE
        while state != GOAL_STATE:
            action = choose_action(state, epsilon=0.1)
            next_state = take_action(state, action)
            # The reward is based on the state the agent lands in.
            reward = calculate_reward(next_state)
            update_q_table(state, action, reward, next_state)
            state = next_state

# Function to take an action and return the next state
def take_action(state, action):
    """Return the state reached by applying *action* in *state*.

    Moves are clamped to the grid: a move that would leave the grid keeps
    the agent on the edge cell. Obstacles are not consulted here, so the
    returned state may be an obstacle cell.

    Args:
        state: Current ``(row, col)`` position.
        action: Action index: 0 = UP, 1 = DOWN, 2 = LEFT, 3 = RIGHT.

    Returns:
        The next ``(row, col)`` position as a tuple.

    Raises:
        ValueError: If *action* is not one of 0, 1, 2, 3.
    """
    if action == 0:  # UP
        return (max(0, state[0] - 1), state[1])
    elif action == 1:  # DOWN
        return (min(GRID_SIZE[0] - 1, state[0] + 1), state[1])
    elif action == 2:  # LEFT
        return (state[0], max(0, state[1] - 1))
    elif action == 3:  # RIGHT
        return (state[0], min(GRID_SIZE[1] - 1, state[1] + 1))
    # Fail loudly instead of implicitly returning None, which would only
    # surface later as an indexing error on the "state".
    raise ValueError(f"unknown action index: {action!r}")

# Function to calculate the reward for a given state
def calculate_reward(state):
    """Return the reward for arriving in *state*.

    Args:
        state: ``(row, col)`` position to score.

    Returns:
        ``1`` if *state* equals ``GOAL_STATE``, ``-1`` if it is listed in
        ``OBSTACLES``, and ``0`` otherwise.
    """
    if state == GOAL_STATE:
        return 1
    elif state in OBSTACLES:
        return -1
    else:
        return 0

# Function to update the Q-table based on the Q-learning update rule
def update_q_table(state, action, reward, next_state):
    """Apply one Q-learning update to ``Q_table[state, action]`` in place.

    The new value blends the current estimate with the target
    ``reward + DISCOUNT_FACTOR * max(Q_table[next_state])``, weighted by
    ``LEARNING_RATE``.

    Args:
        state: ``(row, col)`` position the action was taken from.
        action: Action index used for the third axis of ``Q_table``.
        reward: Reward received for the transition.
        next_state: ``(row, col)`` position reached after the action.

    Returns:
        None. ``Q_table`` is modified in place.
    """
    # Value of the best action available from the state we landed in.
    best_future_value = np.max(Q_table[next_state[0], next_state[1]])
    current_value = Q_table[state[0], state[1], action]
    new_value = (1 - LEARNING_RATE) * current_value + LEARNING_RATE * (
        reward + DISCOUNT_FACTOR * best_future_value
    )
    Q_table[state[0], state[1], action] = new_value

# Run Q-learning algorithm
q_learning()

# Print the learned Q-table
print("Learned Q-table:")
print(Q_table)

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer

Step: 1 Unlock blur-text-image

Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock

Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!

Show your goal-searching process with a step-to-go curve, sum of squared error, and/or theoretical value table, with diagrams, graphs, and tables, for the code below: import numpy as np...

import numpy as np import random # Define the grid world GRID_SIZE = (4, 5) START_STATE = (0, 0) GOAL_STATE = (3, 4) OBSTACLES = [(1, 1), (2, 2), (1, 3)] # Q-learning...

Identify the main types of traditional and horizontal organization structure. Using an organization of your choice as an example, how is the structure of the organization a reflection of its...

1. Given the information available and your knowledge of different forecasting techniques, recommend a specific forecasting technique for the study. Consider the advantages and disadvantages of your...

El ensayo provee el espacio para realizar un estudio abarcador sobre varios temas. True False

Saved Help 14 Wisconsin Snowmobile Corp. is considering a switch to level production. Cost efficiencies would occur under level production, and after-tax costs would decline by $31,500, but inventory...

4. What actions have you taken in the past that have helped you to cope successfully with stressful circumstances? Could any of these approaches be useful to Diane?

5. What might you do in your organization to encourage laughter and fun at work in a way that contributes to stress reduction and increased effectiveness and productivity?

2. What did you do in response to the challenge? How did your responses improve your ability to cope or make you stronger, more flexible, or capable?