Question: import numpy as np import random # Define the grid world GRID_SIZE = (4, 5) START_STATE = (
import numpy as np
import random
# Define the grid world
GRID_SIZE = (4, 5)  # (rows, columns)
# NOTE(review): the three coordinates below were lost when this listing was
# extracted; the values are placeholders -- confirm against the original.
START_STATE = (0, 0)
GOAL_STATE = (3, 4)
OBSTACLES = [(1, 1), (2, 3)]

# Q-learning parameters
# NOTE(review): the numeric values were lost in extraction; these are
# placeholders -- confirm against the original.
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EPISODES = 1000
# The listing passes an `epsilon` to choose_action but never defines one, so
# an explicit exploration rate is introduced here (placeholder value).
EPSILON = 0.1

# Initialize Q-table
# One entry per (row, column, action); 4 actions: up, down, left, right.
q_table = np.zeros((GRID_SIZE[0], GRID_SIZE[1], 4))

# Define actions
# The position of a name in this list is the integer action index.
ACTIONS = ["UP", "DOWN", "LEFT", "RIGHT"]


# Function to choose an action using epsilon-greedy strategy
def choose_action(state, epsilon):
    """Return an action index for ``state`` using an epsilon-greedy rule.

    Args:
        state: ``(row, column)`` position used to index ``q_table``.
        epsilon: Threshold compared against a uniform draw from [0, 1];
            when the draw is below it a random action is returned.

    Returns:
        An integer index into ``ACTIONS``.
    """
    if random.uniform(0, 1) < epsilon:
        return random.choice(range(len(ACTIONS)))  # choose a random action
    # Greedy choice; np.argmax returns the first index on ties.
    return int(np.argmax(q_table[state[0], state[1]]))


# Function to perform Q-learning
def q_learning(epsilon=EPSILON):
    """Run ``EPISODES`` episodes, updating the module-level ``q_table``.

    Each episode starts at ``START_STATE`` and ends when the state equals
    ``GOAL_STATE``.

    Args:
        epsilon: Exploration rate forwarded to ``choose_action``.
    """
    for _ in range(EPISODES):
        state = START_STATE
        while state != GOAL_STATE:
            action = choose_action(state, epsilon)
            next_state = take_action(state, action)
            reward = calculate_reward(next_state)
            update_q_table(state, action, reward, next_state)
            state = next_state


# Function to take an action and return the next state
def take_action(state, action):
    """Return the state reached from ``state`` by applying ``action``.

    Moves are clamped to the grid, so stepping into a wall leaves the
    position unchanged. Obstacles are not blocked here; they only affect
    the reward.

    Args:
        state: ``(row, column)`` position.
        action: Integer index into ``ACTIONS``.

    Returns:
        The new ``(row, column)`` tuple.

    Raises:
        ValueError: If ``action`` is not a valid action index.
    """
    row, col = state
    if action == 0:  # UP
        return (max(0, row - 1), col)
    elif action == 1:  # DOWN
        return (min(GRID_SIZE[0] - 1, row + 1), col)
    elif action == 2:  # LEFT
        return (row, max(0, col - 1))
    elif action == 3:  # RIGHT
        return (row, min(GRID_SIZE[1] - 1, col + 1))
    raise ValueError(f"unknown action index: {action!r}")


# Function to calculate the reward for a given state
def calculate_reward(state):
    """Return the reward for entering ``state``.

    NOTE(review): the three reward magnitudes were lost in extraction; the
    values below are placeholders -- confirm against the original.
    """
    if state == GOAL_STATE:
        return 10
    elif state in OBSTACLES:
        return -10
    else:
        return -1


# Function to update the Q-table based on the Q-learning update rule
def update_q_table(state, action, reward, next_state):
    """Apply one Q-learning update to ``q_table[state][action]`` in place.

    The new value blends the current estimate with the bootstrapped target
    ``reward + DISCOUNT_FACTOR * max(q_table[next_state])``, weighted by
    ``LEARNING_RATE``.
    """
    best_future_value = np.max(q_table[next_state[0], next_state[1]])
    current_value = q_table[state[0], state[1], action]
    new_value = (1 - LEARNING_RATE) * current_value + LEARNING_RATE * (
        reward + DISCOUNT_FACTOR * best_future_value
    )
    q_table[state[0], state[1], action] = new_value


if __name__ == "__main__":
    # Run Q-learning algorithm
    q_learning()
    # Print the learned Q-table
    print("Learned Q-table:")
    print(q_table)
Step by Step Solution
There are 3 Steps involved in it
1 Expert Approved Answer
Step: 1 Unlock
Question Has Been Solved by an Expert!
Get step-by-step solutions from verified subject matter experts
Step: 2 Unlock
Step: 3 Unlock
