I need help with my code, as it is not running to completion.
def qtrain(model, maze, **opt):
    # exploration factor
    global epsilon

    # number of epochs
    n_epoch = opt.get('n_epoch', 15000)

    # maximum memory to store episodes
    max_memory = opt.get('max_memory', 1000)

    # maximum data size for training
    data_size = opt.get('data_size', 50)

    # start time
    start_time = datetime.datetime.now()

    # Construct environment/game from numpy array: maze (see above)
    qmaze = TreasureMaze(maze)
    # Initialize experience replay object
    experience = GameExperience(model, max_memory=max_memory)

    win_history = []              # history of win/lose game
    hsize = qmaze.maze.size // 2  # history window size
    win_rate = 0.0

    # For each epoch: randomly select a free cell, reset the maze with the
    # agent at that position, then play out the episode.
    for epoch in range(n_epoch):
        loss = 0.0
        n_episodes = 0
        agent_cell = random.choice(qmaze.free_cells)
        qmaze.reset(agent_cell)
        envstate = qmaze.observe()

        while True:
            prev_envstate = envstate

            # Exploration vs. exploitation
            if np.random.rand() < epsilon:
                action = np.random.choice(num_actions)
            else:
                q_values = model.predict(prev_envstate, verbose=0)
                action = np.argmax(q_values[0])

            # Act and store the episode in the experience replay object
            envstate, reward, game_status = qmaze.act(action)
            episode = [prev_envstate, action, reward, envstate, game_status]
            experience.remember(episode)

            # Train the neural network model and accumulate the loss
            inputs, targets = experience.get_data(data_size=data_size)
            history = model.fit(inputs, targets, epochs=1, verbose=0)
            loss += history.history['loss'][0]

            n_episodes += 1
            if game_status in ['win', 'lose']:
                win_history.append(1 if game_status == 'win' else 0)
                break

        if len(win_history) > hsize:
            win_rate = sum(win_history[-hsize:]) / hsize

        # Print the epoch, loss, episodes, win count, and win rate for each epoch
        dt = datetime.datetime.now() - start_time
        t = format_time(dt.total_seconds())
        template = "Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d} | Win count: {:d} | Win rate: {:.3f} | time: {}"
        print(template.format(epoch, n_epoch - 1, loss, n_episodes, sum(win_history), win_rate, t))

        # If the win rate is above the threshold and the model passes the
        # completion check, that is the final epoch. We simply check whether
        # training has exhausted all free cells and the agent won in all cases.
        if win_rate > 0.9:
            epsilon = 0.05
        if sum(win_history[-hsize:]) == hsize and completion_check(model, qmaze):
            print("Reached 100% win rate at epoch: %d" % (epoch,))

    # Determine the total time for training
    dt = datetime.datetime.now() - start_time
    seconds = dt.total_seconds()
    t = format_time(seconds)
    print("n_epoch: %d, max_mem: %d, data: %d, time: %s" % (epoch, max_memory, data_size, t))
    return seconds
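For context, here is a minimal sketch of how a training function with this signature might be invoked. The build_model helper and the small maze layout are placeholder assumptions, not part of the code above; note that the keyword names must match the opt.get keys in qtrain (n_epoch, max_memory, data_size), or the defaults are silently used.

import numpy as np

# Illustrative maze only: 1.0 = free cell, 0.0 = wall.
maze = np.array([
    [1., 0., 1., 1.],
    [1., 1., 1., 0.],
    [0., 1., 1., 1.],
    [1., 1., 1., 1.]
])

model = build_model(maze)  # hypothetical helper that builds the Keras model
qtrain(model, maze, n_epoch=1000, max_memory=8 * maze.size, data_size=32)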
For reference, these are the supporting methods from TreasureMaze.py and GameExperience.py that qtrain relies on, together with their hint comments, plus the format_time utility:

# This method resets the pirate's position.
# Hint: Review the reset method in the TreasureMaze.py class.
def reset(self, pirate):
    self.pirate = pirate
    self.maze = np.copy(self._maze)
    nrows, ncols = self.maze.shape
    row, col = pirate
    self.maze[row, col] = pirate_mark
    self.state = (row, col, 'start')
    # To prevent the game from running excessively long, a minimum reward is defined.
    self.min_reward = -0.5 * self.maze.size
    self.total_reward = 0
    self.visited = set()

# envstate = Environment.current_state
# Hint: Review the observe method in the TreasureMaze.py class.
def observe(self):
    canvas = self.draw_env()
    envstate = canvas.reshape((1, -1))
    return envstate

# While the state is not game over:
#   previous_envstate = envstate
#   action = choose an action (left, right, up, down) either by exploration or by exploitation
#   envstate, reward, game_status = qmaze.act(action)
# Hint: Review the act method in the TreasureMaze.py class.
def valid_actions(self, cell=None):
    if cell is None:
        row, col, mode = self.state
    else:
        row, col = cell
    actions = [0, 1, 2, 3]
    nrows, ncols = self.maze.shape
    if row == 0:
        actions.remove(1)
    elif row == nrows - 1:
        actions.remove(3)

    if col == 0:
        actions.remove(0)
    elif col == ncols - 1:
        actions.remove(2)

    if row > 0 and self.maze[row - 1, col] == 0.0:
        actions.remove(1)
    if row < nrows - 1 and self.maze[row + 1, col] == 0.0:
        actions.remove(3)

    if col > 0 and self.maze[row, col - 1] == 0.0:
        actions.remove(0)
    if col < ncols - 1 and self.maze[row, col + 1] == 0.0:
        actions.remove(2)

    return actions

# episode = [previous_envstate, action, reward, envstate, game_status]
# Store episode in Experience replay object
# Hint: Review the remember method in the GameExperience.py class.
def remember(self, episode):
    # episode = [envstate, action, reward, envstate_next, game_over]
    # memory[i] = episode
    # envstate == flattened 1d maze cells info, including pirate cell (see method: observe)
    self.memory.append(episode)
    if len(self.memory) > self.max_memory:
        del self.memory[0]

# Train neural network model and evaluate loss.
# Hint: Call GameExperience.get_data to retrieve training data (input and target)
# and pass it to model.fit to train the model. You can call model.evaluate to
# determine loss. Returns inputs and targets from memory; the data size defaults to 10.
def get_data(self, data_size=10):
    env_size = self.memory[0][0].shape[1]  # envstate 1d size (1st element of episode)
    mem_size = len(self.memory)
    data_size = min(mem_size, data_size)
    inputs = np.zeros((data_size, env_size))
    targets = np.zeros((data_size, self.num_actions))
    for i, j in enumerate(np.random.choice(range(mem_size), data_size, replace=False)):
        envstate, action, reward, envstate_next, game_over = self.memory[j]
        inputs[i] = envstate
        # There should be no target values for actions not taken.
        targets[i] = self.predict(envstate)
        # Q_sa = derived policy = max quality env/action = max_a' Q(s', a')
        Q_sa = np.max(self.predict(envstate_next))
        if game_over:
            targets[i, action] = reward
        else:
            # reward + gamma * max_a' Q(s', a')
            targets[i, action] = reward + self.discount * Q_sa
    return inputs, targets

# This is a small utility for printing readable time strings:
def format_time(seconds):
    if seconds < 400:
        s = float(seconds)
        return "%.1f seconds" % (s,)
    elif seconds < 4000:
        m = seconds / 60.0
        return "%.2f minutes" % (m,)
    else:
        h = seconds / 3600.0
        return "%.2f hours" % (h,)

This is the error I get; what am I missing, given the traceback below?

ValueError    Traceback (most recent call last)
ValueError: unsupported format character 'w' (0x77) at index 13
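As a point of reference, here is a minimal, self-contained sketch of how this exact ValueError message can be produced by Python's old-style % string formatting. The string literal below mirrors the one used in qtrain, and the value 7 is only an illustration; the sketch stands on its own.

# In old-style % formatting, '%' starts a conversion specifier, so a literal
# percent sign must be written as '%%'. In the string below, the '%' in "100%"
# is followed by " w", which Python parses as a space flag plus an unsupported
# conversion character 'w' at index 13.
msg = "Reached 100% win rate at epoch: %d"
try:
    print(msg % (7,))
except ValueError as err:
    print(err)  # unsupported format character 'w' (0x77) at index 13

# Escaping the literal percent sign formats cleanly:
print("Reached 100%% win rate at epoch: %d" % (7,))  # Reached 100% win rate at epoch: 7

Whether this is what is happening here depends on which format strings the failing cell actually uses.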
