I need help with my code, as it is not running to completion.
def qtrain(model, maze, **opt):
    # exploration factor
    global epsilon

    # number of epochs
    n_epoch = opt.get('n_epoch', 15000)

    # maximum memory to store episodes
    max_memory = opt.get('max_memory', 1000)

    # maximum data size for training
    data_size = opt.get('data_size', 50)

    # start time
    start_time = datetime.datetime.now()

    # Construct environment/game from numpy array: maze (see above)
    qmaze = TreasureMaze(maze)
    # Initialize experience replay object
    experience = GameExperience(model, max_memory=max_memory)

    win_history = []              # history of win/lose game
    hsize = qmaze.maze.size // 2  # history window size
    win_rate = 0.0

    # For each epoch: randomly select a free cell, reset the maze with the
    # agent at that position, then play out the episode.
    for epoch in range(n_epoch):
        loss = 0.0
        n_episodes = 0
        agent_cell = random.choice(qmaze.free_cells)
        qmaze.reset(agent_cell)
        envstate = qmaze.observe()

        while True:
            prev_envstate = envstate

            # Exploration vs. exploitation
            if np.random.rand() < epsilon:
                action = np.random.choice(num_actions)
            else:
                q_values = model.predict(prev_envstate, verbose=0)
                action = np.argmax(q_values[0])

            # Act and store the episode in the experience replay object
            envstate, reward, game_status = qmaze.act(action)
            episode = [prev_envstate, action, reward, envstate, game_status]
            experience.remember(episode)

            # Train the neural network model and accumulate the loss
            inputs, targets = experience.get_data(data_size=data_size)
            history = model.fit(inputs, targets, epochs=1, verbose=0)
            loss += history.history['loss'][0]

            n_episodes += 1
            if game_status in ['win', 'lose']:
                win_history.append(1 if game_status == 'win' else 0)
                break

        if len(win_history) > hsize:
            win_rate = sum(win_history[-hsize:]) / hsize

        # Print the epoch, loss, episodes, win count, and win rate for each epoch
        dt = datetime.datetime.now() - start_time
        t = format_time(dt.total_seconds())
        template = "Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d} | Win count: {:d} | Win rate: {:.3f} | time: {}"
        print(template.format(epoch, n_epoch - 1, loss, n_episodes, sum(win_history), win_rate, t))

        # If the win rate is above the threshold and the model passes the
        # completion check, that is the final epoch. We simply check whether
        # training has exhausted all free cells and the agent won in all cases.
        if win_rate > 0.9:
            epsilon = 0.05
        if sum(win_history[-hsize:]) == hsize and completion_check(model, qmaze):
            print("Reached 100% win rate at epoch: %d" % (epoch,))

    # Determine the total time for training
    dt = datetime.datetime.now() - start_time
    seconds = dt.total_seconds()
    t = format_time(seconds)
    print("n_epoch: %d, max_mem: %d, data: %d, time: %s" % (epoch, max_memory, data_size, t))
    return seconds
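For context, here is a minimal sketch of how a training function with this signature might be invoked. The build_model helper and the small maze layout are placeholder assumptions, not part of the code above; note that the keyword names must match the opt.get keys in qtrain (n_epoch, max_memory, data_size), or the defaults are silently used.

import numpy as np

# Illustrative maze only: 1.0 = free cell, 0.0 = wall.
maze = np.array([
    [1., 0., 1., 1.],
    [1., 1., 1., 0.],
    [0., 1., 1., 1.],
    [1., 1., 1., 1.]
])

model = build_model(maze)  # hypothetical helper that builds the Keras model
qtrain(model, maze, n_epoch=1000, max_memory=8 * maze.size, data_size=32)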
For reference, these are the supporting methods from TreasureMaze.py and GameExperience.py that qtrain relies on, together with their hint comments, plus the format_time utility:

# This method resets the pirate's position.
# Hint: Review the reset method in the TreasureMaze.py class.
def reset(self, pirate):
    self.pirate = pirate
    self.maze = np.copy(self._maze)
    nrows, ncols = self.maze.shape
    row, col = pirate
    self.maze[row, col] = pirate_mark
    self.state = (row, col, 'start')
    # To prevent the game from running excessively long, a minimum reward is defined.
    self.min_reward = -0.5 * self.maze.size
    self.total_reward = 0
    self.visited = set()

# envstate = Environment.current_state
# Hint: Review the observe method in the TreasureMaze.py class.
def observe(self):
    canvas = self.draw_env()
    envstate = canvas.reshape((1, -1))
    return envstate

# While the state is not game over:
#   previous_envstate = envstate
#   action = choose an action (left, right, up, down) either by exploration or by exploitation
#   envstate, reward, game_status = qmaze.act(action)
# Hint: Review the act method in the TreasureMaze.py class.
def valid_actions(self, cell=None):
    if cell is None:
        row, col, mode = self.state
    else:
        row, col = cell
    actions = [0, 1, 2, 3]
    nrows, ncols = self.maze.shape
    if row == 0:
        actions.remove(1)
    elif row == nrows - 1:
        actions.remove(3)

    if col == 0:
        actions.remove(0)
    elif col == ncols - 1:
        actions.remove(2)

    if row > 0 and self.maze[row - 1, col] == 0.0:
        actions.remove(1)
    if row < nrows - 1 and self.maze[row + 1, col] == 0.0:
        actions.remove(3)

    if col > 0 and self.maze[row, col - 1] == 0.0:
        actions.remove(0)
    if col < ncols - 1 and self.maze[row, col + 1] == 0.0:
        actions.remove(2)

    return actions

# episode = [previous_envstate, action, reward, envstate, game_status]
# Store episode in Experience replay object
# Hint: Review the remember method in the GameExperience.py class.
def remember(self, episode):
    # episode = [envstate, action, reward, envstate_next, game_over]
    # memory[i] = episode
    # envstate == flattened 1d maze cells info, including pirate cell (see method: observe)
    self.memory.append(episode)
    if len(self.memory) > self.max_memory:
        del self.memory[0]

# Train neural network model and evaluate loss.
# Hint: Call GameExperience.get_data to retrieve training data (input and target)
# and pass it to model.fit to train the model. You can call model.evaluate to
# determine loss. Returns inputs and targets from memory; the data size defaults to 10.
def get_data(self, data_size=10):
    env_size = self.memory[0][0].shape[1]  # envstate 1d size (1st element of episode)
    mem_size = len(self.memory)
    data_size = min(mem_size, data_size)
    inputs = np.zeros((data_size, env_size))
    targets = np.zeros((data_size, self.num_actions))
    for i, j in enumerate(np.random.choice(range(mem_size), data_size, replace=False)):
        envstate, action, reward, envstate_next, game_over = self.memory[j]
        inputs[i] = envstate
        # There should be no target values for actions not taken.
        targets[i] = self.predict(envstate)
        # Q_sa = derived policy = max quality env/action = max_a' Q(s', a')
        Q_sa = np.max(self.predict(envstate_next))
        if game_over:
            targets[i, action] = reward
        else:
            # reward + gamma * max_a' Q(s', a')
            targets[i, action] = reward + self.discount * Q_sa
    return inputs, targets

# This is a small utility for printing readable time strings:
def format_time(seconds):
    if seconds < 400:
        s = float(seconds)
        return "%.1f seconds" % (s,)
    elif seconds < 4000:
        m = seconds / 60.0
        return "%.2f minutes" % (m,)
    else:
        h = seconds / 3600.0
        return "%.2f hours" % (h,)

This is the error I get; what am I missing, given the traceback below?

ValueError    Traceback (most recent call last)
ValueError: unsupported format character 'w' (0x77) at index 13
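As a point of reference, here is a minimal, self-contained sketch of how this exact ValueError message can be produced by Python's old-style % string formatting. The string literal below mirrors the one used in qtrain, and the value 7 is only an illustration; the sketch stands on its own.

# In old-style % formatting, '%' starts a conversion specifier, so a literal
# percent sign must be written as '%%'. In the string below, the '%' in "100%"
# is followed by " w", which Python parses as a space flag plus an unsupported
# conversion character 'w' at index 13.
msg = "Reached 100% win rate at epoch: %d"
try:
    print(msg % (7,))
except ValueError as err:
    print(err)  # unsupported format character 'w' (0x77) at index 13

# Escaping the literal percent sign formats cleanly:
print("Reached 100%% win rate at epoch: %d" % (7,))  # Reached 100% win rate at epoch: 7

Whether this is what is happening here depends on which format strings the failing cell actually uses.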
