Question: Please help modify car.py to fit the test case .test_car.py in python: #!/usr/bin/python import argparse import logging import sys import numpy as np import gym

Please help modify car.py so that it passes the test case in test_car.py (Python):

#!/usr/bin/python

import argparse import logging import sys

import numpy as np

import gym #import gym.scoreboard.scoring from gym import wrappers, logger

# Global variables

# Index returned by discretize_state() for an observation outside the bounds
# below.
OUTOFBOUNDSTATE = -1

# Position bounds and their span.
X_MAX = 0.6
X_MIN = -1.2
X_RANGE = 1.8

# Velocity bounds and their span. MountainCar-v0 limits velocity to
# [-0.07, 0.07]; the previous +/-0.7 bounds were ten times too wide, so every
# real observation fell into the two middle velocity bins and the remaining
# bins were never visited.
XDOT_MAX = 0.07
XDOT_MIN = -0.07
XDOT_RANGE = 0.14

# Function to discretize state, could potentially be parallelized with
# mapReduce technique
def discretize_state(x, xdot, xRes, xdotRes):
    """Map a continuous (position, velocity) pair to a single state index.

    Args:
        x: position, expected within [X_MIN, X_MAX].
        xdot: velocity, expected within [XDOT_MIN, XDOT_MAX].
        xRes: number of bins used for the position axis.
        xdotRes: number of bins used for the velocity axis.

    Returns:
        OUTOFBOUNDSTATE if either value lies outside its bounds; otherwise
        the flattened index ``position_bin * xdotRes + velocity_bin``.
    """
    # The previous checks were written as `X_MIN > x > X_MAX`, which Python
    # evaluates as `X_MIN > x and x > X_MAX`. That can never hold, so
    # out-of-bounds values were never reported.
    if x < X_MIN or x > X_MAX:
        return OUTOFBOUNDSTATE
    if xdot < XDOT_MIN or xdot > XDOT_MAX:
        return OUTOFBOUNDSTATE

    # Calculates x and y coordinates of state.
    s_x = discretize_state_helper(x, xRes, X_MAX, X_MIN, X_RANGE)
    s_y = discretize_state_helper(xdot, xdotRes, XDOT_MAX, XDOT_MIN, XDOT_RANGE)

    # Return flattened value which corresponds to unique index of state.
    return s_x * xdotRes + s_y

# Helper function that bins state variables and returns state in 1D
def discretize_state_helper(val, res, maxi, mini, rng):
    """Return the index of the equal-width bin that contains ``val``.

    The interval starting at ``mini`` with width ``rng`` is split into ``res``
    bins; bin ``box`` ends at ``rng * (1 + box) / res + mini``.

    Args:
        val: value to bin.
        res: number of bins.
        maxi: upper bound of the interval; not used by the computation and
            kept only so existing callers keep working.
        mini: lower bound of the interval.
        rng: width of the interval.

    Returns:
        The index of the first bin whose upper edge is greater than ``val``.
        Values below ``mini`` land in bin 0; values at or beyond the upper
        edge are clamped to the last bin, ``res - 1``.
    """
    for box in range(res):
        if val < (rng * (1 + box) / res + mini):
            return box
    # Previously the function fell through here and implicitly returned None
    # for val >= mini + rng, which made the caller's index arithmetic raise
    # TypeError. Clamp to the last bin instead.
    return res - 1

if __name__ == '__main__':
    # Train a tabular Q-learning agent on a gym environment, using
    # discretize_state() to turn each observation into a row of the Q table.
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('env_id', nargs='?', default='MountainCar-v0',
                        help='Select the environment to run')
    args = parser.parse_args()

    logger = logging.getLogger()
    formatter = logging.Formatter('[%(asctime)s] %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # You can set the level to logging.DEBUG or logging.WARN if you
    # want to change the amount of output.
    logger.setLevel(logging.INFO)

    env = gym.make(args.env_id)
    outdir = '/tmp/' + 'qagent' + '-results'
    env = wrappers.Monitor(env, outdir, write_upon_reset=True, force=True)

    env.seed(0)

    # Resolution variables for state space
    xres = 10
    xdotres = 4

    # One row per discretized state plus one extra row (41 rows for the
    # resolutions above, as before). A state index of -1 selects the last row
    # through negative indexing -- presumably that extra row is what the
    # out-of-bounds state was meant to use.
    Q = np.zeros([xres * xdotres + 1, env.action_space.n])

    alpha = 0.7
    gamma = 0.97

    n_episodes = 50001
    for episode in range(n_episodes):
        reward = 0
        done = False
        state = env.reset()
        s = discretize_state(state[0], state[1], xres, xdotres)
        while not done:
            # Greedy policy: take the first action with the highest Q-value.
            action = int(np.argmax(Q[s]))
            state, reward, done, info = env.step(action)
            sprime = discretize_state(state[0], state[1], xres, xdotres)
            predicted_value = np.max(Q[sprime])
            if sprime < 0:
                # Out-of-bounds successor: no future value, fixed penalty.
                predicted_value = 0
                reward = -5
            # Q-learning update towards reward + discounted best next value.
            Q[s, action] += alpha * (
                reward + gamma * predicted_value - Q[s, action])
            s = sprime

        # NOTE(review): the original layout was lost; the nesting of the
        # statements below is reconstructed -- confirm against the author's
        # file.
        if episode % 1000 == 0:
            alpha *= .99  # decay rate for alpha, each 1000
            print(reward)
        if state[0] >= 0.5:
            print("success")
        elif episode % 1000 == 0:
            print("fail ", state[0], Q[s, action])

Test case (test_car.py):

#!/usr/bin/env python3 from car import MountainCar import unittest import numpy as np

class TestTicTacToe(unittest.TestCase):
    """Checks that the MountainCar agent from car.py gets past position 0.5."""

    def test_1(self):
        """Run the agent and require its highest recorded position > 0.5."""
        env_id = 'MountainCar-v0'
        # NOTE(review): the meaning of the two boolean flags and of 'car.npy'
        # is defined by MountainCar in car.py, which is not shown here.
        mountain_car = MountainCar(env_id, False, True, 'car.npy')
        all_states = mountain_car.run()
        # Column-wise maximum over everything run() returned; column 0 is
        # reported as the position in the messages below.
        max_ = np.max(all_states, axis=0)
        print("Your highest attained position = {}".format(max_[0]))
        print("Position threshold for success >= {}".format(0.5))
        # assertGreater reports both values on failure, unlike comparing a
        # precomputed boolean with True.
        self.assertGreater(max_[0], 0.5)

# Only run the suite when this file is executed directly, so that importing
# the module (for example through a test runner) does not start the tests and
# exit the interpreter.
if __name__ == '__main__':
    unittest.main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!