Question: Your goal is to train a Deep Reinforcement Learning model based on your Tic-Tac-Toe game (provided below). To do that, you will need to use the stable-baselines3 and OpenAI Gym libraries and the code stubs I provide. Here are the steps that you will need to follow:
Open your project in PyCharm and the "Terminal" tab in the bottom section of the IDE. If you are not using PyCharm, you can simply open a terminal. Before you install any libraries, however, I recommend creating a Conda environment using Miniconda. More on Conda can be found here: (https://www.machinelearningplus.com/deployment/conda-create-environment-and-everything-you-need-to-know-to-manage-conda-virtual-environment/)
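For example, assuming you name the environment tictactoe-rl and use Python 3.9 (both choices are arbitrary), creating and activating it would look like this:

conda create -n tictactoe-rl python=3.9
conda activate tictactoe-rl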
Install the needed libraries with the following two commands:

pip install stable-baselines3
pip install pyglet==1.5.27
After the installation is complete, create a new Python script file and insert the code from the first stub (added below). That is the main script you will run for this assignment. At this step, you may try running the code to check that everything works: if you see the CartPole animation after a while, you have succeeded.
Use env.py (added below) as your starting point for your custom gym environment. Resolve all the TODOs in both files.
Submit all the Python files you used.
You may find the documentation for the libraries here:
StableBaselines3 (https://stable-baselines3.readthedocs.io/en/master/index.html)
OpenAI Gym (https://www.gymlibrary.dev/)
Tic-Tac-Toe game code:
import random


def show_board(game_board):
    print(" ")
    print(game_board[0] + "|" + game_board[1] + "|" + game_board[2] + " ")
    print(game_board[3] + "|" + game_board[4] + "|" + game_board[5] + " ")
    print(game_board[6] + "|" + game_board[7] + "|" + game_board[8] + " ")


def check_winner(game_board):
    # All eight winning lines: three rows, three columns, two diagonals
    lines = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
             (0, 3, 6), (1, 4, 7), (2, 5, 8),
             (0, 4, 8), (2, 4, 6)]
    for a, b, c in lines:
        if game_board[a] == game_board[b] == game_board[c] != " ":
            return game_board[a]
    return None


def is_draw(game_board):
    # A draw: no winner and no empty cells left
    return check_winner(game_board) is None and " " not in game_board


def make_optimal_move(game_board, player):
    # The computer opponent prefers the center, then the corners,
    # then falls back to a random empty cell
    opponent = "X" if player == "O" else "O"
    for cell in (4, 0, 2, 6, 8):
        if game_board[cell] == " ":
            game_board[cell] = opponent
            return
    while True:
        rand_move = random.randint(0, 8)
        if game_board[rand_move] == " ":
            game_board[rand_move] = opponent
            return


def main():
    game_board = [" "] * 9
    show_board(game_board)
    player = input("choose X or O? ")
    while player != "X" and player != "O":
        player = input("enter a valid input (X or O). ")
    player_turn = True
    while True:
        if player_turn:
            player_move = int(input("enter the number of the cell you want to play in "))
            while player_move < 0 or player_move > 8 or game_board[player_move] != " ":
                player_move = int(input("this cell is already full. enter the number of an empty cell "))
            game_board[player_move] = player
        else:
            make_optimal_move(game_board, player)
        show_board(game_board)
        winner = check_winner(game_board)
        if winner is not None:
            print(winner + " wins")
            break
        if is_draw(game_board):
            print("draw")
            break
        player_turn = not player_turn


main()
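Note that make_optimal_move above is only a simple heuristic (center first, then corners, then a random cell), whereas env.py below asks for a MINIMAX opponent. The following is a minimal sketch of such an opponent, reusing the check_winner and is_draw helpers and the same list-of-strings board; the function names are my own choice, and this is one possible implementation rather than part of the provided code:

def minimax(game_board, current, me, opponent):
    # Returns the best achievable score for `me` from this position:
    # +1 for a win, -1 for a loss, 0 for a draw
    winner = check_winner(game_board)
    if winner == me:
        return 1
    if winner == opponent:
        return -1
    if is_draw(game_board):
        return 0
    scores = []
    for cell in range(9):
        if game_board[cell] == " ":
            game_board[cell] = current
            next_player = opponent if current == me else me
            scores.append(minimax(game_board, next_player, me, opponent))
            game_board[cell] = " "  # undo the trial move
    # Maximize on our own turn, minimize on the opponent's turn
    return max(scores) if current == me else min(scores)


def make_minimax_move(game_board, player):
    # Plays the best move for `player` in place (board must not be full)
    opponent = "O" if player == "X" else "X"
    best_score, best_cell = -2, None
    for cell in range(9):
        if game_board[cell] == " ":
            game_board[cell] = player
            score = minimax(game_board, opponent, player, opponent)
            game_board[cell] = " "
            if score > best_score:
                best_score, best_cell = score, cell
    game_board[best_cell] = player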
first stub:
import gym
# TODO: import your custom environment from a separate file
from stable_baselines3 import A2C


def main():
    env = gym.make("CartPole-v1")  # TODO: replace with your custom environment (e.g. env = MyCustomEnv())

    model = A2C("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=10_000)

    vec_env = model.get_env()
    obs = vec_env.reset()
    win_count = 0
    num_of_games_played = 0
    for i in range(1000):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = vec_env.step(action)
        if done:
            num_of_games_played += 1
            # TODO: get information from the environment whether the agent has won.
            # If so, increment win_count
        vec_env.render()
    print(f"Win rate: {win_count / num_of_games_played}")


if __name__ == '__main__':
    main()
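Regarding the win-count TODO: in stable-baselines3, vec_env.step returns the info for each wrapped environment as a list of dictionaries (gym also expects info to be a dictionary rather than a plain number). Assuming your _get_info() in env.py returns something like {"result": 1} when the agent wins (the "result" key is a hypothetical choice, not part of the stub), the if done: branch could become:

        if done:
            num_of_games_played += 1
            # "result" is a hypothetical key set by our custom _get_info();
            # 1 means the agent won (see env.py)
            if info[0].get("result") == 1:
                win_count += 1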
env.py:
import gym
from gym import spaces


class TicTacToeEnv(gym.Env):

    def __init__(self):
        self.observation_space = 0  # TODO: replace with the observation space
        self.action_space = 0  # TODO: replace with the action space

    def reset(self):
        # Reset is called once at the beginning of each episode (each game)
        # TODO: create the Tic-Tac-Toe board and set it to the initial state
        # TODO: determine whether the agent is X or O
        # TODO: create the MINIMAX player who will be the opponent for our agent
        observation = self._get_obs()  # Get the initial observation
        info = self._get_info()  # Get the initial info
        return observation, info

    def step(self, action):
        # Step is called every time the agent takes an action
        # TODO: place the agent's X or O on the board based on the action provided
        # TODO: have the MINIMAX opponent make a move and place its X or O on the board
        done = False  # TODO: set to True if the game is over
        reward = 1  # TODO: set to 1 if the agent won, -1 if the agent lost, 0 if the game is a draw
        observation = self._get_obs()
        info = self._get_info()
        return observation, reward, done, info

    def render(self, mode='console'):
        # Render is called every time the environment is to be displayed on screen
        # TODO: print the board to the console
        pass

    def _get_obs(self):
        # TODO: return the observation represented as a dictionary matching your observation space
        return 0

    def _get_info(self):
        # TODO: return -2 if the game is in progress, 1 if the agent won,
        # -1 if the agent lost, 0 if the game is a draw
        return 0
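As a sketch of how the two space TODOs might be resolved (one possible choice, not the only valid one): each of the nine cells can take three values (empty, X, or O), and an action is the index of the cell to play in:

import gym
from gym import spaces


class TicTacToeEnv(gym.Env):
    def __init__(self):
        # A minimal sketch, assuming the cell encoding 0 = empty, 1 = X, 2 = O
        self.observation_space = spaces.Dict({"board": spaces.MultiDiscrete([3] * 9)})
        self.action_space = spaces.Discrete(9)  # one action per board cell

If you go with a Dict observation space like this, note that stable-baselines3 requires the "MultiInputPolicy" policy instead of "MlpPolicy" when constructing the A2C model.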