Question: Overview For this assignment you will write a parser using a parser generator . You will describe the Cool grammar in an appropriate input format

Overview

For this assignment you will write a parser using a parser generator. You will describe the Cool grammar in an appropriate input format and the parser generator will generate actual code (in OCaml). You will also write additional code to unserialize the tokens produced by the lexer stage and to serialize the abstract syntax tree produced by your parser.

Specification

You must create a program that takes a single command-line argument (for example, file.cl-lex). That argument will be an ASCII text Cool tokens file (as described in PA2). The cl-lex file will always be well-formed (i.e., there will be no syntax errors in the cl-lex file itself). However, the cl-lex file may describe a sequence of Cool tokens that do not form a valid Cool program.

Your program must either indicate that there is an error in the Cool program described by the cl-lex file (e.g., a parse error in the Cool file) or emit file.cl-ast, a serialized Cool abstract syntax tree. Your programs main parser component must be constructed by a parser generator. The glue code for processing command-line arguments, unserializing tokens and serializing the resulting abstract syntax tree should be written by hand. If your program is called parser, invoking parser file.cl-lex should yield the same output as cool --parse file.cl. Your program will consist of a number of OCaml files.

You must use ocamlyacc. Do not write your entire parser by hand. Parts of it must be tool-generated from context-free grammar rules you provide.

The .cl-ast File Format

If there are no errors in file.cl-lex your program should create file.cl-ast and serialize the abstract syntax tree to it. The general format of a .cl-ast file follows the Cool Reference Manual Syntax chart. Basically, we do a pre-order traversal of the abstract syntax tree, writing down every node as we come to it.

----------------------------------------------

main.ml

---------------------------------------------

open Printf open Parser

let read_tokens token_filename = let fin = open_in token_filename in let tokens_queue = Queue.create () in let get_line () = String.trim (input_line fin) in begin try while true do let line_number = int_of_string (get_line ()) in let token_type = get_line () in let token = match token_type with | "at" -> AT(line_number) | "case" -> CASE(line_number) | "class" -> CLASS(line_number) | "colon" -> COLON(line_number) | "comma" -> COMMA(line_number) | "divide" -> DIVIDE(line_number) | "dot" -> DOT(line_number) | "else" -> ELSE(line_number) | "equals" -> EQUALS(line_number) | "esac" -> ESAC(line_number) | "false" -> FALSE(line_number) | "fi" -> FI(line_number) | "identifier" -> IDENTIFIER(line_number, get_line ()) | "if" -> IF(line_number) | "in" -> IN(line_number) | "inherits" -> INHERITS(line_number) | "integer" -> INTEGER(line_number, get_line ()) | "isvoid" -> ISVOID(line_number) | "larrow" -> LARROW(line_number) | "lbrace" -> LBRACE(line_number) | "le" -> LE(line_number) | "let" -> LET(line_number) | "loop" -> LOOP(line_number) | "lparen" -> LPAREN(line_number) | "lt" -> LT(line_number) | "minus" -> MINUS(line_number) | "new" -> NEW(line_number) | "not" -> NOT(line_number) | "of" -> OF(line_number) | "plus" -> PLUS(line_number) | "pool" -> POOL(line_number) | "rarrow" -> RARROW(line_number) | "rbrace" -> RBRACE(line_number) | "rparen" -> RPAREN(line_number) | "semi" -> SEMI(line_number) | "string" -> STRING(line_number, get_line ()) | "then" -> THEN(line_number) | "tilde" -> TILDE(line_number) | "times" -> TIMES(line_number) | "true" -> TRUE(line_number) | "type" -> TYPE(line_number, get_line ()) | "while" -> WHILE(line_number) | _ -> begin Printf.printf "Unexpected token type: %s " token_type; exit 1 end in (* We add an extra line number for conveniently handling parsing * errors *) Queue.add (line_number, token) tokens_queue ; done with _ -> () end; close_in fin; tokens_queue

let main () = begin (* Read in the tokens from the lexer cl-lex file *) let token_filename = Sys.argv.(1) in let tokens_queue = read_tokens token_filename in (* Fake up a Lexer using our queue of tokens *) let lexbuf = Lexing.from_string "" in

let last_line_number = ref 1 in

let lexer_token lb = if Queue.is_empty tokens_queue then EOF else begin let line_number, next_token = Queue.take tokens_queue in last_line_number := line_number ; next_token end in let ast = try cool_program_rule lexer_token lexbuf with _ -> begin printf "ERROR: %d: Parser: Parse error " !last_line_number ; exit 1 end in

let output_filename = (Filename.chop_extension Sys.argv.(1)) ^ ".cl-ast" in let f = open_out output_filename in let s = Ast.cool_program_to_string ast in fprintf f "%s" s; close_out f; end

let () = main ()

---------------------------------------------------------

ast.ml

---------------------------------------------------------

open Printf

(******************************************************************************) (* Type definitions *)

type identifier = int * string (* line number, lexeme *)

type cool_class = | ClassNoInherits of identifier * feature list

and feature = | AttributeNoInit of identifier * identifier

type cool_program = cool_class list

(******************************************************************************) (* string building functions *)

let rec cool_program_to_string ast = let buf = Buffer.create 1024 in class_list_out ast buf; Buffer.contents buf

and class_list_out ast buf = bprintf buf "%d " (List.length ast) ; List.iter (fun x -> class_out x buf) ast

and class_out ast buf = match ast with | ClassNoInherits(class_name, class_features) -> identifier_out class_name buf; bprintf buf "no_inherits " ; bprintf buf "%d " (List.length class_features) ; List.iter (fun x -> feature_out x buf) class_features

and feature_out ast buf = match ast with | AttributeNoInit(attr_name, type_name) -> bprintf buf "attribute_no_init " ; identifier_out attr_name buf; identifier_out type_name buf;

and identifier_out (line_number, lexeme) buf = bprintf buf "%d %s " line_number lexeme

---------------------------------------------

parser.mly

----------------------------------------------

%{

open Ast

%}

/* Convention: uppercase is token/terminal, lowercase is non-terminal */

%token CLASS INHERITS ISVOID NEW %token CASE ELSE ESAC FI IF IN LET LOOP OF POOL THEN WHILE %token AT COLON COMMA DOT LARROW LBRACE LPAREN RARROW RBRACE RPAREN RPAREN SEMI %token EQUALS DIVIDE LE LT MINUS NOT PLUS TILDE TIMES %token TRUE FALSE %token IDENTIFIER INTEGER STRING TYPE %token EOF

%start cool_program_rule /* the entry point */ %type cool_program_rule

%%

cool_program_rule : class_list EOF { $1 } ;

class_list : /* nothing */ { [] } | cool_class SEMI class_list { $1 :: $3 } ;

cool_class : CLASS TYPE LBRACE feature_list RBRACE { ClassNoInherits($2, $4) } ;

feature_list : /* nothing */ { [] } | feature SEMI feature_list { $1 :: $3 } ;

feature : IDENTIFIER COLON TYPE { AttributeNoInit($1, $3) } ;

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!