Question: Overview For this assignment you will write a parser using a parser generator . You will describe the Cool grammar in an appropriate input format
Overview
For this assignment you will write a parser using a parser generator. You will describe the Cool grammar in an appropriate input format and the parser generator will generate actual code (in OCaml). You will also write additional code to unserialize the tokens produced by the lexer stage and to serialize the abstract syntax tree produced by your parser.
Specification
You must create a program that takes a single command-line argument (for example, file.cl-lex). That argument will be an ASCII text Cool tokens file (as described in PA2). The cl-lex file will always be well-formed (i.e., there will be no syntax errors in the cl-lex file itself). However, the cl-lex file may describe a sequence of Cool tokens that do not form a valid Cool program.
Your program must either indicate that there is an error in the Cool program described by the cl-lex file (e.g., a parse error in the Cool file) or emit file.cl-ast, a serialized Cool abstract syntax tree. Your programs main parser component must be constructed by a parser generator. The glue code for processing command-line arguments, unserializing tokens and serializing the resulting abstract syntax tree should be written by hand. If your program is called parser, invoking parser file.cl-lex should yield the same output as cool --parse file.cl. Your program will consist of a number of OCaml files.
You must use ocamlyacc. Do not write your entire parser by hand. Parts of it must be tool-generated from context-free grammar rules you provide.
The .cl-ast File Format
If there are no errors in file.cl-lex your program should create file.cl-ast and serialize the abstract syntax tree to it. The general format of a .cl-ast file follows the Cool Reference Manual Syntax chart. Basically, we do a pre-order traversal of the abstract syntax tree, writing down every node as we come to it.
----------------------------------------------
main.ml
---------------------------------------------
open Printf open Parser
let read_tokens token_filename = let fin = open_in token_filename in let tokens_queue = Queue.create () in let get_line () = String.trim (input_line fin) in begin try while true do let line_number = int_of_string (get_line ()) in let token_type = get_line () in let token = match token_type with | "at" -> AT(line_number) | "case" -> CASE(line_number) | "class" -> CLASS(line_number) | "colon" -> COLON(line_number) | "comma" -> COMMA(line_number) | "divide" -> DIVIDE(line_number) | "dot" -> DOT(line_number) | "else" -> ELSE(line_number) | "equals" -> EQUALS(line_number) | "esac" -> ESAC(line_number) | "false" -> FALSE(line_number) | "fi" -> FI(line_number) | "identifier" -> IDENTIFIER(line_number, get_line ()) | "if" -> IF(line_number) | "in" -> IN(line_number) | "inherits" -> INHERITS(line_number) | "integer" -> INTEGER(line_number, get_line ()) | "isvoid" -> ISVOID(line_number) | "larrow" -> LARROW(line_number) | "lbrace" -> LBRACE(line_number) | "le" -> LE(line_number) | "let" -> LET(line_number) | "loop" -> LOOP(line_number) | "lparen" -> LPAREN(line_number) | "lt" -> LT(line_number) | "minus" -> MINUS(line_number) | "new" -> NEW(line_number) | "not" -> NOT(line_number) | "of" -> OF(line_number) | "plus" -> PLUS(line_number) | "pool" -> POOL(line_number) | "rarrow" -> RARROW(line_number) | "rbrace" -> RBRACE(line_number) | "rparen" -> RPAREN(line_number) | "semi" -> SEMI(line_number) | "string" -> STRING(line_number, get_line ()) | "then" -> THEN(line_number) | "tilde" -> TILDE(line_number) | "times" -> TIMES(line_number) | "true" -> TRUE(line_number) | "type" -> TYPE(line_number, get_line ()) | "while" -> WHILE(line_number) | _ -> begin Printf.printf "Unexpected token type: %s " token_type; exit 1 end in (* We add an extra line number for conveniently handling parsing * errors *) Queue.add (line_number, token) tokens_queue ; done with _ -> () end; close_in fin; tokens_queue
let main () = begin (* Read in the tokens from the lexer cl-lex file *) let token_filename = Sys.argv.(1) in let tokens_queue = read_tokens token_filename in (* Fake up a Lexer using our queue of tokens *) let lexbuf = Lexing.from_string "" in
let last_line_number = ref 1 in
let lexer_token lb = if Queue.is_empty tokens_queue then EOF else begin let line_number, next_token = Queue.take tokens_queue in last_line_number := line_number ; next_token end in let ast = try cool_program_rule lexer_token lexbuf with _ -> begin printf "ERROR: %d: Parser: Parse error " !last_line_number ; exit 1 end in
let output_filename = (Filename.chop_extension Sys.argv.(1)) ^ ".cl-ast" in let f = open_out output_filename in let s = Ast.cool_program_to_string ast in fprintf f "%s" s; close_out f; end
let () = main ()
---------------------------------------------------------
ast.ml
---------------------------------------------------------
open Printf
(******************************************************************************) (* Type definitions *)
type identifier = int * string (* line number, lexeme *)
type cool_class = | ClassNoInherits of identifier * feature list
and feature = | AttributeNoInit of identifier * identifier
type cool_program = cool_class list
(******************************************************************************) (* string building functions *)
let rec cool_program_to_string ast = let buf = Buffer.create 1024 in class_list_out ast buf; Buffer.contents buf
and class_list_out ast buf = bprintf buf "%d " (List.length ast) ; List.iter (fun x -> class_out x buf) ast
and class_out ast buf = match ast with | ClassNoInherits(class_name, class_features) -> identifier_out class_name buf; bprintf buf "no_inherits " ; bprintf buf "%d " (List.length class_features) ; List.iter (fun x -> feature_out x buf) class_features
and feature_out ast buf = match ast with | AttributeNoInit(attr_name, type_name) -> bprintf buf "attribute_no_init " ; identifier_out attr_name buf; identifier_out type_name buf;
and identifier_out (line_number, lexeme) buf = bprintf buf "%d %s " line_number lexeme
---------------------------------------------
parser.mly
----------------------------------------------
%{
open Ast
%}
/* Convention: uppercase is token/terminal, lowercase is non-terminal */
%token
%start cool_program_rule /* the entry point */ %type
%%
cool_program_rule : class_list EOF { $1 } ;
class_list : /* nothing */ { [] } | cool_class SEMI class_list { $1 :: $3 } ;
cool_class : CLASS TYPE LBRACE feature_list RBRACE { ClassNoInherits($2, $4) } ;
feature_list : /* nothing */ { [] } | feature SEMI feature_list { $1 :: $3 } ;
feature : IDENTIFIER COLON TYPE { AttributeNoInit($1, $3) } ;
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
