Question:

Lexer.cc

#include <iostream>
#include <istream>
#include <vector>
#include <string>
#include <cctype>

#include "lexer.h" #include "inputbuf.h"

using namespace std;

string reserved[] = { "END_OF_FILE",
    "IF", "WHILE", "DO", "THEN", "PRINT",
    "PLUS", "MINUS", "DIV", "MULT",
    "EQUAL", "COLON", "COMMA", "SEMICOLON",
    "LBRAC", "RBRAC", "LPAREN", "RPAREN",
    "NOTEQUAL", "GREATER", "LESS", "LTEQ", "GTEQ",
    "DOT", "NUM", "ID", "ERROR"
    // TODO: Add labels for new token types here (as string)
};

#define KEYWORDS_COUNT 5

string keyword[] = { "IF", "WHILE", "DO", "THEN", "PRINT" };

void Token::Print()
{
    cout << "{" << this->lexeme << " , "
         << reserved[(int) this->token_type] << " , "
         << this->line_no << "}\n";
}

LexicalAnalyzer::LexicalAnalyzer()
{
    this->line_no = 1;
    tmp.lexeme = "";
    tmp.line_no = 1;
    tmp.token_type = ERROR;
}

bool LexicalAnalyzer::SkipSpace()
{
    char c;
    bool space_encountered = false;

    input.GetChar(c);
    line_no += (c == '\n');

    while (!input.EndOfInput() && isspace(c)) {
        space_encountered = true;
        input.GetChar(c);
        line_no += (c == '\n');
    }

    if (!input.EndOfInput()) {
        input.UngetChar(c);
    }
    return space_encountered;
}

bool LexicalAnalyzer::IsKeyword(string s)
{
    for (int i = 0; i < KEYWORDS_COUNT; i++) {
        if (s == keyword[i]) {
            return true;
        }
    }
    return false;
}

TokenType LexicalAnalyzer::FindKeywordIndex(string s)
{
    for (int i = 0; i < KEYWORDS_COUNT; i++) {
        if (s == keyword[i]) {
            return (TokenType) (i + 1);
        }
    }
    return ERROR;
}

Token LexicalAnalyzer::ScanNumber()
{
    char c;

    input.GetChar(c);
    if (isdigit(c)) {
        if (c == '0') {
            tmp.lexeme = "0";
        } else {
            tmp.lexeme = "";
            while (!input.EndOfInput() && isdigit(c)) {
                tmp.lexeme += c;
                input.GetChar(c);
            }
            if (!input.EndOfInput()) {
                input.UngetChar(c);
            }
        }
        // TODO: You can check for REALNUM, BASE08NUM and BASE16NUM here!
        tmp.token_type = NUM;
        tmp.line_no = line_no;
        return tmp;
    } else {
        if (!input.EndOfInput()) {
            input.UngetChar(c);
        }
        tmp.lexeme = "";
        tmp.token_type = ERROR;
        tmp.line_no = line_no;
        return tmp;
    }
}

Token LexicalAnalyzer::ScanIdOrKeyword()
{
    char c;
    input.GetChar(c);

    if (isalpha(c)) {
        tmp.lexeme = "";
        while (!input.EndOfInput() && isalnum(c)) {
            tmp.lexeme += c;
            input.GetChar(c);
        }
        if (!input.EndOfInput()) {
            input.UngetChar(c);
        }
        tmp.line_no = line_no;
        if (IsKeyword(tmp.lexeme))
            tmp.token_type = FindKeywordIndex(tmp.lexeme);
        else
            tmp.token_type = ID;
    } else {
        if (!input.EndOfInput()) {
            input.UngetChar(c);
        }
        tmp.lexeme = "";
        tmp.token_type = ERROR;
    }
    return tmp;
}

// you should unget tokens in the reverse order in which they
// are obtained. If you execute
//
//    t1 = lexer.GetToken();
//    t2 = lexer.GetToken();
//    t3 = lexer.GetToken();
//
// in this order, you should execute
//
//    lexer.UngetToken(t3);
//    lexer.UngetToken(t2);
//    lexer.UngetToken(t1);
//
// if you want to unget all three tokens. Note that it does not
// make sense to unget t1 without first ungetting t2 and t3
//
TokenType LexicalAnalyzer::UngetToken(Token tok)
{
    tokens.push_back(tok);
    return tok.token_type;
}

Token LexicalAnalyzer::GetToken()
{
    char c;

    // if there are tokens that were previously
    // stored due to UngetToken(), pop a token and
    // return it without reading from input
    if (!tokens.empty()) {
        tmp = tokens.back();
        tokens.pop_back();
        return tmp;
    }

    SkipSpace();
    tmp.lexeme = "";
    tmp.line_no = line_no;
    input.GetChar(c);
    switch (c) {
        case '.': tmp.token_type = DOT;       return tmp;
        case '+': tmp.token_type = PLUS;      return tmp;
        case '-': tmp.token_type = MINUS;     return tmp;
        case '/': tmp.token_type = DIV;       return tmp;
        case '*': tmp.token_type = MULT;      return tmp;
        case '=': tmp.token_type = EQUAL;     return tmp;
        case ':': tmp.token_type = COLON;     return tmp;
        case ',': tmp.token_type = COMMA;     return tmp;
        case ';': tmp.token_type = SEMICOLON; return tmp;
        case '[': tmp.token_type = LBRAC;     return tmp;
        case ']': tmp.token_type = RBRAC;     return tmp;
        case '(': tmp.token_type = LPAREN;    return tmp;
        case ')': tmp.token_type = RPAREN;    return tmp;
        case '<':
            input.GetChar(c);
            if (c == '=') {
                tmp.token_type = LTEQ;
            } else if (c == '>') {
                tmp.token_type = NOTEQUAL;
            } else {
                if (!input.EndOfInput()) {
                    input.UngetChar(c);
                }
                tmp.token_type = LESS;
            }
            return tmp;
        case '>':
            input.GetChar(c);
            if (c == '=') {
                tmp.token_type = GTEQ;
            } else {
                if (!input.EndOfInput()) {
                    input.UngetChar(c);
                }
                tmp.token_type = GREATER;
            }
            return tmp;
        default:
            if (isdigit(c)) {
                input.UngetChar(c);
                return ScanNumber();
            } else if (isalpha(c)) {
                input.UngetChar(c);
                return ScanIdOrKeyword();
            } else if (input.EndOfInput())
                tmp.token_type = END_OF_FILE;
            else
                tmp.token_type = ERROR;

            return tmp;
    }
}

int main()
{
    LexicalAnalyzer lexer;
    Token token;

    token = lexer.GetToken();
    token.Print();
    while (token.token_type != END_OF_FILE) {
        token = lexer.GetToken();
        token.Print();
    }
}
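The comment above UngetToken() describes the token-buffer discipline: tokens must be ungotten in the reverse order in which they were obtained, and GetToken() will then hand them back before reading any new input. As a minimal sketch of how a parser built on this lexer might use that for one-token lookahead, consider the following helper; the name Peek is hypothetical and not part of the provided code.

// Hypothetical helper (not part of the provided code): look at the next
// token without consuming it, by getting it and immediately ungetting it.
Token Peek(LexicalAnalyzer& lexer)
{
    Token t = lexer.GetToken();   // consume one token from the stream
    lexer.UngetToken(t);          // push it back; the next GetToken() returns it
    return t;
}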

Lexer.h

#ifndef __LEXER__H__
#define __LEXER__H__

#include <vector>
#include <string>

#include "inputbuf.h"

// ------- token types -------------------

typedef enum {
    END_OF_FILE = 0,
    IF, WHILE, DO, THEN, PRINT,
    PLUS, MINUS, DIV, MULT,
    EQUAL, COLON, COMMA, SEMICOLON,
    LBRAC, RBRAC, LPAREN, RPAREN,
    NOTEQUAL, GREATER, LESS, LTEQ, GTEQ,
    DOT, NUM, ID, ERROR
    // TODO: Add labels for new token types here
} TokenType;

class Token {
  public:
    void Print();

    std::string lexeme;
    TokenType token_type;
    int line_no;
};

class LexicalAnalyzer {
  public:
    Token GetToken();
    TokenType UngetToken(Token);
    LexicalAnalyzer();

  private:
    std::vector<Token> tokens;
    int line_no;
    Token tmp;
    InputBuffer input;

    bool SkipSpace();
    bool IsKeyword(std::string);
    TokenType FindKeywordIndex(std::string);
    Token ScanIdOrKeyword();
    Token ScanNumber();
};

#endif //__LEXER__H__

inputbuf.cc

#include <iostream>
#include <istream>
#include <vector>
#include <string>
#include <cstdio>

#include "inputbuf.h"

using namespace std;

bool InputBuffer::EndOfInput()
{
    if (!input_buffer.empty())
        return false;
    else
        return cin.eof();
}

char InputBuffer::UngetChar(char c)
{
    if (c != EOF)
        input_buffer.push_back(c);
    return c;
}

void InputBuffer::GetChar(char& c)
{
    if (!input_buffer.empty()) {
        c = input_buffer.back();
        input_buffer.pop_back();
    } else {
        cin.get(c);
    }
}

string InputBuffer::UngetString(string s)
{
    for (int i = 0; i < s.size(); i++)
        input_buffer.push_back(s[s.size() - i - 1]);
    return s;
}
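UngetString() pushes the characters of s onto the buffer in reverse order, so a later sequence of GetChar() calls returns them in their original order. This is what makes multi-character backtracking possible when a longer token pattern fails to match. A minimal sketch of that round trip, written as a small standalone test harness (not part of the assignment code), under the assumption that the buffer starts empty:

// Standalone test harness: after ungetting "x08", the next three
// GetChar() calls yield 'x', '0', '8' in that order.
#include <iostream>
#include "inputbuf.h"

int main()
{
    InputBuffer buf;
    buf.UngetString("x08");     // characters are pushed back in reverse

    char c;
    for (int i = 0; i < 3; i++) {
        buf.GetChar(c);         // re-reads 'x', then '0', then '8'
        std::cout << c;
    }
    std::cout << "\n";          // prints: x08
}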

inputbuf.h

#ifndef __INPUT_BUFFER__H__

#define __INPUT_BUFFER__H__

#include <string>
#include <vector>

class InputBuffer {
  public:
    void GetChar(char&);
    char UngetChar(char);
    std::string UngetString(std::string);
    bool EndOfInput();

  private:
    std::vector<char> input_buffer;
};

#endif //__INPUT_BUFFER__H__

CSE340 Fall 2017 Project 1: Lexical Analysis
Due: Friday, September 1, 2017 by 11:59 pm MST

The goal of this project is to give you hands-on experience with lexical analysis. You will extend the provided lexical analyzer to support more token types. The next section lists all new token types that you need to implement.

1. Description

Modify the lexer to support the following 3 token types:

REALNUM   = (pdigit digit*) DOT digit digit*  +  0 DOT digit* pdigit digit*
BASE08NUM = ((pdigit8 digit8*) + 0) (x) (08)
BASE16NUM = ((pdigit16 digit16*) + 0) (x) (16)

where

pdigit   = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9
digit    = 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9
pdigit8  = 1 + 2 + 3 + 4 + 5 + 6 + 7
digit8   = 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7
pdigit16 = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + A + B + C + D + E + F
digit16  = 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + A + B + C + D + E + F

Note that NUM and DOT are already defined in the lexer, but here are the regular expressions for the sake of completeness (DOT is a single dot character; the quotes are used to avoid ambiguity):

NUM = (pdigit digit*) + 0
DOT = '.'

The list of valid tokens, including the existing tokens in the code, is as follows. This list should be used to determine the token if the input matches more than one regular expression.

1. IF
2. WHILE
3. DO
4. THEN
5. PRINT
6. PLUS
7. MINUS
8. DIV
9. MULT
10. EQUAL
11. COLON
12. COMMA
13. SEMICOLON
14. LBRAC
15. RBRAC
16. LPAREN
17. RPAREN
18. NOTEQUAL
19. GREATER
20. LESS
21. LTEQ
22. GTEQ
23. DOT
24. NUM
25. ID
26. REALNUM
27. BASE08NUM
28. BASE16NUM
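One way to approach the three new token types (not the only way, and not part of the provided code) is to scan the longest candidate lexeme and fall back when a pattern does not match. The sketch below illustrates only the recognition logic, written against plain std::string values rather than the InputBuffer class; the helper names is_base08num, is_base16num, and is_realnum are assumptions for illustration. In the project itself, this logic would sit where the TODO in ScanNumber() is, using GetChar()/UngetChar()/UngetString() to back out of characters that turn out not to belong to the token.

#include <cctype>
#include <iostream>
#include <string>

// Hypothetical helpers for illustration; names are not part of the assignment code.

// BASE08NUM = ((pdigit8 digit8*) + 0) x08
static bool is_base08num(const std::string& s)
{
    if (s.size() < 4 || s.compare(s.size() - 3, 3, "x08") != 0) return false;
    std::string body = s.substr(0, s.size() - 3);
    if (body == "0") return true;
    if (body.empty() || body[0] < '1' || body[0] > '7') return false;   // leading pdigit8
    for (char c : body)
        if (c < '0' || c > '7') return false;                           // remaining digit8
    return true;
}

// BASE16NUM = ((pdigit16 digit16*) + 0) x16   (hex letters are uppercase A-F)
static bool is_base16num(const std::string& s)
{
    if (s.size() < 4 || s.compare(s.size() - 3, 3, "x16") != 0) return false;
    std::string body = s.substr(0, s.size() - 3);
    if (body == "0") return true;
    auto hex = [](char c) { return isdigit((unsigned char) c) || (c >= 'A' && c <= 'F'); };
    if (body.empty() || body[0] == '0' || !hex(body[0])) return false;  // leading pdigit16
    for (char c : body)
        if (!hex(c)) return false;                                      // remaining digit16
    return true;
}

// REALNUM = (pdigit digit*) DOT digit digit*  +  0 DOT digit* pdigit digit*
static bool is_realnum(const std::string& s)
{
    size_t dot = s.find('.');
    if (dot == std::string::npos || dot + 1 >= s.size()) return false;  // need a fraction
    std::string intpart = s.substr(0, dot), frac = s.substr(dot + 1);
    for (char c : intpart + frac)
        if (!isdigit((unsigned char) c)) return false;
    if (intpart.empty()) return false;
    if (intpart == "0")                         // 0 DOT digit* pdigit digit*
        return frac.find_first_not_of('0') != std::string::npos;
    return intpart[0] != '0';                   // pdigit digit* DOT digit digit*
}

int main()
{
    // Quick checks of the three patterns on sample lexemes.
    std::cout << is_base08num("17x08") << is_base16num("1Fx16")
              << is_realnum("12.34") << is_realnum("0.01")
              << is_realnum("0.00") << "\n";    // expected output: 11110
}

Inside the lexer, the usual shape of this idea is to read a candidate suffix after the digits (for example, a dot and more digits, or the characters that could form "x08"/"x16"), test it against these patterns, and call input.UngetString() on whatever was consumed beyond the token actually recognized, so that GetToken() can re-read those characters as the start of the next token.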
