Lanuage Processors LAB SOLUTION MANUAL VI Sem, BE (CS&E) 2011 DEPT OF COMPTER SCIENCE & ENGG. M. I. T., MANIPAL Contents 1. 2. 3. 4. 5. 6. 7. 8. 9. Preliminary Scanning Applications Formal grammar for C Language subset Design of Lexical Analyzer Design of PredictiveParser Design of Code Generator Using Lex Using Yacc Sample Lab Questions References Language Processors 03 09 10 12 21 29 33 40 41 Page 2 WEEK 1 & 2: Preliminary Scanning applications WEEK 1: Write a program which will take as input a C program consisting of single and multi-line comments and multiple blank spaces and produces as output the C program without comments and single blank space. // This is a single line comment /* This is a Multiline Comment */ #include <stdio.h> #include <fcntl.h> #include<conio.h> int main(void) { clrscr(); FILE *in, *out,*out2; in=fopen("try2.txt","w"); char c; out2=fopen("final.txt","w"); out=fopen("temp.c","r"); char temp='a'; int flag=0; if(!out) printf("\nfile cannot be opened"); else { flag=1; temp=c; Language Processors Page 3 while((c=getc(out))!=EOF) { printf("%c",c); if(temp=='/' && c=='*') do { temp=c; c=getc(out); printf("%c",c); if(temp=='*' && c=='/') { c=getc(out); break; } }while(c!=EOF); if(temp=='/' && c=='/') do { temp=c; c=getc(out); printf("%c",c); if(c=='\n') { c=getc(out); break; } }while(c!=EOF); if(c==' ' && temp==' ') continue; temp=c; if(c!='/') Language Processors Page 4 putc(c,out2); } } fclose(out2); fopen("final.txt","r"); while((c=getc(out2))!=EOF) printf("%c",c); fclose(out2); fclose(out); getch(); } Language Processors Page 5 WEEK 2: Write a program, which will read a program written in C, recognize all of the keywords in that program and print them in upper case letters. #include <stdio.h> #include <fcntl.h> #include<conio.h> #include<string.h> #include<stdio.h> #include<iostream.h> #include<ctype.h> char line[80]; char key[][6]={"VOID","MAIN","INT","CHAR","IF","ELSE","WHILE","FOR"}; void findkey() { int i,j,k,l=strlen(line),p,pos; char temp[10]; for(i=0;i<l;i++) { p=0,pos=i; while(!isalpha(line[i])) { i++; pos++; } while(isalpha(line[i])) temp[p++]=line[i++]; temp[p]='\0'; cout<<"\t*"<<temp<<"*"; for(j=0;j<8;j++) if(strcmpi(temp,key[j])==0) { //cout<<"\nkeyword found is "<<key[j]; int t=0; for(k=pos;k<pos+strlen(key[j]);k++) line[k]=key[j][t++]; break; Language Processors Page 6 } } //cout<<"\n"<<line; } int main(void) { clrscr(); FILE *in, *out,*out2; in=fopen("try2.txt","w"); char c; out2=fopen("final.txt","w"); out=fopen("temp.c","r"); char temp='a'; int flag=0; if(!out) printf("\nfile cannot be opened"); else { flag=1; //temp=c; while(1) { int p=0; while((c=getc(out))!='\n') { line[p++]=c; if(c==EOF) break; } line[p++]='\n'; line[p++]='\0'; findkey(); for(int i=0;i<strlen(line);i++) putc(line[i],out2); putc('\n',out2); if(c==EOF) break; } Language Processors Page 7 } fclose(out2); cout<<"\n\n\n\n the modified prog is \n"; fopen("final.txt","r"); while((c=getc(out2))!=EOF) printf("%c",c); fclose(out2); fclose(out); getch(); } Language Processors Page 8 WEEK 3 – 11: Design of Mini Compiler for C Language for the given subset The formal grammar (sample): Program - main () { declarations statement-list } declarations data-type identifier-list; declarations data-type intchar identifier-list idid, identifier-list id[number] , identifier-list | id[number] statement_list statement ; statement_list statement assign-stat decision_stat looping-stat assign_stat id = expn expn simple-expn eprime eprimerelop simple-expn| simple-exp term seprime seprimeaddop term seprime | term factor tprime tprime mulop factor tprime | factor idnum decision-stat if ( expn ) stat dprime dprime else stat | looping-stat while (expn) statfor (assign_stat ; expn ; assign_stat ) stat relop = =!=<=>=>< addop +mulop * / % Language Processors Page 9 WEEK 3 & 4: Design of Lexical analyzer To construct an adhoc Lexical Analyzer. Identifying different classes of tokens like: keywords, identifiers and special symbols. Selecting a suitable data structure for symbol table (the alternates are linked list, hashing, array of structures, binary search tree) Having selected a data structure, identifying the appropriate fields. Solution: #define KEY_CTR 12 FILE *fin, *fout; char line[80],*ptr,filename[20],token[30]; enum token_class {KEY=20,IDENTIFIER,DIGIT,COMMA,SEMICOLON,COLON,DOT, EQ,GT,LT,EQUEQU,GEQ,LEQ,NEQ,LB,RB,LSB,RSB,LCB,RCB, INCR,DECR,SQUOTE,DQUOTE,ADD,SUB,MUL,DIV,MOD,HASH, DEFAULT,UNDEF}; char keywords[KEY_CTR][10] = {"void","int","char","float","if","else","while","include","for","break ","continue","main"}; int scanner() { int i=0; while(*ptr==' ' || *ptr=='\t') token[0]=*ptr; switch(*ptr) { case '#' :ptr++; case '(' :ptr++; case ')' :ptr++; case '{' :ptr++; case '}' :ptr++; case '[' :ptr++; case ']' :ptr++; case ';' :ptr++; case '.' :ptr++; case '*' :ptr++; case '/' :ptr++; Language Processors {ptr++;} token[1]='\0'; return return return return return return return return return return return HASH; //Single Character Tokens LB; RB; LCB; RCB; LSB; RSB; SEMICOLON; DOT; MUL; DIV; Page 10 … … case '>' : ptr++; if(*ptr=='=') {token[1]='='; token[2]='\0';ptr++; return GEQ;} else { return GT;} case '<' : ptr++; if(*ptr=='=') {token[1]='='; token[2]='\0';ptr++; return LEQ;} Else {return LT;} default : //multi character tokens if(isalpha(*ptr)) { i=0; while(isalnum(*ptr)) { token[i++]=*ptr; ptr++;} token[i]='\0'; for(int j=0;j<KEY_CTR;j++) { if(strcmp(keywords[j],token)==0) { KEYtype=j; return KEY; } } return IDENTIFIER; } else if(isdigit(*ptr)) { i=0; while(isdigit(*ptr)) { token[i++]=*ptr; ptr++;} token[i]='\0'; return DIGIT; } else { ptr++; return UNDEF;} }//end of case } The different data Structures that can be used are: a) Linked list. b) Array of Structures The appropriate fields are Name, Type, Value, etc. Language Processors Page 11 WEEK 5, 6 AND 7: Design of a Predictive Parser To code and test parser: Students should write a formal grammar for the given C subset(Refer Sample Grammar given above) Remove left recursion from each of the productions so that the underlying grammar can be parsed with a predictive parser. The parser obtains a string of tokens from the lexical analyzer and verifies that the string can be generated by the grammar for the C language. The parser should report syntax errors if any (for eg.: Misspelling an identifier or keyword, Undeclared or Multiply declared identifier, Arithmetic or Relational Expressions with unbalanced parentheses and Expression syntax error etc.) with appropriate line-no. For a given grammar: 1. Eliminate the Left Recursion 2. Left Factor the grammar 3. Write down function for each Nonterminal the grammar Eg: Design a parser for the following grammar: E -> E+T|T T -> T*F | F F -> (E)|id After doing the modifications The grammar becomes E -> TE’ E’ -> +TE’|e T -> FT’ T’->*FT’ |e Language Processors Page 12 F -> (E)|id Write the Parser as follows: Void E() { T(); EPRIME(); } Void EPRIME( ) { If(input-symbol ==”+”) { ADVANCE( ); T( ); EPRIME( ); } } Void T( ) { F( ); TPRIME( ); } Void TPRIME( ) { if (input-symbol ==‘*’) { F( ); TPRIME( ); } } void F( ) { if (input-symbol ==‘(‘) { ADVANCE( ); E( ); if (input-symbol==‘)’) Language Processors Page 13 ADVANCE( ); Else if (input-symbol ==‘id’) ADVANCE( ); else ERROR( ); } Parser Code for given C language grammar: void declarations() { s=lex(); if(s==KEY && (KEYtype==INT||KEYtype==CHAR)) { cprintf("_____________________________KEYWORD detected\r\n"); idList(KEYtype); cprintf("End of a Declaration by SEMICOLON\r\n"); declarations(); } else { ep_flag=YES;}//epsilon transition so no error also } int Missing_Error=NO; void idList(int datatype) { s=lex(); if (s!=IDENTIFIER) {// Missing_Error=YES; if(s==COMMA||s==SEMICOLON) {ep_flag=YES; strcpy(errtype,"Identifier Missing");put_error(lineno);} else { sprintf(errtype,"Illegal Character '%s'", token); put_error(lineno);} } //don't use else here cause scan the remaining strcpy(varname,token); idList_X(datatype); } void idList_X(int datatype) { s=lex(); if (s==COMMA && Missing_Error==NO) { ADD_ST(varname,datatype,size); idList(datatype); } else if(s==LSB) Language Processors Page 14 { s=lex(); //get number if (s!=DIGIT) {ep_flag=YES;strcpy(errtype,"Missing Array size after '['"); put_error(lineno);} else {size=atoi(token);} s=lex(); //get ']' if(s !=RSB) {ep_flag=YES; strcpy(errtype,"Missing ']'"); put_error(lineno); } idList_X(datatype); } else if (s==SEMICOLON && Missing_Error==NO) { ADD_ST(varname,datatype,size); } else {ep_flag=YES; strcpy(errtype,"Missing ';'"); put_error(lineno); } Missing_Error=NO; } void statement() { s=lex(); if (s==LCB){ cprintf("***COMPOUND_STATEMENT\r\n"); comp_stat(); } else { ep_flag=YES; cprintf("*****SIMPLE_STATEMENT\r\n"); simple_stat(); } } void comp_stat() { statement(); s=lex(); //get '}' if(s!=RCB){ ep_flag=YES; strcpy(errtype,"Missing '}'"); put_error(lineno); Language Processors Page 15 } // statement(); } void simple_stat() { s=lex(); if(s==IDENTIFIER) { assign_stat(); statement(); } else if (s==KEY) { if(KEYtype== IF) { decesion_stat(); statement(); } else if(KEYtype==WHILE || KEYtype==FOR) { looping_stat(); statement(); } else if (KEYtype==CONTINUE ||KEYtype==BREAK) { jump_stat(); statement(); } } else{ep_flag=YES;} //because lex has been already called. } void assign_stat() { cprintf("ASSIGNMENT STATEMENT for identifier: %s\r\n",token); char gotID[30]; strcpy(gotID,token);//get the identifier name int IDtype=INT;///check from symbol table if (IDtype==INT) { s=lex(); if (s != EQ) { ep_flag=YES; strcpy(errtype,"Missing '='"); put_error(lineno); } expn(); Language Processors Page 16 s=lex(); if (s != SEMICOLON) { ep_flag=YES; strcpy(errtype,"Missing ';'"); put_error(lineno); } } else if (IDtype==CHAR) { //not in the question for handeling them } } int relop() { s=lex(); if(s==EQUEQU || s==NEQ|| s==LT|| s==GT || s==LEQ|| s==GEQ) { ….. } else {ep_flag=YES;return NO;} } void expn() { single_arg_flag=YES; simple_expn(); strcpy(fa1,fa); if(relop()==YES) //check if there are real op; { simple_expn(); } else { //when we have a single argument ie a=10; a=c; etc... } } void simple_expn() { term(); simple_expn_2(); } void simple_expn_2() { s=lex(); Language Processors Page 17 if (s==ADD || s==SUB) //operator { single_arg_flag=NO; term(); simple_expn_2(); } else {ep_flag=YES;} } void term() { factor(); term_not(); } void term_not() { s=lex(); if (s==MUL ||s==DIV|| s==MOD) { single_arg_flag=NO; factor(); term_not(); } else {ep_flag=YES;} } int find_in_ST(char stext[]) { for(int j=0;j<ST_CTR;j++) { if (strcmp(stext,ST[j].id)==0){return YES;} } return NO;//not found } void factor() { s=lex(); if (s==IDENTIFIER || s==DIGIT) { if(s==IDENTIFIER && find_in_ST(token)==NO) { sprintf(errtype,"Undefined Symbol '%s'",token); put_error(lineno);} strcpy(fa,token); } Language Processors Page 18 else { strcpy(fa,""); ep_flag=YES; sprintf(errtype,"Missing ID or Number"); put_error(lineno); } } void parser() { fgets(line,80,fin); ptr=line; lineno++; s=lex(); if (s==KEY && KEYtype==MAIN) { cprintf("main() function is defined\r\n"); s=lex();//( if(s!=LB) { ep_flag=YES; strcpy(errtype,"Missing '('");put_error(lineno); } s=lex();//) if(s!=RB) { ep_flag=YES; strcpy(errtype,"Missing ')'"); put_error(lineno); } s=lex();//{ if(s!=LCB){ ep_flag=YES; strcpy(errtype,"Missing main '{'"); put_error(lineno); } cprintf("BEGINNING of DECLARATION REGION\r\n"); declarations(); statement(); s=lex();//} if(s!=RCB){ …..} } } int main() { if ((fin = fopen("c:\\source.cpp", "rt")) Language Processors == NULL) Page 19 { fprintf(stderr, "Cannot open input file.\n"); getch(); return 1; } parser(); display_ST(); display_ET(); getch(); clrscr(); fclose(fin); getch(); return 0; } Language Processors Page 20 WEEK 8, 9 and 10: Design of Code generator The target code to be generated is 8086 assembly language program. Registers have to be selected for each of the variables used by the program. Code generator will take each line of the source program and generate its equivalent assembly code. Solution: Here we, modify the parser and associated functions such that it also generates code for the language constructs in addition to parsing. The Modified parser code is as follows: void idList_X(int datatype) { s=lex(); if (s==COMMA && Missing_Error==NO) { ADD_ST(varname,datatype,size); if (datatype==INT) { fprintf(fout," %-10s DW 0\n",varname); } else { fprintf(fout," %-10s DB 0\n",varname); } idList(datatype); } else if(s==LSB) { s=lex(); //get number if (s!=DIGIT) { ep_flag=YES; strcpy(errtype,"Missing Array size after '['"); put_error(lineno); } else {size=atoi(token);} Language Processors Page 21 s=lex(); //get ']' if(s !=RSB) { ep_flag=YES; strcpy(errtype,"Missing ']'"); put_error(lineno); } idList_X(datatype); } else if (s==SEMICOLON && Missing_Error==NO) { if (datatype==INT) {fprintf(fout," %-10s DW 0\n",varname);} else {fprintf(fout," %-10s DB 0\n",varname);} ADD_ST(varname,datatype,size); } else { ep_flag=YES; strcpy(errtype,"Missing ';'"); put_error(lineno); } Missing_Error=NO; } void statement() { s=lex(); if (s==LCB) { cprintf("***COMPOUND_STATEMENT\r\n"); comp_stat(); } else { ep_flag=YES; cprintf("*****SIMPLE_STATEMENT\r\n"); simple_stat(); } } Language Processors Page 22 void comp_stat() { statement(); s=lex(); //get '}' if(s!=RCB) { ep_flag=YES; strcpy(errtype,"Missing '}'"); put_error(lineno); } } void simple_stat() { s=lex(); if(s==IDENTIFIER) { assign_stat(); statement(); } else if (s==KEY) { if(KEYtype== IF) { decesion_stat(); statement(); } else if(KEYtype==WHILE || KEYtype==FOR) { looping_stat(); statement(); } else if (KEYtype==CONTINUE ||KEYtype==BREAK) { jump_stat(); statement(); } } else{ep_flag=YES;} //because lex has been already called. } Language Processors Page 23 void assign_stat() { cprintf("ASSIGNMENT STATEMENT for identifier: %s\r\n",token); char gotID[30]; strcpy(gotID,token);//get the identifier name int IDtype=INT;///check from symbol table if (IDtype==INT) { s=lex(); if (s != EQ) { ep_flag=YES; strcpy(errtype,"Missing '='"); put_error(lineno); } expn(); fprintf(fout," MOV %s,AX\n",gotID); s=lex(); if (s != SEMICOLON) { ep_flag=YES; strcpy(errtype,"Missing ';'"); put_error(lineno); } } else if (IDtype==CHAR) { //not in the question for handeling them } } int relop() { s=lex(); if(s==EQUEQU || s==NEQ|| s==LT|| s==GT || s==LEQ|| s==GEQ) { ….. } else { ep_flag=YES; return NO;} } Language Processors Page 24 void expn() { single_arg_flag=YES; simple_expn(); strcpy(fa1,fa); if(relop()==YES) //check if there are real op; { simple_expn(); fprintf(fout," MOV AX,%s\n",fa1); fprintf(fout," MOV BX,%s\n",fa); } else { //when we have a single argument ie a=10; a=c; etc... if(single_arg_flag==YES) fprintf(fout," MOV AX,%s\n",fa); } } void simple_expn() { term(); simple_expn_2(); } void simple_expn_2() { s=lex(); if (s==ADD || s==SUB) //operator { single_arg_flag=NO; sprintf(tempcode," MOV AX,%s\n",fa); if (s==ADD) sprintf(tempcode2," else sprintf(tempcode2," ADD AX,BX"); SUB AX,BX"); term(); fprintf(fout,"%s",tempcode); //mov AX,a fprintf(fout," MOV BX,%s\n",fa);//MOV BX,b fprintf(fout,"%s\n",tempcode2);//ADD c,result simple_expn_2(); } else {ep_flag=YES;} } Language Processors Page 25 void term() { factor(); term_not(); } void term_not() { s=lex(); if (s==MUL ||s==DIV|| s==MOD) { single_arg_flag=NO; sprintf(tempcode3," MOV AX,%s\n",fa); if (s==MUL) sprintf(tempcode4," else sprintf(tempcode4," factor(); MUL BX"); DIV BX"); fprintf(fout,"%s",tempcode3);//mov AX,a fprintf(fout," MOV BX,%s\n",fa);//mov BX,b fprintf(fout,"%s\n",tempcode4);//mul AX,BX fprintf(fout," MOV temp,AX\n");//temp or dx stores the //result; strcpy(fa,"temp"); term_not(); } else {ep_flag=YES;} } int find_in_ST(char stext[]) { for(int j=0;j<ST_CTR;j++) { if (strcmp(stext,ST[j].id)==0){return YES;} } return NO;//not found } void factor() { s=lex(); Language Processors Page 26 if (s==IDENTIFIER || s==DIGIT) { if(s==IDENTIFIER && find_in_ST(token)==NO) { sprintf(errtype,"Undefined Symbol '%s'",token); put_error(lineno); } strcpy(fa,token); } else { strcpy(fa,""); ep_flag=YES; sprintf(errtype,"Missing ID or Number"); put_error(lineno); } } void parser() { fgets(line,80,fin); ptr=line; lineno++; s=lex(); if (s==KEY && KEYtype==MAIN) { cprintf("main() function is defined\r\n"); s=lex();//( if(s!=LB) { ep_flag=YES; strcpy(errtype,"Missing '('"); put_error(lineno); } s=lex();//) if(s!=RB) { ep_flag=YES; strcpy(errtype,"Missing ')'"); put_error(lineno); } s=lex();//{ if(s!=LCB) { ep_flag=YES; strcpy(errtype,"Missing main '{'"); Language Processors Page 27 put_error(lineno); } cprintf("BEGINNING of DECLARATION REGION\r\n"); declarations(); fprintf(fout,"DATA ENDS\n\nCODE SEGMENT\n"); fprintf(fout,"ASSUME CS:CODE DS:DATA\n”); fprintf(fout,”START: MOV AX,DATA\n MOV DS,AX\n\n"); statement(); s=lex();//} if(s!=RCB){ …..} } } int main() { clrscr(); if ((fin = fopen("c:\\source.cpp", "rt")) == NULL) { fprintf(stderr, "Cannot open input file.\n"); getch(); return 1; } if ((fout = fopen("c:\\code.asm", "wt"))== NULL) { fprintf(stderr, "Cannot open output file.\n"); getch(); return 1; } fprintf(fout,"DATA SEGMENT\n"); fprintf(fout,"temp DW 0\n"); parser(); fprintf(fout,"\n MOV AH,4CH\n INT 21H\n"); fprintf(fout,"CODE ENDS\nEND START\n"); display_ST(); display_ET(); getch(); clrscr(); fclose(fin); fclose(fout); getch(); Language Processors Page 28 return 0; } WEEK 13 AND 14: Usage of LEX and Yacc WEEK 13: Usage of LEX tool Steps to use LEX tool: 1. lex filename.l lex.yy.c 2. cc lex.yy.c –lfl a.out 3. .\a.out program gets executed 1. Design a lexical analyzer for recognizing following class of tokens: Keywords (if, then, else), id, num and relational operators using lex compiler. %{ /* definition of manifest constants LT, LE, EQ, NE, GT, GE, IF, THEN, ELSE, ID, NUMBER, RELOP */ #include<stdio.h> #define LT 1 #define LE 2 … %} /* regular definition */ delim [ \t\n] ws {delim}+ letter [A-Za-z] digit [0-9] id {letter}({letter}|{digit})* number {digit}+(\.{digit}+)?(E[+\-]?{digit}+)? %% {ws} if {/* no action and no return */} { printf(“lexeme=%s, tokentype = Reserveword “, yytext); return (IF); Language Processors Page 29 } then { printf(“lexeme=%s, tokentype = Reserveword “, yytext); return (THEN); else } { printf(“lexeme=%s, tokentype = Reserveword “, yytext); return (ELSE); {id} } { printf(“lexeme=%s, tokentype = Reserveword “, yytext); return(ID); {number} } { printf(“lexeme=%s, tokentype = Number “, yytext); return(NUMBER); “<” “<=” “=” “<>” “>” “>=” } {yylav {yylav {yylav {yylav {yylav {yylav = = = = = = LT; LE; EQ; NE; GT; GE; return(RELOP);} return(RELOP);} return(RELOP);} return(RELOP);} return(RELOP);} return(RELOP);} %% main() { yylex(); } Language Processors Page 30 2. Write a lex program to count the number of printf and scanf statements in a valid c program and replace them with write and read statements respectively. %{ #include<stdio.h> int sc=0; int pc=0; void disp(void); %} {ws} scanf printf “.” { } {fprintf(yyout, “read”);sc++;} { fprintf(yyout, “write”);pc++;} {disp();} %% main() { yyin = fopen(“test.l”,”r”); yyout=fopen(“test.l”,”u”); yylex(); } void disp() { printf(“read count : %d “,sc); printf(“write count : %d”,pc); } Language Processors Page 31 3. Write a lex program to count the number of characters, words, blanks and lines in a given string. %{ #include<stdio.h> int sc=0; int cc=0; int wc=0; int lc=0; void disp(void); %} space letter word %% [ ] [a-zA-Z0-9] {letter}* {word} {space} “\n” “#” {wc++; cc=cc+strlen(yytext);} {sc++;} {lc++;} {disp();} %% main() { yylex(); } void disp() { printf(“character count : %d “,cc); printf(“word count : %d”,wc); printf(“blanks count : %d “,sc); printf(“line count : %d”,lc); } Language Processors Page 32 WEEK 14: Usage of Yacc tool Steps to use yacc tool: 1. yacc filename.y y.tab.c 2. cc y.tab.c -lfla.out 3. .\a.out program executed 1. Write a yacc program to test the validity of a simple expression involving the operators +,-,*,/. %{ #include <ctype.h> #include<stdio.h> %} %token id %left ‘+’ ‘-‘ %left ‘*’ ‘/’ %% line expr : ; : | | | | | ; expr ‘\n’ {printf(“\n Valid Expression”);} expr expr expr expr ‘(‘ id ‘+’ expr ‘-‘ expr ‘*’ expr ‘/’ expr expr ‘)’ { { { { { { } } } } } } %% Language Processors Page 33 yylex(){ char c; c = getchar(); if (isdigit(c)) { yylval = c –‘0’; return id; } else return c; } yyerror(char *s){ fprintf(stderr, “%s”,s); } main() { yyparse(); } Language Processors Page 34 2. Write a yacc program to recognize nested if statements and display the number of levels of nested if. /* Lex */ %{ #include "y.tab.h" extern int yylval; %} %% [ \t ]+ ; if {return IF;} "(" {return OPP;} ")" {return CLP;} "{" {return OPB;} "}" {return CLB;} "<" | "<=" | ">" | ">=" | "==" | "!=" "&&"|"||"|"&"|"|"|"!" {return RELOP;} { return LOGOP; } [+ - / *] {return AROP;} \n {return NEWL;} [0-9]+ {return NUMBER;} ";" {return SEM;} [a-z]+ {return ID;} %% Language Processors Page 35 /* yacc */ %{ unsigned levels=0; %} %token IF OPP CLP OPB CLB RELOP LOGOP AROP %token NEWL ID SEM NUMBER %% ifcond: IF OPP COND CLP NEWL OPB explist ifcond CLB {levels++;} ; COND: exp LOGOP exp | exp RELOP exp ; explist: exp SEM ; exp: NUMBER AROP NUMBER | NUMBER | ID ; %% Language Processors Page 36 main() { yyparse(); printf("no of levels %d \n",levels); } yyerror(s) char *s; { printf("not a valid if stmt\n"); } yywrap() { return(1); } Language Processors Page 37 3. Write a yacc program to parse the given string 2+3*5 /* Lex */ %{ #include “y.tab.h” extern int yylval; %} d [0-9] %% {d}+ {yylval = atoi(yytext); return id;} “\n” {return ‘\n’;} . {return yytext[0];} %% /* yacc*/ %{ #include <<stdio.h> %} %token id %% line : expr ‘\n’ { printf(“valid statement”);} ; expr : expr ‘+’ term {} | term {} ; term : term ‘*’ factor {} | factor {} ; factor: ‘(‘ expr ‘)’ {} | id ; %% int yyerror(char *s) { fprintf(stderr, “%s”,s); } main(){ yyparse(); } Language Processors Page 38 Steps to use Yacc tool: 1. lex filename.l lex.yy.c 2. yacc –d filename.y y.tab.c 3. cc lex.yy.c y.tab.c -lfla.out 4. .\a.out program executed Language Processors Page 39 Sample Lab Exam questions 1) Write the Lexical Analyzer to identify various tokens for a given language (say Pascal) 2) Generate the parser for the following grammar start REPEAT stmt UNTIL condition ; stmt id= term arithop term condition term relop term term id | num arithop + | relop < | > | = = Sample input:- REPEAT count = count + increment UNTIL count = =10 3) Generate the 8086 code for the following StartREPEAT statement UNTIL condition Statementid:=expr; Conditionid<num Exprterm expr` Expr`+term expr’ /ε Term factor term` Term’*factor term’ /ε Factorid Ex:- REPEAT A:=B+C*D UNTIL I <10 Language Processors Page 40 References: 1. Compiler principles, Techniques and tolls by Alfred V. Aho, Ravi Sethi, Jeffrey D. Ullman 2. Principles of compiler Design by Alfred V. Aho and Jeffrey D. Ullman 3. Engineering a Compiler by Keith D. Cooper and Linda Torczon 4. Introduction to compiler construction by Thomas W. Parsons 5. Compiler Design in C by Holub A.I Language Processors Page 41