Finite Automata for Lexical analyzer (Click to enlarge) The input to lexical analyzer is character stream. The character stream input is grouped into meaningful units called lexemes, which are then mapped into tokens, the latter constituting the output of the lexical analyzer. The lexical analyzer uses a symbol table. Each identifier, keyword and symbol are given unique id (symbol table id). The lexical analyzer is designed using finite automata. The finite automata for this program is added at the end of the post. The program takes a file input.c (in same file) as input. The input to a lexical analyzer is source string (source code as a long string). A sample input to the lexical analyzer is shown after this program. Lexical Analyzer Program in C #include #include #include #include //*********** struct token { char type[30]; char name[50]; int id; }retToken; //******* struct symboltable { char type[30]; char name[50]; }st[60]; typedef struct token token; char* sourcecode; int lexbeg=0,fwdptr=0,state=0; int symbolcount=0; token newtoken; int isexist=0,lineno=1; //************ int indexof(char *subString,int fromIndex,char *MainString) { int Mainlength,subLength,i,j,retIndex=-1; Mainlength=strlen(MainString); subLength=strlen(subString); if(Mainlength<1||subLength<1||Mainlength-fromIndex=subLength;i++) { if(*(MainString+i)==*(subString)) { retIndex=i; for(j=0;j=Mainlength||toIndex=Mainlength) { printf("\nError in args: subString fn"); return(NULL); } subStr=(char *)malloc(1000*sizeof(char)); for(j=0;j<=toIndex-fromIndex;j++) *(subStr+j)=*(MainString+fromIndex+j); *(subStr+j)='\0'; return(subStr); } //************ char nextchar() { fwdptr++; return(sourcecode[fwdptr-1]); } //************ void retract(int n) { fwdptr-=n; } //************ int fail(char * msg) { printf("%s",msg); return(-1); } //************ int installid(char *string) { int i; for(i=0;i',',','{','}','(',')','#',';'}; for(i=0;i<10;i++) { if(c==syms[i]) return(i+41); } return(0); } void nextToken() { char c; char *temptok; state=0; while(*(sourcecode+fwdptr)!='\0'&&state!=-1) { switch(state) { case -1:return; case 0:c=nextchar(); if(c==' '||c=='\t'||c=='\n') { state=0; lexbeg++; if(c=='\n') { lineno++; printf("\nline %d: ",lineno); } if(c=='\0') state=-1; } else if(c=='<') state=1; else if(c=='>') state=5; else if(c=='=') state=8; else if(isalpha(c)) state=10; else if(isdigit(c)) state=22; else if(isSymbol(c)) state=24; else if(c=='+') state=12; else if(c=='-') state=15; else if(c=='*') state=18; else if(c=='/') state=19; else if(c=='%') state=20; else state=fail("unknown symbol encountered"); break; case 1:c=nextchar(); if(c=='=') state=2; else if(c=='>') state=3; else state=4; break; case 2:strcpy(retToken.type,"relop"); strcpy(retToken.name,"LE"); retToken.id=17;lexbeg=fwdptr;return; case 3:strcpy(retToken.type,"relop");retToken.id=18; strcpy(retToken.name,"NE");lexbeg=fwdptr;return; case 4:retract(1); strcpy(retToken.type,"relop");retToken.id=19; strcpy(retToken.name,"LT");lexbeg=fwdptr;return; case 5:c=nextchar(); if(c=='=') state=6; else state=7; break; case 6:strcpy(retToken.type,"relop");retToken.id=20; strcpy(retToken.name,"GE");lexbeg=fwdptr;return; case 7:retract(1); strcpy(retToken.type,"relop");retToken.id=21; strcpy(retToken.name,"GT");lexbeg=fwdptr;return; case 8:c=nextchar(); if(c=='=') state=9; else state=21; break; case 9:strcpy(retToken.type,"relop");retToken.id=22; strcpy(retToken.name,"EQ");lexbeg=fwdptr;return; case 10:c=nextchar(); if(isalpha(c)||isdigit(c)) state=10; else state=11; break; case 11:retract(1); temptok=subString(sourcecode,lexbeg,fwdptr-1); retToken.id=installid(temptok); retToken=getType(temptok); lexbeg=fwdptr;return; case 12:c=nextchar(); if(c=='+') state=13; else state=14; break; case 13:strcpy(retToken.type,"arop");retToken.id=23; strcpy(retToken.name,"INC");lexbeg=fwdptr;return; case 14:retract(1); strcpy(retToken.type,"arop");retToken.id=24; strcpy(retToken.name,"PLU");lexbeg=fwdptr;return; case 15:c=nextchar(); if(c=='-') state=16; else state=17; break; case 16:strcpy(retToken.type,"arop");retToken.id=25; strcpy(retToken.name,"DEC");lexbeg=fwdptr;return; case 17:retract(1); strcpy(retToken.type,"arop");retToken.id=26; strcpy(retToken.name,"MIN");lexbeg=fwdptr;return; case 18:strcpy(retToken.type,"arop");retToken.id=27; strcpy(retToken.name,"MUL");lexbeg=fwdptr;return; case 19:strcpy(retToken.type,"arop");retToken.id=28; strcpy(retToken.name,"DIV");lexbeg=fwdptr;return; case 20:strcpy(retToken.type,"arop");retToken.id=29; strcpy(retToken.name,"MOD");lexbeg=fwdptr;return; case 21:retract(1);retToken.id=30; strcpy(retToken.type,"arop"); strcpy(retToken.name,"ASSIGN");lexbeg=fwdptr;return; case 22:c=nextchar(); if(isdigit(c)) state=22; else state=23; break; case 23:retract(1);retToken.id=41; strcpy(retToken.type,"Numeric constant"); strcpy(retToken.name,subString(sourcecode,lexbeg,fwdptr-1));lexbeg=fwdptr;return; case 24:strcpy(retToken.type,"Reserved Symbol");retToken.id=isSymbol(c); strcpy(retToken.name,subString(sourcecode,lexbeg,fwdptr-1));lexbeg=fwdptr;return; } } } //************ void regkeywords() { int i; char keywords[][15]={"do","while","main","for","include","if","else","break","continue","int","char","float","double","void","return","switch","case"}; //17 char relop[][3]={"LE","NE","LT","GE","GT","EQ"};//17 to 22 char arop[][7]={"INC","PLU","DEC","MIN","MUL","DIV","MOD","ASSIGN"};//23 to 30 char syms[][2]={".","<",">",",","{","}","(",")","#",";"};//31 to 40 for(i=0;i<=16;i++) { strcpy(st[i].name,keywords[i]); strcpy(st[i].type,"keyword"); } for(i=17;i<=22;i++) { strcpy(st[i].name,relop[i-17]); strcpy(st[i].type,"relop"); } for(i=23;i<=30;i++) { strcpy(st[i].name,arop[i-23]); strcpy(st[i].type,"arop"); } for(i=31;i<41;i++) { strcpy(st[i].name,syms[i-31]); strcpy(st[i].type,"Reserved Symbol"); } strcpy(st[41].name,"NC"); strcpy(st[41].type,"Numeric Constant"); symbolcount=42; } //************ void main() { int i; char c,*line; FILE *input; input=fopen("input.c","r"); i=0; sourcecode=(char*)malloc(sizeof(char)*1200); while((c=getc(input))!=EOF) { *(sourcecode+i)=c; i++; } *(sourcecode+i)='\0'; regkeywords(); printf("\nline 1: "); nextToken(); while(state!=-1) { printf("type: %s, name: %s, id= %d\n",retToken.type,retToken.name,retToken.id); nextToken(); if(lexbeg>=strlen(sourcecode)||fwdptr>=strlen(sourcecode)) state=-1; } printf("\nSymbol Table:\n"); for(i=0;i<=symbolcount;i++) { printf("\nType: %s",st[i].type); printf("\tName: %s",st[i].name); printf("\tid=: %d",i); } } Sample input (input.c) to the Lexical Analyzer void main() { int a,b,c,d; a=10; b=30; c=a+b; d=a*b; }