CFG Reader(Follow)
A. Sixth Edition
This is fifth?fourth? edition of my CFG Reader which stands for Context-Free-Grammar Reader. In this edition,
I finished the follow set calculation and removed meta symbol---"{", "}" from last version because it is same
thing as "|".
What is Follow set?
Example: B -> A a
The variable A will have a follow set of Follow(A) = {a}.
program -> stmt-sequence
stmt-sequence -> stmt-sequence ; statement | statement
statement -> if-stmt | repeat-stmt | assign-stmt | read-stmt | write-stmt
if-stmt -> if boolean-exp then stmt-sequence end
| if boolean-exp then stmt-sequence else stmt-sequence end
repeat-stmt -> repeat stmt-sequence until boolean-exp
assign-stmt -> identifier := integer-exp
read-stmt -> read identifier
write-stmt -> write integer-exp
boolean-exp -> integer-exp comparison-op integer-exp
comparison-op -> < | =
integer-exp -> integer-exp addop term | term
addop -> + | -
term -> term mulop factor | factor
mulop -> * | /
factor -> ( integer-exp ) | number | identifier
This problem has a standard algorithm in text. So, will you just refer to the text if you are really interested in?
I highly suspect if there is anyone in my reader group would be interested in reading this or even want to have
any idea of "what is First" and how to calculate.
E.Further improvement
F.File listing
1. CFGReader.h
2. CFGReader.cpp
3. Grammar.h
4. Grammar.cpp
5. main.cpp (main)
file name: CFGReader.h
#include "Grammar.h" class CFGReader { private: char buffer[BufferLength]; FILE* stream; void newRule(const char* str); void readRule(); void addRule(const char* str); int addToken(const char* str, bool isTerminal=true); void printRule(int index); public: void readFromFile(const char* fileName); void optimize(); void print(); };
file name: CFGReader.cpp
#include <iostream> #include "CFGReader.h" using namespace std; Grammar grammar; const char* deliStr=" |\n"; //this would only remove the immediate left recursion //and should I expand it to remove all kinds of left-recursion? void CFGReader::readFromFile(const char* fileName) { if ((stream=fopen(fileName, "r"))==NULL) { cout<<"cannot open file "<<fileName<<endl; } else { readRule(); } } void CFGReader::readRule() { char* ptr=buffer; char* hold; int count=0; while (!feof(stream)) { count=0; fgets(buffer, BufferLength-1, stream); ptr=buffer; while ((ptr=strtok(ptr, " \n"))!=NULL) { if (strcmp(ptr, "|")!=0) { //test the first two token to see if old rule continues if (count==0) { hold=ptr; } if (count==1) { if (strcmp("->", ptr)==0) { /* curIndex=addToken(hold, false);//the index of cur token grammar[curIndex]->terminal=false; newRule(); */ newRule(hold); } else { addRule(hold); addRule(ptr); } } if (count>=2)//always add { addRule(ptr); } count++; } else { //this is an alternative production rule newRule(NULL); } ptr=NULL; } } } void CFGReader::newRule(const char* str) { grammar.newRule(str); } void CFGReader::optimize() { grammar.optimize(); } void CFGReader::addRule(const char* str) { grammar.addRule(str); } int CFGReader::addToken(const char* str, bool isTerminal) { return grammar.addToken(str, isTerminal); } void CFGReader::printRule(int index) { grammar.printRule(index); } void CFGReader::print() { grammar.print(); grammar.printToken(); //grammar.printAllRules(); }
file name: Grammar.h
const int BufferLength=256; const int MaxTokenCount=80; const int MaxRHSCount=50; const int MaxRuleCount=200; const int MaxTokenLength=10; const int MaxProduction=10; struct Token { //bool isMeta; bool terminal; bool isNull; char* name; int production[MaxProduction];//pointer to the production it gives int count; int firstCount; int followCount; int follow[MaxTokenCount]; int first[MaxTokenCount]; }; class Grammar { private: Token* token[MaxTokenCount]; int tokenCount; int curIndex;//the index of current token int curPos;//the position at production rule //MaxRuleCount>=MaxVariableCount!!! int production[MaxRuleCount][MaxRHSCount]; int prodIndex;//pointing to current production, initialized to -1 int checkRecursion(int tIndex, int curToken); void addBeginEnd(); bool addIntoFirst(int target, int source); bool addFirst(int target, int source); void shiftRule(int ruleIndex, bool Left2Right, int offset); void addAtEnd(int ruleIndex, int toAdd); void addRuleForToken(int tIndex, int ruleIndex); int copyRule(int source, int target);//return the position of -1 void replaceRule(int curToken, int ruleIndex); int findMetaSymbol(int& begin, int& end); void initialize(); void doReplaceMetaSymbol(int ruleIndex, int begin, int end); void removeRuleFromToken(int tIndex, int ruleIndex); void checkEpsilon(int ruleIndex); void removeImmediate(int tIndex, int ruleIndex); void doAddRule(int tIndex); void commonLeftFactor(); int findCommonFactor(int tokenIndex, int ruleIndex); void removeLeftRecursion(); void doCommonFactor(int tokenIndex, int ruleIndex, int index); int forgeToken(int index);//create a new variable void epsilonRule(int ruleIndex); bool isRedundant(int tIndex); void replaceMetaSymbol(); void calculateFirst(); void calculateFollow(); void calculateNull(); bool Null(int tIndex); bool addFollow(int target, int source); bool addIntoFollow(int target, int source); bool addFollowIntoFollow(int target, int source); void removeRedundant(); void removeToken(int tIndex); // void calculateProperty(); public: Grammar(); void optimize(); void printRule(int index); void print(); void printAllRules(); void printToken(); void addRule(const char* str); void newRule(const char* str);//this is an internal method, not suitable for outside Token* operator[](int index); int addToken(const char* str, bool isTerminal=true); Token* createToken(const char* theName, bool isTerminal); int tokenIndex(const char* str); };
file name: Grammar.cpp
#include <iostream> #include "Grammar.h" using namespace std; const char* emptyStr="e"; const char* optionBegin="{"; const char* optionEnd="}"; const char* startStr="START"; const char* endStr="$"; int startSymbolIndex=-1; int stackBottomIndex=-1; int beginIndex=-1; int endIndex=-1; int emptyIndex=-1; //leave the rule unremoved!!! as I have no way to do it!!! void Grammar::removeToken(int tIndex) { delete[]token[tIndex]->name; delete token[tIndex]; tokenCount--; for (int i=tIndex; i<tokenCount; i++) { token[i]=token[i+1]; } } bool Grammar::isRedundant(int tIndex) { int k, theRule, theToken; for (int i=0; i<tokenCount; i++) { for (int j=0; j<token[i]->count; j++) { k=0; theRule=token[i]->production[j]; theToken=production[theRule][k]; while (theToken!=-1) { if (theToken==tIndex) { return false; } k++; theToken=production[theRule][k]; } } } return true; } //what is redundant? except the start variable //the non-terminal never appears in other rules! void Grammar::removeRedundant() { int tIndex=1; bool findNew=false; while (tIndex<tokenCount) { findNew=false; if (!token[tIndex]->terminal) { if (isRedundant(tIndex)) { removeToken(tIndex); findNew=true; } } if (findNew) { tIndex=1; } else { tIndex++; } } } void Grammar::printAllRules() { /* cout<<"\nnow print all rules\n"; for (int i=0; i<=prodIndex; i++) { cout<<"rule index "<<i<<": "; printRule(i); cout<<"\n"; } */ int sum=0; for (int i=0; i<tokenCount; i++) { if (!token[i]->terminal) { sum+=token[i]->count; } } cout<<" total rules is:"<<prodIndex+1; cout<<"\n and the sum is "<<sum<<endl; } void Grammar::addBeginEnd() { int begin=addToken(startStr, false); int end=addToken(endStr, true); prodIndex++; production[prodIndex][0]=0; production[prodIndex][1]=end; production[prodIndex][2]=-1; addRuleForToken(begin, prodIndex); startSymbolIndex=begin; stackBottomIndex=end; } /* void Grammar::calculateProperty() { calculateNull(); calculateFirst(); calculateFollow(); } */ void Grammar::printToken() { for (int i=0; i<tokenCount; i++) { cout<<"name: "<<token[i]->name<<endl; cout<<" isNull: "<<(token[i]->isNull?"true":"false")<<endl; cout<<" isTerminal: "<<(token[i]->terminal?"true":"false")<<endl; cout<<" first: "; for (int j=0; j<token[i]->firstCount; j++) { if (j!=0) { cout<<","; } cout<<"["<<j<<"]="<<token[token[i]->first[j]]->name; } cout<<"\n follow: "; for (j=0; j<token[i]->followCount; j++) { if (j!=0) { cout<<","; } cout<<"["<<j<<"]="<<token[token[i]->follow[j]]->name; } cout<<endl; } } //must use while loop to discover new set!!!!! void Grammar::calculateFirst() { int i; bool addNew; do { i=0; addNew=false; while (i<tokenCount) { //the terminal contains itself if (token[i]->terminal) { //addFirst don't judge if it is nullable!! addFirst(i, i); //token[i]->first[token[i]->firstCount++]=i; } else { //for all its rules for (int j=0; j<token[i]->count; j++) { int theToken, k=0; theToken=production[token[i]->production[j]][k]; //for each token in each rule do { //add non epsilon set if (addIntoFirst(i, theToken)) { addNew=true; } //if it is not null, means it is end if (!token[theToken]->isNull) { break; } //if it is nullable, continue k++; theToken=production[token[i]->production[j]][k]; }while (theToken!=-1); //it means all token in this rule is nullable, so if (theToken==-1) { //it is nullable //addEpsilonIntoFirst(i); addFirst(i, emptyIndex); } } } i++; } }while (addNew); } bool Grammar::addFollowIntoFollow(int target, int source) { bool addNew=false; for (int i=0; i<token[source]->followCount; i++) { if (addFollow(target, token[source]->follow[i])) { addNew=true; } } return addNew; } bool Grammar::addIntoFollow(int target, int source) { bool addNew=false; if (source==-1) { return false; } for (int i=0; i<token[source]->firstCount; i++) { //add non-epsilon if (!token[token[source]->first[i]]->isNull) { if (addFollow(target, token[source]->first[i])) { addNew=true; } } } return addNew; } void Grammar::calculateFollow() { int i; bool addNew, started; //token[startSymbolIndex]->follow[0]=stackBottomIndex; //token[startSymbolIndex]->followCount=1; do { i=0; addNew=false; while (i<tokenCount) { //the terminal contains itself if (!token[i]->terminal) { for (int tIndex=0; tIndex<tokenCount; tIndex++) { //for all its rules if (token[tIndex]->terminal) { continue; } //for each its rule for (int j=0; j<token[tIndex]->count; j++) { int theToken, k=0, theRule=token[tIndex]->production[j]; started=false; theToken=production[theRule][k]; //for each token in each rule do { //the token appears here if (theToken==i) { started=true; //add non epsilon set, including -1 situation!!! if (addIntoFollow(i, production[theRule][k+1])) { addNew=true; } } //if it is not null, means it is end if (started&&!token[theToken]->isNull) { break; } //if it is nullable, continue k++; theToken=production[theRule][k]; }while (theToken!=-1); //it means all token in this rule is nullable, so if (started&&theToken==-1) { //it is nullable //add current variable Follow(j) into Follow(i); if (addFollowIntoFollow(i, tIndex)) { addNew=true; } } } } } i++; } }while (addNew); } void Grammar::addRuleForToken(int tIndex, int ruleIndex) { token[tIndex]->production[token[tIndex]->count++]=ruleIndex; } void Grammar::shiftRule(int ruleIndex, bool left2Right, int offset) { int end=0; while (production[ruleIndex][end]!=-1) { end++; } if (left2Right) { for (int i=end; i>=0; i--) { production[ruleIndex][i+offset]=production[ruleIndex][i]; } } else { for (int i=0; i<=end-offset; i++) { production[ruleIndex][i]=production[ruleIndex][i+offset]; } checkEpsilon(ruleIndex); } } void Grammar::checkEpsilon(int ruleIndex) { if (production[ruleIndex][0]==-1) { epsilonRule(ruleIndex); } } bool Grammar::addFollow(int target, int source) { //check if it is already there for (int i=0; i<token[target]->followCount; i++) { if (token[target]->follow[i]==source) { return false; } } //add at end token[target]->follow[token[target]->followCount++]=source; return true; } bool Grammar::addFirst(int target, int source) { //check if it is already there for (int i=0; i<token[target]->firstCount; i++) { if (token[target]->first[i]==source) { return false; } } //add at end token[target]->first[token[target]->firstCount++]=source; return true; } //add non epsilon into it. bool Grammar::addIntoFirst(int target, int source) { bool addNew=false; if (token[source]->terminal) { return addFirst(target, source); } for (int i=0; i<token[source]->firstCount; i++) { //add non epsilon if (!token[token[source]->first[i]]->isNull) { if (addFirst(target, token[source]->first[i])) { addNew=true; } } } return addNew; } bool Grammar::Null(int tIndex) { if (token[tIndex]->terminal) { return token[tIndex]->isNull; } for (int i=0; i<token[tIndex]->count; i++) { int j=0, theToken; theToken=production[token[tIndex]->production[i]][j]; do { if (theToken==tIndex||!Null(theToken)) { break; } j++; theToken=production[token[tIndex]->production[i]][j]; }while (theToken!=-1); if (theToken==-1) { return true; } } return false; } void Grammar::calculateNull() { for (int i=0; i<tokenCount; i++) { token[i]->isNull=Null(i); } } int Grammar::findMetaSymbol(int& begin, int& end) { int theRule, theToken, k; begin=end=-1; for (int i=0; i<tokenCount; i++) { for (int j=0; j<token[i]->count; j++) { k=0; theRule=token[i]->production[j]; theToken=production[theRule][k]; while (theToken!=-1) { if (theToken==beginIndex) { begin=k; } if (theToken==endIndex) { end=k; return theRule; } k++; theToken=production[theRule][k]; } } } return -1; } void Grammar::doReplaceMetaSymbol(int ruleIndex, int begin, int end) { int newTokenIndex=forgeToken(0), i=0; addRuleForToken(newTokenIndex, ++prodIndex); //token[newTokenIndex]->production[token[newTokenIndex]->count++]=++prodIndex; //token[newTokenIndex]->terminal=false; copyRule(ruleIndex, prodIndex); //shrink while (production[ruleIndex][i+end+1]!=-1) { production[ruleIndex][begin+i]=production[ruleIndex][i+end+1]; i++; } production[ruleIndex][begin+i]=-1; addAtEnd(ruleIndex, newTokenIndex); /* production[ruleIndex][begin]=newTokenIndex; production[ruleIndex][begin+1]=-1; */ shiftRule(prodIndex, false, begin+1); production[prodIndex][end-begin-1]=newTokenIndex; production[prodIndex][end-begin]=-1; epsilonRule(++prodIndex); addRuleForToken(newTokenIndex, prodIndex); } void Grammar::replaceMetaSymbol() { int begin, end, ruleIndex; while ((ruleIndex=findMetaSymbol(begin, end))!=-1) { doReplaceMetaSymbol(ruleIndex, begin, end); } for (int i=0; i<tokenCount; i++) { //if (token[i]->isMeta) if (i==beginIndex||i==endIndex) { removeToken(i); } } } void Grammar::addAtEnd(int ruleIndex, int toAdd) { int end=0; while (production[ruleIndex][end]!=-1) { end++; } production[ruleIndex][end++]=toAdd; production[ruleIndex][end]=-1; } //left-recursion: A -> A a | b | c //change to: A -> b A' | c A' // A' -> a A' | empty void Grammar::removeImmediate(int tIndex, int ruleIndex) { int newIndex=forgeToken(tIndex); int holdRuleIndex=token[tIndex]->production[ruleIndex]; //sequence matters! //change to: A -> b A' for (int i=0; i<token[tIndex]->count; i++) { if (i!=ruleIndex) { addAtEnd(token[tIndex]->production[i], newIndex); } } //shift removeRuleFromToken(tIndex, ruleIndex); addRuleForToken(newIndex, holdRuleIndex); //token[newIndex]->production[token[newIndex]->count++]=holdRuleIndex; shiftRule(holdRuleIndex, false, 1); addAtEnd(holdRuleIndex, newIndex); //add epsilon rule for new variable epsilonRule(++prodIndex); addRuleForToken(newIndex, prodIndex); } int Grammar::forgeToken(int index) { char str[MaxTokenLength+2], ch; int len=strlen(token[index]->name); int temp=0, i=0; strcpy(str, token[index]->name); ch=str[len-1];//get last char of name if (ch>='0'&&ch<'9') { str[len-1]=ch+i+1; } else { str[len]='0'+i; str[len+1]='\0'; } //I won't bother to check repetitation of name while (tokenIndex(str)!=-1) { i++; if (ch>='0'&&ch<'9') { str[len-1]=ch+i+1; } else { str[len]='0'+i; str[len+1]='\0'; } } return addToken(str, false);//it is non-terminal for sure } int Grammar::copyRule(int source, int target) { int i=0; while (production[source][i]!=-1) { production[target][i]=production[source][i]; i++; } production[target][i]=-1; return i; } void Grammar::doAddRule(int tIndex) { token[tIndex]->production[token[tIndex]->count++]=++prodIndex; } void Grammar::addRule(const char* str) { int index; index=addToken(str); production[prodIndex][curPos++]=index; production[prodIndex][curPos]=-1;//set end } //if the token is already there, it return the index //otherwise, it add new node in token array int Grammar::addToken(const char* str, bool isTerminal) { int index; index=tokenIndex(str); if (index==-1) { index=tokenCount; } else { return index; } token[index]=createToken(str, isTerminal); if (strcmp(str, optionBegin)==0) { beginIndex=index; } if (strcmp(str, optionEnd)==0) { endIndex=index; } tokenCount++; if (strcmp(str, emptyStr)==0) { token[index]->isNull=true; emptyIndex=index; } return index; } void Grammar::newRule(const char* str) { if (str!=NULL) { curIndex=addToken(str, false); } //add one pointer token[curIndex]->production[token[curIndex]->count++]=++prodIndex; token[curIndex]->terminal=false; curPos=0;//reset to restart; } Token* Grammar::createToken(const char* theName, bool isTerminal) { Token* ptr=new Token; ptr->name=new char[strlen(theName)+1]; strcpy(ptr->name, theName); ptr->terminal=isTerminal; ptr->count=ptr->firstCount=ptr->followCount=0; ptr->isNull=false; return ptr; } int Grammar::tokenIndex(const char* str) { for (int i=0; i<tokenCount; i++) { if (strcmp(str, token[i]->name)==0) { return i; } } return -1; } int Grammar::checkRecursion(int tIndex, int curToken) { for (int i=0; i<token[curToken]->count; i++) { //token[tIndex]->production[i]=ruleIndex //production[ruleIndex][0] is the first left-recursion one if (production[token[curToken]->production[i]][0]<=curToken) { return i; } } return -1; } void Grammar::printRule(int index) { int nodeIndex=0; while (production[index][nodeIndex]!=-1) { cout<<token[production[index][nodeIndex]]->name<<" "; nodeIndex++; } } void Grammar::initialize() { tokenCount=curIndex=curPos=0; prodIndex=-1;//in order to ++ blindly } Grammar::Grammar() { initialize(); } void Grammar::removeLeftRecursion() { int tIndex=0, curToken=0; bool newChange=false; while (tIndex<tokenCount) { if (!token[tIndex]->terminal) { for (int i=0; i<token[tIndex]->count; i++) { curToken=production[token[tIndex]->production[i]][0]; if (curToken<=tIndex&&!token[curToken]->terminal) { if (curToken!=tIndex) { replaceRule(tIndex, i); } else { removeImmediate(tIndex, i); } newChange=true; } } } //whenever there is some new findings, restart if (!newChange) { tIndex++; } else { tIndex=0; newChange=false; } } } void Grammar::replaceRule(int tIndex, int ruleIndex) { int pos, j, targetEnd, sourceEnd, curRule; curRule=token[tIndex]->production[ruleIndex]; int curToken=production[curRule][0]; for (int i=token[curToken]->count-1; i>=0; i--) { if (i>0) { doAddRule(tIndex); pos=copyRule(token[curToken]->production[i], prodIndex); j=0; while (production[curRule][j+1]!=-1) { production[prodIndex][pos+j]=production[curRule][j+1]; j++; } production[prodIndex][pos+j]=-1; //addRuleForToken(curToken, prodIndex); } else { targetEnd=sourceEnd=0; //curRule=token[tIndex]->production[ruleIndex]; while (true) { if (production[token[curToken]->production[0]][sourceEnd]==-1&& production[curRule][targetEnd]==-1) { break; } if (production[token[curToken]->production[0]][sourceEnd]!=-1) { sourceEnd++; } if (production[curRule][targetEnd]!=-1) { targetEnd++; } } j=targetEnd+sourceEnd-1; while (j>=0) { if (j>=sourceEnd) { production[curRule][j]=production[curRule][j-sourceEnd+1]; } else { production[curRule][j]=production[token[curToken]->production[0]][j]; } j--; } } } } //optimize sequence is first common-factor then remove left recursion //therefore I don't have to check if for same variable if there will be //more than one left-recursion void Grammar::optimize() { replaceMetaSymbol(); removeLeftRecursion(); commonLeftFactor(); removeRedundant(); addBeginEnd(); calculateNull(); calculateFirst(); calculateFollow(); } int Grammar::findCommonFactor(int tIndex, int ruleIndex) { for (int i=ruleIndex+1; i<token[tIndex]->count; i++) { //if the two rule has the same first token if (production[token[tIndex]->production[ruleIndex]][0]== production[token[tIndex]->production[i]][0]) { /* //calculate if there is epsilon if (emptyIndex==-1) { emptyIndex=tokenIndex(emptyStr); } //if it is not epsilon if (production[token[tIndex]->production[i]][0]!=emptyIndex) { return i; } */ return i; } } return -1; } void Grammar::epsilonRule(int ruleIndex) { production[ruleIndex][0]=addToken(emptyStr); production[ruleIndex][1]=-1; } //sample: x -> Aa // x -> Ab //changed to: x -> Ax' //this is to change the old rule // x' -> b //this is to change the old rule // x' -> a //this is the new-added rule void Grammar::doCommonFactor(int tIndex, int ruleIndex, int index) { int newTokenIndex=forgeToken(tIndex);//create a new variable //move the second and after part to the new rule of new variable //doing: x' -> a //curPos=0; //prepare to add one new rule addRuleForToken(newTokenIndex, ++prodIndex); //token[newTokenIndex]->production[token[newTokenIndex]->count++]=++prodIndex; token[newTokenIndex]->terminal=false; copyRule(token[tIndex]->production[ruleIndex], prodIndex); shiftRule(prodIndex, false, 1); /* do { //do copying production[prodIndex][curPos]= production[token[tIndex]->production[ruleIndex]][curPos+1]; curPos++; //even the -1 at end is copied }while (production[token[tIndex]->production[ruleIndex]][curPos]!=-1); */ //in order to show an empty string, in case the string is "epsilon" /* if (curPos==1) { epsilonRule(prodIndex); } */ //replace x -> Aa with x -> Ax' production[token[tIndex]->production[ruleIndex]][1]=newTokenIndex; production[token[tIndex]->production[ruleIndex]][2]=-1;//end //doing: x' -> b //curPos=0; //prepare to add one new rule //pointing new token to where old rule lies addRuleForToken(newTokenIndex, token[tIndex]->production[index]); /* token[newTokenIndex]->production[token[newTokenIndex]->count++]= token[tIndex]->production[index]; */ shiftRule(token[tIndex]->production[index], false, 1); removeRuleFromToken(tIndex, index); } void Grammar::removeRuleFromToken(int tIndex, int ruleIndex) { token[tIndex]->count--; for (int i=ruleIndex; i<token[tIndex]->count; i++) { token[tIndex]->production[i]=token[tIndex]->production[i+1]; } } void Grammar::commonLeftFactor() { int index=-1, tIndex=0, ruleIndex=0; bool flag; //whenever a newrule is done, restart! while (tIndex<tokenCount) { ruleIndex=0; flag=false; while (ruleIndex<token[tIndex]->count) { index=findCommonFactor(tIndex, ruleIndex); if (index!=-1) { doCommonFactor(tIndex, ruleIndex, index); //restart flag=true; break; } else { ruleIndex++; } } if (flag) { tIndex=0; } else { tIndex++; } } } Token* Grammar::operator[](int index) { if (index>=0&&index<tokenCount) { return token[index]; } else { return NULL; } } void Grammar::print() { for (int i=0; i<tokenCount; i++) { if (!token[i]->terminal) { cout<<token[i]->name<<" ==> "; for (int j=0; j<token[i]->count; j++) { printRule(token[i]->production[j]); if (j!=token[i]->count-1) { cout<<" | "; } } cout<<"\n"; } } }
file name: main.cpp (main)
#include <iostream> #include "CFGReader.h" using namespace std; int main() { CFGReader R; R.readFromFile("c:\\ruleTest.txt"); R.optimize(); R.print(); return 0; }
The input is something like following:
Please note that all tokens must be separated by space, including "|" and "->" and "{", "}" is meta symbol
equivalent to Kaleen star.
M' -> program i ; Dl B Dl -> Dv Ml Ml -> Ml M | e M -> module i ( Vl ) Dv B Dv -> variables Vl | e Vl -> Vl V | V V -> Il : T ; T -> integer Ad ; | char Ad Il -> i | Il , i Ad -> e | array [ n ] B -> begin Sl end ; Sl -> S | S Sl S -> L := E ; | if C then S else S | loop Sl end ; | exit ; i ( Lp ) ; | B | read Ln ; | write Ln ; | e ; Lp -> e | Ln Ln -> L { , L } L -> i Ar Ar -> e | [ E ] E -> F { Oa F } Oa -> + | - F -> R { Om R } Om -> * | / R -> L | n | ( E ) | c C -> E Or E Or -> = | < | > | <= | >=
Here is the result:
M' ==> program i ; Dl B Dl ==> Dv Ml B ==> Sl end S ; Dv ==> V ) | module Ml ==> module Sl0 Vl ==> Il START V ==> : T integer ; Il ==> i $ T ==> Ad char ; | , char Ad ==> module | [ n ] begin Sl ==> L Sl0 S ==> := E if ; | C then else L loop L | exit end S ; | Lp ; i Vl read variables ; | Sl end S ; | Ln write ; | } write ; | module ; L ==> i R E ==> Om Il0 C ==> Om Il0 >= if Lp ==> module | write Ln ==> i R Sl0 R ==> i R | ] | Vl if variables | <= Om ==> < | > Or ==> M'0 | M'1 | M'2 | Ml0 | Vl0 M'0 ==> * Om Il0 | module M'1 ==> array := Sl0 | module M'2 ==> = Or START | module Ml0 ==> ( i Vl ) variables Dv B Sl0 | module Vl0 ==> i $ T integer ; START | module Il0 ==> array i $ | module Sl0 ==> module | end START ==> M' $ name: M' isNull: false isTerminal: false first: [0]=program follow: [0]=$ name: program isNull: false isTerminal: true first: [0]=program follow: name: i isNull: false isTerminal: true first: [0]=i follow: name: ; isNull: false isTerminal: true first: [0]=; follow: name: Dl isNull: false isTerminal: false first: [0]=module,[1]=: follow: [0]=i name: B isNull: false isTerminal: false first: [0]=i follow: [0]=module,[1]=end name: Dv isNull: false isTerminal: false first: [0]=module,[1]=: follow: [0]=module,[1]=i name: Ml isNull: false isTerminal: false first: [0]=module follow: name: e isNull: true isTerminal: true first: [0]=e follow: name: module isNull: false isTerminal: true first: [0]=module follow: name: ( isNull: false isTerminal: true first: [0]=( follow: name: Vl isNull: false isTerminal: false first: [0]=i follow: [0]=read,[1]=if,[2]=) name: ) isNull: false isTerminal: true first: [0]=) follow: name: variables isNull: false isTerminal: true first: [0]=variables follow: name: V isNull: false isTerminal: false first: [0]=: follow: [0]=) name: Il isNull: false isTerminal: false first: [0]=i follow: [0]=program name: : isNull: false isTerminal: true first: [0]=: follow: name: T isNull: false isTerminal: false first: [0]=,,[1]=module,[2]=[ follow: [0]=integer name: integer isNull: false isTerminal: true first: [0]=integer follow: name: Ad isNull: false isTerminal: false first: [0]=module,[1]=[ follow: [0]=char name: char isNull: false isTerminal: true first: [0]=char follow: name: , isNull: false isTerminal: true first: [0]=, follow: name: array isNull: false isTerminal: true first: [0]=array follow: name: [ isNull: false isTerminal: true first: [0]=[ follow: name: n isNull: false isTerminal: true first: [0]=n follow: name: ] isNull: false isTerminal: true first: [0]=] follow: name: begin isNull: false isTerminal: true first: [0]=begin follow: name: Sl isNull: false isTerminal: false first: [0]=i follow: [0]=end name: end isNull: false isTerminal: true first: [0]=end follow: name: S isNull: false isTerminal: false first: [0]=:=,[1]=exit,[2]=},[3]=module,[4]=write,[5]=i,[6]=<,[7]=> follow: [0]=; name: L isNull: false isTerminal: false first: [0]=i follow: [0]=module,[1]=end,[2]=loop name: := isNull: false isTerminal: true first: [0]=:= follow: name: E isNull: false isTerminal: false first: [0]=<,[1]=> follow: [0]=if name: if isNull: false isTerminal: true first: [0]=if follow: name: C isNull: false isTerminal: false first: [0]=<,[1]=> follow: [0]=then name: then isNull: false isTerminal: true first: [0]=then follow: name: else isNull: false isTerminal: true first: [0]=else follow: name: loop isNull: false isTerminal: true first: [0]=loop follow: name: exit isNull: false isTerminal: true first: [0]=exit follow: name: Lp isNull: false isTerminal: false first: [0]=module,[1]=write follow: [0]=; name: read isNull: false isTerminal: true first: [0]=read follow: name: Ln isNull: false isTerminal: false first: [0]=i follow: [0]=write name: write isNull: false isTerminal: true first: [0]=write follow: name: } isNull: false isTerminal: true first: [0]=} follow: name: + isNull: false isTerminal: true first: [0]=+ follow: name: - isNull: false isTerminal: true first: [0]=- follow: name: R isNull: false isTerminal: false first: [0]=i,[1]=],[2]=<= follow: [0]=module,[1]=end name: Om isNull: false isTerminal: false first: [0]=<,[1]=> follow: [0]=array,[1]=module name: * isNull: false isTerminal: true first: [0]=* follow: name: / isNull: false isTerminal: true first: [0]=/ follow: name: c isNull: false isTerminal: true first: [0]=c follow: name: Or isNull: false isTerminal: false first: [0]=*,[1]=module,[2]=array,[3]==,[4]=(,[5]=i follow: [0]=program name: = isNull: false isTerminal: true first: [0]== follow: name: < isNull: false isTerminal: true first: [0]=< follow: name: > isNull: false isTerminal: true first: [0]=> follow: name: <= isNull: false isTerminal: true first: [0]=<= follow: name: >= isNull: false isTerminal: true first: [0]=>= follow: name: M'0 isNull: false isTerminal: false first: [0]=*,[1]=module follow: name: M'1 isNull: false isTerminal: false first: [0]=array,[1]=module follow: name: M'2 isNull: false isTerminal: false first: [0]==,[1]=module follow: name: Ml0 isNull: false isTerminal: false first: [0]=(,[1]=module follow: name: Vl0 isNull: false isTerminal: false first: [0]=i,[1]=module follow: name: Il0 isNull: false isTerminal: false first: [0]=array,[1]=module follow: [0]=>= name: Sl0 isNull: false isTerminal: false first: [0]=module,[1]=end follow: name: START isNull: false isTerminal: false first: [0]=program follow: name: $ isNull: false isTerminal: true first: [0]=$ follow: