CFG Reader(moon-machine-1)
A. Fifteenth Edition
This is fifteenth edition of my compiler project. I am now in desperately struggle with the final battle!
The original grammar from teacher:
M ==> rule.0 program i ; Dl B
Dl ==> rule.1 Dv Ml
B ==> rule.17 begin Sl end ;
Dv ==> rule.5 variables Vl | rule.6 e
Ml ==> rule.3 Ml0
Mo ==> rule.4 module i ( Vl ) Dv B
Vl ==> rule.8 Vl0
V ==> rule.9 Il : T ;
Il ==> rule.12 i Il0
T ==> rule.10 integer Ad | rule.11 char Ad
Ad ==> rule.15 e | rule.16 array [ n ]
L ==> rule.14 i Ar
Ar ==> rule.36 e | rule.37 [ E ]
Sl ==> rule.18 S Sl0
S ==> rule.20 i S0 | rule.21 if C then S else S | rule.22 loop Sl end ; | rule.23 exit ; | rule.
25 begin Sl end ; | rule.26 read Ln ; | rule.27 write Lo ; | rule.28 e ;
E ==> rule.38 F M0
C ==> rule.48 F M0 Or E
Lp ==> rule.29 e | rule.30 Ln
Ln ==> rule.31 i Ar M1
Lo ==> rule.32 Lr M2
Lr ==> rule.33 i Ar | rule.34 n | rule.35 c
F ==> rule.41 R M3
Oa ==> rule.39 + | rule.40 -
R ==> rule.44 i Ar | rule.45 n | rule.46 ( E ) | rule.47 c
Om ==> rule.42 * | rule.43 /
Or ==> rule.49 = | rule.50 < | rule.51 > | rule.52 <= | rule.53 >= | rule.54 !=
M0 ==> rule.55 + F M0 | rule.56 e | rule.66 - F M0
M1 ==> rule.57 , L M1 | rule.58 e
M2 ==> rule.59 , Lr M2 | rule.60 e
M3 ==> rule.61 * R M3 | rule.62 e | rule.67 / R M3
Ml0 ==> rule.2 module i ( Vl ) Dv B Ml0 | rule.63 e
Vl0 ==> rule.7 i Il0 : T ; Vl0 | rule.64 e
Il0 ==> rule.13 , i Il0 | rule.65 e
Sl0 ==> rule.68 e | rule.19 Sl
S0 ==> rule.69 Ar := E ; | rule.24 ( Lp ) ;
START ==> rule.70 M $
Press any key to continue
The following gives the index of each token:
index.0 M ==> no.1program no.2i no.3; no.4Dl no.5B
index.4 Dl ==> no.6Dv no.7Ml
index.5 B ==> no.29begin no.30Sl no.31end no.3;
index.6 Dv ==> no.14variables no.12Vl | no.9e
index.7 Ml ==> no.69Ml0
index.8 Mo ==> no.10module no.2i no.11( no.12Vl no.13) no.6Dv no.5B
index.12 Vl ==> no.15V no.70Vl0
index.15 V ==> no.16Il no.17: no.18T no.3;
index.16 Il ==> no.2i no.71Il0
index.18 T ==> no.19integer no.20Ad | no.21char no.20Ad
index.20 Ad ==> no.9e | no.25array no.26[ no.27n no.28]
index.23 L ==> no.2i no.24Ar
index.24 Ar ==> no.9e | no.26[ no.34E no.28]
index.30 Sl ==> no.32S no.72Sl0
index.32 S ==> no.2i no.73S0 | no.35if no.36C no.37then no.32S no.38else
no.32S | no.39loop no.30
Sl no.31end no.3; | no.40exit no.3; | no.29begin no.30Sl no.31end no.3; |
no.42read no.43Ln no.3;
| no.44write no.45Lo no.3; | no.9e no.3;
index.34 E ==> no.50F no.65M0
index.36 C ==> no.50F no.65M0 no.58Or no.34E
index.41 Lp ==> no.9e | no.43Ln
index.43 Ln ==> no.2i no.24Ar no.66M1
index.45 Lo ==> no.48Lr no.67M2
index.48 Lr ==> no.2i no.24Ar | no.27n | no.49c
index.50 F ==> no.54R no.68M3
index.51 Oa ==> no.52+ | no.53-
index.54 R ==> no.2i no.24Ar | no.27n | no.11( no.34E no.13) | no.49c
index.55 Om ==> no.56* | no.57/
index.58 Or ==> no.59= | no.60< | no.61> | no.62<= | no.63>= | no.64!=
index.65 M0 ==> no.52+ no.50F no.65M0 | no.9e | no.53- no.50F no.65M0
index.66 M1 ==> no.22, no.23L no.66M1 | no.9e
index.67 M2 ==> no.22, no.48Lr no.67M2 | no.9e
index.68 M3 ==> no.56* no.54R no.68M3 | no.9e | no.57/ no.54R no.68M3
index.69 Ml0 ==> no.10module no.2i no.11( no.12Vl no.13) no.6Dv no.5B no.69Ml0
| no.9e
index.70 Vl0 ==> no.2i no.71Il0 no.17: no.18T no.3; no.70Vl0 | no.9e
index.71 Il0 ==> no.22, no.2i no.71Il0 | no.9e
index.72 Sl0 ==> no.9e | no.30Sl
index.73 S0 ==> no.24Ar no.33:= no.34E no.3; | no.11( no.41Lp no.13) no.3;
index.74 START ==> no.0M no.75$
Press any key to continue
M ==> rule.0 program i ; Dl B
Dl ==> rule.1 Dv Ml
B ==> rule.17 begin Sl end ;
Dv ==> rule.5 variables Vl | rule.6 e
Ml ==> rule.3 Ml0
Mo ==> rule.4 module i ( Vl ) Dv B
Vl ==> rule.8 V Vl0
V ==> rule.9 Il : T ;
Il ==> rule.12 i Il0
T ==> rule.10 integer Ad | rule.11 char Ad
Ad ==> rule.15 e | rule.16 array [ n ]
L ==> rule.14 i Ar
Ar ==> rule.36 e | rule.37 [ E ]
Sl ==> rule.18 S Sl0
S ==> rule.20 i S0 | rule.21 if C then S else S | rule.22 loop Sl end ; |
rule.23 exit ; | rule.
25 begin Sl end ; | rule.26 read Ln ; | rule.27 write Lo ; | rule.28 e ;
E ==> rule.38 F M0
C ==> rule.48 F M0 Or E
Lp ==> rule.29 e | rule.30 Ln
Ln ==> rule.31 i Ar M1
Lo ==> rule.32 Lr M2
Lr ==> rule.33 i Ar | rule.34 n | rule.35 c
F ==> rule.41 R M3
Oa ==> rule.39 + | rule.40 -
R ==> rule.44 i Ar | rule.45 n | rule.46 ( E ) | rule.47 c
Om ==> rule.42 * | rule.43 /
Or ==> rule.49 = | rule.50 < | rule.51 > | rule.52 <= | rule.53 >= | rule.54
!=
M0 ==> rule.55 + F M0 | rule.56 e | rule.66 - F M0
M1 ==> rule.57 , L M1 | rule.58 e
M2 ==> rule.59 , Lr M2 | rule.60 e
M3 ==> rule.61 * R M3 | rule.62 e | rule.67 / R M3
Ml0 ==> rule.2 module i ( Vl ) Dv B Ml0 | rule.63 e
Vl0 ==> rule.7 i Il0 : T ; Vl0 | rule.64 e
Il0 ==> rule.13 , i Il0 | rule.65 e
Sl0 ==> rule.68 e | rule.19 Sl
S0 ==> rule.69 Ar := E ; | rule.24 ( Lp ) ;
START ==> rule.70 M $
Press any key to continue
The following gives the index of each rule:
M ==> rule.0 program i ; Dl B Dl ==> rule.1 Dv Ml B ==> rule.17 begin Sl end ; Dv ==> rule.5 variables Vl | rule.6 e Ml ==> rule.3 Ml0 Mo ==> rule.4 module i ( Vl ) Dv B Vl ==> rule.8 Vl0 V ==> rule.9 Il : T ; Il ==> rule.12 i Il0 T ==> rule.10 integer Ad | rule.11 char Ad Ad ==> rule.15 e | rule.16 array [ n ] L ==> rule.14 i Ar Ar ==> rule.36 e | rule.37 [ E ] Sl ==> rule.18 S Sl0 S ==> rule.20 i S0 | rule.21 if C then S else S | rule.22 loop Sl end ; | rule.23 exit ; | rule. 25 begin Sl end ; | rule.26 read Ln ; | rule.27 write Lo ; | rule.28 e ; E ==> rule.38 F M0 C ==> rule.48 F M0 Or E Lp ==> rule.29 e | rule.30 Ln Ln ==> rule.31 i Ar M1 Lo ==> rule.32 Lr M2 Lr ==> rule.33 i Ar | rule.34 n | rule.35 c F ==> rule.41 R M3 Oa ==> rule.39 + | rule.40 - R ==> rule.44 i Ar | rule.45 n | rule.46 ( E ) | rule.47 c Om ==> rule.42 * | rule.43 / Or ==> rule.49 = | rule.50 < | rule.51 > | rule.52 <= | rule.53 >= | rule.54 != M0 ==> rule.55 + F M0 | rule.56 e | rule.66 - F M0 M1 ==> rule.57 , L M1 | rule.58 e M2 ==> rule.59 , Lr M2 | rule.60 e M3 ==> rule.61 * R M3 | rule.62 e | rule.67 / R M3 Ml0 ==> rule.2 module i ( Vl ) Dv B Ml0 | rule.63 e Vl0 ==> rule.7 i Il0 : T ; Vl0 | rule.64 e Il0 ==> rule.13 , i Il0 | rule.65 e Sl0 ==> rule.68 e | rule.19 Sl S0 ==> rule.69 Ar := E ; | rule.24 ( Lp ) ; START ==> rule.70 M $ Press any key to continue
กก
The if-statement is almost the most difficult one and Dr. Opatrny said if I can finish that, there will be no
difficulties.
The following is the convention of my intermediate three-address-code:
1. The three-address-code instruction-set: INTER_ADD, INTER_SUB, INTER_MUL, INTER_DIV, INTER_CEQ, INTER_CLT,INTER_CLE, INTER_CGT, INTER_CGE, INTER_CNE, INTER_ASN, INTER_READ, INTER_WRITE, INTER_LABEL, INTER_JTRUE, INTER_JFALSE, INTER_JUMP, INTER_HALT, INTER_CALL, INTER_PARAM 2. The instruction format: a) Arithmatic and logic instruction: (opcode, add1, add2, result) where add1 and add2 can be immediate value, opcode includes INTER_ADD, INTER_SUB, INTER_MUL, INTER_DIV, INTER_CEQ, INTER_CLT,INTER_CLE, INTER_CGT, INTER_CGE, INTER_CNE, i.e. (INTER_ADD, a, b, @t0) <===> @t0 = a+b i.e. (INTER_DIV, a, b, @t0) <===> @t0 = a/b b) Conditional jump and assignment (opcode, condition, -, label)\ where condition is a true-false value or right-hand-side variable and label is a label or left-hand-side variable. opcode includes: INTER_JTRUE, INTER_JFALSE,INTER_ASSIGNMENT i.e. (INTER_ASN, a, -, b) <===> b := a; i.e. (INTER_JFALSE, boolVal, -, label) <===> if_f boolVal then jump label i.e. (INTER_JTRUE, boolVal, -, label) <===> if boolVal then jump label c) Unconditional jump and Label and parameter passing and module calling (opcode, -,-, label) where label is a label or a module or a parameter of a module. opcode include : INTER_JUMP, INTER_LABEL, INTER_CALL, INTER_PARAM i.e. (INTER_LABEL, -,-, lbl) <===> label: lbl; i.e. (INTER_JUMP, -,-, lbl) <===> jump lbl; i.e. (INTER_CALL, -,-, moduleName) <===> call moduleName(); i.e. (INTER_PARAM, -,-,param) <===> passing param by either stack or register d) Read and write use different format: (opcode, var, -, -) where var is parameter for write and the opcode is INTER_WRITE; i.e. (INTER_WRITE, var, -, -) (opcode, -, -, var) where var is a parametn for read and opcode is INTER_READ; i.e. (INTER_READ, -,-, var); 3. Assignment statement and logical comparison statement will check the type of l-hand and r-hand. And only simple data type allowed. That is, array assignment and comparison operations are not allowed. Assignment must be in strict type: different type of left-hand-side and right-hand-side will be checked and will raise an error. 4. Module has parameters of upper bound of 10. Module calling will be checked with types and numbers of parameters. Parameter will be passed by reference or by its address. Recursive module calling is allowed. 5. Each module and variable must be declared before it is used. 6. Each variable and parameter declared in module can only be used in that module. 7. Each module should be declared exactly once. 8. Since operation of assignment and comparison will be checked with type, char and integer cannot be mixed during operations. 9. Read and write operation can only operate on simple data type. The following is consider as an error: module simple() variables A: integer array[3]; begin read A; /*error of read an array */ write simple; /*error of write a module */ end;
กก
The following is the general idea of my project organization:
My program includes 6 big modules:
a) CFGReader: It is the "context-free-grammar" reader which reads the
grammar from "grammar input" file---"ruleTest.txt"--into internal list and
drives "Grammar" module to do necessary modification and calculation for
grammar. The module includes files of "CFGReader.h" and "CFGReader.cpp".
b) Grammar: This is most complicated module of whole project! It analysises grammar input from "CFGReader" and does the following job:
i) replaces the BNF notation of ommission to standard "context-free-grammar" notation.
i.e. BNF format " A ==> B { C } " will be changed into " A ==> B A' " and " A' ==> C | e " where "e" stands for epsilon.
ii) removes "left-recursion"
i.e. " A ==> A B | C " will be changed into " A ==> C A' " and " A' ==> B A' | e "
iii) removes "common-left"
i.e. " A ==> B C | B D | E " will be changed into " A ==> B A' | E " and " A' ==> C | D "
iv) calculate "First" and "Follow" set of each variable.
v) add augment starting variable "START ==> S " where "S" is the original starting variable.
vi) establish parsing table
The table is like this: rows are variables and column is tokens and each entry of table is either the grammar rule or -1 which indicates error.
This module includes files of "grammar.h" and "grammar.cpp".
c) Scanner: My scanner is a table-driven one with "key words" also being
included in the table. That is all key words are treated as a special "id".
Therefore the ending of scanning immediately indicates finding of a particular
key word and no string comparison is needed to differenciate between id and
key word. This module includes files of "scanner.h", "initialize.cpp" and "scanner.cpp"
where "initialize.cpp" is trying to initialize all scanning table by hand. :)
d) Symbol-table: My symbol table is using a simple hash function by
"shifting and plus the ASCII value of character of input string". The size of
table is a prime number of 211. Basically my program has two scopes, one is
"program-level" and one is "module-level". So, I declared two objects of class
"Hash": mainHash and moduleHash to store symbols of two levels or scopes. When
entering new module, the entry in "moduleHash" will be cleared, however, since
the entries are simply pointers to data structures that store information of
identifiers, the "Node" or the data structure of identifiers remains in
memory. This module includes files of "hash.h" and "hash.cpp".
e) Parser: My parser is a LL(1) table-driven top-down parser. It needs two
input file: "ruleTest.txt" as grammar input and "test.txt" as source code
input. The code generation is combined with parser. It includes files of "parser.h"
and "parser.cpp".
f) Error Handler: Error handler will handle all kinds of errors during compilation by accepting error no. and two general parameters which will be interpreted under different situation. All error message no and string are defined in file "errorNo.cpp". Parsing errors will be output to file "test1.txt". Scanner errors will be output to file "test0.txt". And code generation errors will be output to file "test2.txt". This module includes files of "errorNo.h" and "errorNo.cpp".
How to use:
How to use my file:
a) Make sure that the grammar input file "ruleTest.txt" is at same directory of executable file "CFGReader.exe".
b) Put your test source code in a source input file named "test.txt" at the same directory with a).
c) Run "CFGReader.exe" and three output files will be generated. (see below)
"ruleTest.txt" (The grammar input file, don't change it.),
"test.txt" (The input source code for testing, you can write your own with same file name "test.txt"),
"test0.txt" (The source listing file for "scanner".),
"test1.txt"(The output file for "parser", listing the parsing results.),
"test2.txt" (The three-address-code listing file).
กก
E.Further improvement
F.File listing
1. CFGReader.h
2. CFGReader.cpp
3. Grammar.h
4. Grammar.cpp
5. parser.h
6. parser.cpp
7. scanner.h
8. scanner.cpp
9. errorno.h
10. errorNo.cpp
11. initialize.cpp
12. hash.h
13. hash.cpp
12. main.cpp (main)
file name: CFGReader.h
#include "Grammar.h"
class CFGReader
{
private:
char buffer[BufferLength];
FILE* stream;
void newRule(const char* str);
void readRule();
void addRule(const char* str);
int addToken(const char* str, bool isTerminal=true);
void printRule(int index);
public:
void readFromFile(const char* fileName);
void optimize(bool forLL=true);
void calculateLookAhead();
void print();
};
file name: CFGReader.cpp
#include <iostream>
#include "CFGReader.h"
using namespace std;
Grammar grammar;
const char* deliStr=" |\n";
//this would only remove the immediate left recursion
//and should I expand it to remove all kinds of left-recursion?
void CFGReader::readFromFile(const char* fileName)
{
if ((stream=fopen(fileName, "r"))==NULL)
{
cout<<"cannot open file "<<fileName<<endl;
}
else
{
readRule();
}
}
void CFGReader::readRule()
{
char* ptr=buffer;
char* hold;
int count=0;
while (!feof(stream))
{
count=0;
fgets(buffer, BufferLength-1, stream);
ptr=buffer;
while ((ptr=strtok(ptr, " \n"))!=NULL)
{
if (strcmp(ptr, "|")!=0)
{
//test the first two token to see if old rule continues
if (count==0)
{
hold=ptr;
}
if (count==1)
{
if (strcmp("->", ptr)==0)
{
/*
curIndex=addToken(hold, false);//the index of cur token
grammar[curIndex]->terminal=false;
newRule();
*/
newRule(hold);
}
else
{
addRule(hold);
addRule(ptr);
}
}
if (count>=2)//always add
{
addRule(ptr);
}
count++;
}
else
{
//this is an alternative production rule
newRule(NULL);
}
ptr=NULL;
}
}
}
void CFGReader::newRule(const char* str)
{
grammar.newRule(str);
}
void CFGReader::optimize(bool forLL)
{
grammar.optimize(forLL);
}
void CFGReader::calculateLookAhead()
{
grammar.calculateLookAhead();
}
void CFGReader::addRule(const char* str)
{
grammar.addRule(str);
}
int CFGReader::addToken(const char* str, bool isTerminal)
{
return grammar.addToken(str, isTerminal);
}
void CFGReader::printRule(int index)
{
grammar.printRule(index);
}
void CFGReader::print()
{
grammar.print();
//grammar.printToken();
//grammar.buildTable();
//grammar.printTable();
//grammar.printAllRules();
}
file name: Grammar.h
#ifndef GRAMMAR_H
#define GRAMMAR_H
#include <bitset>
#include <iostream>
using namespace std;
const int BufferLength=256;
const int MaxTokenCount=100;
const int MaxRHSCount=40;
const int MaxRuleCount=200;
const int MaxGrammarTokenLength=10;
const int MaxProduction=10;
const int TokenTypeCount=38;
const int MaxStateCount=200;
const int MaxItemCount=500;
//a macro to ease the job
//#define BitSet (bitset<MaxItemCount>)
struct GrammarToken
{
//bool isMeta;
bool terminal;
bool isNull;
char* name;
int production[MaxProduction];//pointer to the production it gives
int count;
int firstCount;
int followCount;
int follow[MaxTokenCount];
int first[MaxTokenCount];
};
struct Item
{
int varIndex;
int rulePos;
int dotPos;
};
bool operator == (Item& first, Item& second);
/*
Item& operator= (Item& first, Item& second)
{
first.dotPos=second.dotPos;
first.rulePos=second.rulePos;
first.varIndex=second.varIndex;
return first;
}
*/
class BitSet: public bitset<MaxItemCount>
{
private:
int current;
public:
BitSet& operator=(const BitSet& theSet);
int next();
BitSet();
void restart(){current=-1;}
};
class Grammar
{
//to allow Parser to access Grammar
friend class Parser;
friend class LRParser;
private:
int LRTable[MaxStateCount][MaxTokenCount];
int terminalCount;
int nonTermCount;
GrammarToken* token[MaxTokenCount];
int tokenCount;
int curIndex;//the index of current token
int curPos;//the position at production rule
//MaxRuleCount>=MaxVariableCount!!!
//the last one in each rule is reserved for the length
int production[MaxRuleCount][MaxRHSCount+1];
int prodIndex;//pointing to current production, initialized to -1
void removeEpsilon(int ruleIndex);
int checkRecursion(int tIndex, int curToken);
void grammarError(int theVar, int theToken, int rule1, int rule2);
void addFirstIntoTable(int theVariable, int theFirst, int theRule);
void addFollowIntoTable(int theVariable, int theRule);
void addBeginEnd();
void addEpsilonForToken(int tIndex);
bool addIntoFirst(int target, int source);
bool addFirst(int target, int source);
void shiftRule(int ruleIndex, bool Left2Right, int offset);
void addAtEnd(int ruleIndex, int toAdd);
void addRuleForToken(int tIndex, int ruleIndex);
int copyRule(int source, int target);//return the position of -1
void replaceRule(int curToken, int ruleIndex);
int findMetaSymbol(int& begin, int& end);
void initialize();
void doReplaceMetaSymbol(int ruleIndex, int begin, int end);
void removeRuleFromToken(int tIndex, int ruleIndex);
void checkEpsilon(int ruleIndex);
void removeImmediate(int tIndex, int ruleIndex);
void doAddRule(int tIndex);
void commonLeftFactor();
int findCommonFactor(int tokenIndex, int ruleIndex);
void removeLeftRecursion();
void doCommonFactor(int tokenIndex, int ruleIndex, int index);
int forgeToken(int index);//create a new variable
void epsilonRule(int ruleIndex);
bool isRedundant(int tIndex);
void replaceMetaSymbol();
void calculateFirst();
void calculateFollow();
void calculateNull();
bool Null(int tIndex);
bool addFollow(int target, int source);
bool addIntoFollow(int target, int source);
bool addFollowIntoFollow(int target, int source);
void removeRedundant();
void removeToken(int tIndex);
void LRoptimize();
void LLoptimize();
void buildLLTable();
void buildLRTable();
int ruleLen(int ruleIndex);
void calculateProperty();
int item2Rule(int itemIndex);
int item2Var(int itemIndex);
int shift2Mask(int input);
int reduce2Mask(int input);
void printDFA();
bool* expandFinished;
int mask2State(int input);
bool isReduce(int input);
bool isShift(int input);
//items-related methods
void initializeDFA();
void uninitializeDFA();
void constructDFA();
//void addItems(int itemIndex, bitset<MaxStateCount>* theSet);
int addItem(Item& theItem);
int itemCount;
int stateCount;
Item**items;
//int addState(bitset
void addSonItem(int tIndex, BitSet& theSet);
int createState(const BitSet& theSet);
BitSet** sets;
int createItem(int theVar, int theNo, int position);
void addClosure(int itemIndex, BitSet& theSet);
void expandState(int stateIndex);
int findNextTokenOfItem(int itemIndex);
void doExpandState(int itemIndex, int tIndex, BitSet& theSet);
void add2Table(int stateIndex, int targetState, int tIndex);//this is shift
void add2Table(int stateIndex, int ruleIndex);//this is reduce
public:
Grammar();
void buildTable(bool forLLGrammar=true);
void optimize(bool forLLGrammar=true);
void printRule(int index, ostream& out=cout);
void printRule(int tIndex, int rulePos, int dotPos, ostream& out=cout);
void print(ostream& out=cout);
void printTable(bool isLL=true);
void printAllRules();
void printToken(bool onlyVar=false);
int varCount();
int termCount();
void addRule(const char* str);
void newRule(const char* str);//this is an internal method, not suitable for outside
GrammarToken* operator[](int index);
int addToken(const char* str, bool isTerminal=true);
GrammarToken* createToken(const char* theName, bool isTerminal);
int tokenIndex(const char* str);
void calculateLookAhead();
};
#endif
กก
file name: Grammar.cpp
#include <iostream>
#include <fstream>
#include <iomanip>
#include "errorNo.h"
#include "Grammar.h"
using namespace std;
extern ofstream fRule;
/*
#define REDUCEMASK 0xd10000000
#define SHIFTMASK 0xd20000000
#define STATEMASK 0xd01111111
#define SHIFTREDUCEMASK 0xd30000000
*/
const int REDUCEMASK = 268435456;
const int SHIFTMASK = 536870912;
const int STATEMASK = 268435455;
const int SHIFTREDUCEMASK = 805306368;
const int ACCEPTANCE = SHIFTREDUCEMASK;
const char* emptyStr="e";
const char* optionBegin="{";
const char* optionEnd="}";
const char* startStr="START";
const char* endStr="$";
int startSymbolIndex=-1;
int stackBottomIndex=-1;
int beginIndex=-1;
int endIndex=-1;
int emptyIndex=-1;
int epsilonIndex=-1;
int table[MaxTokenCount][MaxTokenCount];
char* terminalStr[TokenTypeCount]=
{
//GENERAL TYPE 5
"i", "n", "c", "COMMENT", "ERROR",
//THE FOLLOWING ARE SYMBOL TYPE 18
"(", ")", ";", "+", "-", "*",
"/", ":=", "<", ">", "=", "<=",
">=", "!=", "[", "]", ",",
":",
//THE FOLLOWING ARE RESERVED TYPE 15
"begin", "end", "program", "variables","integer", "array", "char",
"module", "if", "then", "else", "loop", "exit", "read", "write"
};
//these are "hash-table-like" tables
//this is exactly the opposite of below
int token2type[MaxTokenCount];
//this is to translate tokenType in scanner to token-index of grammar
int type2token[MaxTokenCount];
int matchTokens(const char* str)
{
for (int i=0; i<TokenTypeCount; i++)
{
if (strcmp(terminalStr[i], str)==0)
{
return i;
}
}
return -1;
}
void Grammar::calculateProperty()
{
terminalCount=nonTermCount=0;
for (int i=0; i<tokenCount; i++)
{
if (token[i]->terminal)
{
terminalCount++;
continue;
}
for (int j=0; j<token[i]->count; j++)
{
int len;
len=ruleLen(token[i]->production[j]);
production[token[i]->production[j]][MaxRHSCount]=len;
}
nonTermCount++;
}
}
void Grammar::addEpsilonForToken(int tIndex)
{
if (epsilonIndex==-1)
{
epsilonRule(++prodIndex);
addRuleForToken(tIndex, prodIndex);
}
else
{
addRuleForToken(tIndex, epsilonIndex);
}
}
void Grammar::printTable(bool isLL)
{
/*
cout<<" ";
for (int i=0; i<tokenCount; i++)
{
cout<<"| "<<i;
}
cout<<endl;
*/
if (isLL)
{
for (int r=0; r<tokenCount; r++)
{
if (token[r]->terminal)
{
continue;
}
cout<<token[r]->name<<":";
for (int c=0; c<tokenCount; c++)
{
if (table[r][c]!=-1)
{
cout<<token[c]->name<<"="<<table[r][c]<<",";
}
}
cout<<endl;
}
}
else
{
cout<<"the following is LR table\n";
cout<<" ";
for (int col=0; col<tokenCount; col++)
{
cout<<token[col]->name<<",";
}
cout<<endl;
for (int r=0; r<stateCount; r++)
{
cout<<r<<"| ";
for (int c=0; c<tokenCount; c++)
{
int result=LRTable[r][c];
if (isReduce(result))
{
cout<<"r";
cout<<mask2State(result);
}
else
{
if (isShift(result))
{
cout<<"s";
cout<<mask2State(result);
}
else
{
cout<<result;
}
}
cout<<",";
}
cout<<endl;
}
}
}
void Grammar::buildLRTable()
{
constructDFA();
printDFA();
cout<<"\n";
uninitializeDFA();
}
//initialized at "initialize()"
void Grammar::buildLLTable()
{
int typeIndex;
for (int r=0; r<tokenCount; r++)
{
if (token[r]->terminal)
{
typeIndex=matchTokens(token[r]->name);
if (typeIndex!=-1)
{
type2token[typeIndex]=r;
token2type[r]=typeIndex;
}
}
//initialize
/*
for (int c=0; c<MaxTokenCount; c++)
{
table[r][c]=-1;
}
*/
}
for (int i=0; i<tokenCount; i++)
{
if (token[i]->terminal)
{
continue;
}
for (int j=0; j<token[i]->count; j++)
{
int k=0, theRule=token[i]->production[j];
int theToken=production[theRule][k];
while (theToken!=-1)
{
addFirstIntoTable(i, theToken, theRule);
if (!token[theToken]->isNull)
{
break;
}
k++;
theToken=production[theRule][k];
}
if (theToken==-1)
{
addFollowIntoTable(i, theRule);
}
}
}
}
void Grammar::buildTable(bool forLLGrammar)
{
if (forLLGrammar)
{
buildLLTable();
}
else
{
buildLRTable();
}
}
/*
nCount=tCount=0;
for (int r=0; r<MaxTokenCount; r++)
{
if (r<tokenCount)
{
if (token[r]->terminal)
{
typeIndex=matchTokens(token[r]->name);
if (typeIndex!=-1)
{
type2token[typeIndex]=r;
token2type[r]=typeIndex;
}
//I am making a invertable table!!!
tArray[r]=tCount++;
}
else
{
//it is an invertable table!!!
nArray[r]=nCount++;
}
}
for (int c=0; c<MaxTokenCount; c++)
{
table[r][c]=-1;
}
}
for (int i=0; i<tCount; i++)
{
for (int j=0; j<token[i]->count; j++)
{
int k=0, theRule=token[i]->production[j];
int theToken=production[theRule][k];
while (theToken!=-1)
{
addFirstIntoTable(theToken, j);
if (!token[theToken]->isNull)
{
break;
}
k++;
theToken=production[theRule][k];
}
if (theToken==-1)
{
addFollowIntoTable(j);
}
}
}
}
*/
void Grammar::grammarError(int theVar, int theToken, int rule1, int rule2)
{
cout<<"error! the conflict of grammar at ";
cout<<token[theVar]->name<<" for token of "
<<token[theToken]->name
<<" between rules of \n";
printRule(rule1);
cout<<" and ";
printRule(rule2);
cout<<endl;
}
void Grammar::addFollowIntoTable(int theVariable, int theRule)
{
int temp;
for (int i=0; i<token[theVariable]->followCount; i++)
{
temp=token[theVariable]->follow[i];
if (table[theVariable][temp]!=theRule)
{
if (table[theVariable][temp]!=-1)
{
grammarError(theVariable, temp, theRule, table[theVariable][temp]);
}
table[theVariable][temp]=theRule;
}
}
}
void Grammar::addFirstIntoTable(int theVariable, int theFirst, int theRule)
{
for (int i=0; i<token[theFirst]->firstCount; i++)
{
if (table[theVariable][token[theFirst]->first[i]]!=theRule)
{
if (table[theVariable][token[theFirst]->first[i]]!=-1)
{
grammarError(theVariable, token[theFirst]->first[i], theRule,
table[theVariable][token[theFirst]->first[i]]);
}
table[theVariable][token[theFirst]->first[i]]=theRule;
}
}
}
int Grammar::varCount()
{
int result=0;
for (int i=0; i<tokenCount; i++)
{
if (!token[i]->terminal)
{
result++;
}
}
return result;
}
int Grammar::termCount()
{
int result=0;
for (int i=0; i<tokenCount; i++)
{
if (token[i]->terminal)
{
result++;
}
}
return result;
}
//leave the rule unremoved!!! as I have no way to do it!!!
void Grammar::removeToken(int tIndex)
{
delete[]token[tIndex]->name;
delete token[tIndex];
tokenCount--;
for (int i=tIndex; i<tokenCount; i++)
{
token[i]=token[i+1];
}
}
bool Grammar::isRedundant(int tIndex)
{
int k, theRule, theToken;
for (int i=0; i<tokenCount; i++)
{
for (int j=0; j<token[i]->count; j++)
{
k=0;
theRule=token[i]->production[j];
theToken=production[theRule][k];
while (theToken!=-1)
{
if (theToken==tIndex)
{
return false;
}
k++;
theToken=production[theRule][k];
}
}
}
return true;
}
//what is redundant? except the start variable
//the non-terminal never appears in other rules!
void Grammar::removeRedundant()
{
int tIndex=1;
bool findNew=false;
while (tIndex<tokenCount)
{
findNew=false;
if (!token[tIndex]->terminal)
{
if (isRedundant(tIndex))
{
removeToken(tIndex);
findNew=true;
}
}
if (findNew)
{
tIndex=1;
}
else
{
tIndex++;
}
}
}
void Grammar::printAllRules()
{
/*
cout<<"\nnow print all rules\n";
for (int i=0; i<=prodIndex; i++)
{
cout<<"rule index "<<i<<": ";
printRule(i);
cout<<"\n";
}
*/
int sum=0;
for (int i=0; i<tokenCount; i++)
{
if (!token[i]->terminal)
{
sum+=token[i]->count;
}
}
cout<<" total rules is:"<<prodIndex+1;
cout<<"\n and the sum is "<<sum<<endl;
}
void Grammar::addBeginEnd()
{
int begin=addToken(startStr, false);
int end=addToken(endStr, true);
prodIndex++;
production[prodIndex][0]=0;
production[prodIndex][1]=end;
production[prodIndex][2]=-1;
addRuleForToken(begin, prodIndex);
startSymbolIndex=begin;
stackBottomIndex=end;
}
/*
void Grammar::calculateProperty()
{
calculateNull();
calculateFirst();
calculateFollow();
}
*/
void Grammar::printToken(bool onlyVar)
{
for (int i=0; i<tokenCount; i++)
{
if (onlyVar)
{
if (token[i]->terminal)
{
continue;
}
}
cout<<"name: "<<token[i]->name<<endl;
cout<<" isNull: "<<(token[i]->isNull?"true":"false")<<endl;
cout<<" isTerminal: "<<(token[i]->terminal?"true":"false")<<endl;
cout<<" first: ";
for (int j=0; j<token[i]->firstCount; j++)
{
if (j!=0)
{
cout<<",";
}
cout<<"["<<j<<"]="<<token[token[i]->first[j]]->name;
}
cout<<"\n follow: ";
for (j=0; j<token[i]->followCount; j++)
{
if (j!=0)
{
cout<<",";
}
cout<<"["<<j<<"]="<<token[token[i]->follow[j]]->name;
}
cout<<endl;
}
}
//must use while loop to discover new set!!!!!
void Grammar::calculateFirst()
{
int i;
bool addNew;
do
{
i=0;
addNew=false;
while (i<tokenCount)
{
//the terminal contains itself
if (token[i]->terminal)
{
//addFirst don't judge if it is nullable!!
addFirst(i, i);
//token[i]->first[token[i]->firstCount++]=i;
}
else
{
//for all its rules
for (int j=0; j<token[i]->count; j++)
{
int theToken, k=0;
theToken=production[token[i]->production[j]][k];
//for each token in each rule
do
{
//add non epsilon set
if (addIntoFirst(i, theToken))
{
addNew=true;
}
//if it is not null, means it is end
if (!token[theToken]->isNull)
{
break;
}
//if it is nullable, continue
k++;
theToken=production[token[i]->production[j]][k];
}while (theToken!=-1);
//it means all token in this rule is nullable, so
if (theToken==-1)
{
//it is nullable
//addEpsilonIntoFirst(i);
addFirst(i, emptyIndex);
}
}
}
i++;
}
}while (addNew);
}
bool Grammar::addFollowIntoFollow(int target, int source)
{
bool addNew=false;
for (int i=0; i<token[source]->followCount; i++)
{
if (addFollow(target, token[source]->follow[i]))
{
addNew=true;
}
}
return addNew;
}
bool Grammar::addIntoFollow(int target, int source)
{
bool addNew=false;
if (source==-1)
{
return false;
}
for (int i=0; i<token[source]->firstCount; i++)
{
//add non-epsilon
if (!token[token[source]->first[i]]->isNull)
{
if (addFollow(target, token[source]->first[i]))
{
addNew=true;
}
}
}
return addNew;
}
void Grammar::calculateFollow()
{
int i;
bool addNew, started;
//token[startSymbolIndex]->follow[0]=stackBottomIndex;
//token[startSymbolIndex]->followCount=1;
do
{
i=0;
addNew=false;
while (i<tokenCount)
{
//the terminal contains itself
if (!token[i]->terminal)
{
for (int tIndex=0; tIndex<tokenCount; tIndex++)
{
//for all its rules
if (token[tIndex]->terminal)
{
continue;
}
//for each its rule
for (int j=0; j<token[tIndex]->count; j++)
{
int theToken, k=0, theRule=token[tIndex]->production[j];
started=false;
theToken=production[theRule][k];
//for each token in each rule
do
{
//the token appears here
if (started)
{
if (addIntoFollow(i, theToken))
{
addNew=true;
}
if (!token[theToken]->isNull)
{
break;
}
}
if (theToken==i)
{
started=true;
//add non epsilon set, including -1 situation!!!
}
//if it is not null, means it is end
//if it is nullable, continue
k++;
theToken=production[theRule][k];
}while (theToken!=-1);
//it means all token in this rule is nullable, so
if (started&&theToken==-1)
{
//it is nullable
//add current variable Follow(j) into Follow(i);
if (addFollowIntoFollow(i, tIndex))
{
addNew=true;
}
}
}
}
}
i++;
}
}while (addNew);
}
void Grammar::addRuleForToken(int tIndex, int ruleIndex)
{
token[tIndex]->production[token[tIndex]->count++]=ruleIndex;
}
void Grammar::shiftRule(int ruleIndex, bool left2Right, int offset)
{
int end=0;
/*
while (production[ruleIndex][end]!=-1)
{
end++;
}
*/
end=ruleLen(ruleIndex);
if (left2Right)
{
for (int i=end; i>=0; i--)
{
production[ruleIndex][i+offset]=production[ruleIndex][i];
}
}
else
{
for (int i=0; i<=end-offset; i++)
{
production[ruleIndex][i]=production[ruleIndex][i+offset];
}
checkEpsilon(ruleIndex);
}
}
void Grammar::checkEpsilon(int ruleIndex)
{
if (production[ruleIndex][0]==-1)
{
epsilonRule(ruleIndex);
}
}
bool Grammar::addFollow(int target, int source)
{
//check if it is already there
for (int i=0; i<token[target]->followCount; i++)
{
if (token[target]->follow[i]==source)
{
return false;
}
}
//add at end
token[target]->follow[token[target]->followCount++]=source;
return true;
}
bool Grammar::addFirst(int target, int source)
{
//check if it is already there
for (int i=0; i<token[target]->firstCount; i++)
{
if (token[target]->first[i]==source)
{
return false;
}
}
//add at end
token[target]->first[token[target]->firstCount++]=source;
return true;
}
//add non epsilon into it.
bool Grammar::addIntoFirst(int target, int source)
{
bool addNew=false;
if (token[source]->terminal)
{
if (!token[source]->isNull)
{
return addFirst(target, source);
}
else
{
return false;
}
}
for (int i=0; i<token[source]->firstCount; i++)
{
//add non epsilon
if (!token[token[source]->first[i]]->isNull)
{
if (addFirst(target, token[source]->first[i]))
{
addNew=true;
}
}
}
return addNew;
}
bool Grammar::Null(int tIndex)
{
if (token[tIndex]->terminal)
{
return token[tIndex]->isNull;
}
for (int i=0; i<token[tIndex]->count; i++)
{
int j=0, theToken;
theToken=production[token[tIndex]->production[i]][j];
do
{
if (theToken==tIndex||!Null(theToken))
{
break;
}
j++;
theToken=production[token[tIndex]->production[i]][j];
}while (theToken!=-1);
if (theToken==-1)
{
return true;
}
}
return false;
}
void Grammar::calculateNull()
{
for (int i=0; i<tokenCount; i++)
{
token[i]->isNull=Null(i);
}
}
int Grammar::findMetaSymbol(int& begin, int& end)
{
int theRule, theToken, k;
begin=end=-1;
for (int i=0; i<tokenCount; i++)
{
for (int j=0; j<token[i]->count; j++)
{
k=0;
theRule=token[i]->production[j];
theToken=production[theRule][k];
while (theToken!=-1)
{
if (theToken==beginIndex)
{
begin=k;
}
if (theToken==endIndex)
{
end=k;
return theRule;
}
k++;
theToken=production[theRule][k];
}
}
}
return -1;
}
void Grammar::doReplaceMetaSymbol(int ruleIndex, int begin, int end)
{
int newTokenIndex=forgeToken(0), i=0;
addRuleForToken(newTokenIndex, ++prodIndex);
//token[newTokenIndex]->production[token[newTokenIndex]->count++]=++prodIndex;
//token[newTokenIndex]->terminal=false;
copyRule(ruleIndex, prodIndex);
//shrink
while (production[ruleIndex][i+end+1]!=-1)
{
production[ruleIndex][begin+i]=production[ruleIndex][i+end+1];
i++;
}
production[ruleIndex][begin+i]=-1;
addAtEnd(ruleIndex, newTokenIndex);
/*
production[ruleIndex][begin]=newTokenIndex;
production[ruleIndex][begin+1]=-1;
*/
shiftRule(prodIndex, false, begin+1);
production[prodIndex][end-begin-1]=newTokenIndex;
production[prodIndex][end-begin]=-1;
addEpsilonForToken(newTokenIndex);
/*
if (epsilonIndex==-1)
{
eRuleIndex=++prodIndex;
epsilonRule(eRuleIndex);
}
else
{
eRuleIndex=epsilonIndex;
}
addRuleForToken(newTokenIndex, eRuleIndex);
*/
}
void Grammar::replaceMetaSymbol()
{
int begin, end, ruleIndex;
while ((ruleIndex=findMetaSymbol(begin, end))!=-1)
{
doReplaceMetaSymbol(ruleIndex, begin, end);
}
/*
for (int i=0; i<tokenCount; i++)
{
//if (token[i]->isMeta)
if (i==beginIndex||i==endIndex)
{
removeToken(i);
}
}
*/
}
void Grammar::removeEpsilon(int ruleIndex)
{
int i=0;
while (production[ruleIndex][i]!=-1)
{
if (production[ruleIndex][i]==emptyIndex)
{
do
{
production[ruleIndex][i]=production[ruleIndex][i+1];
i++;
}while (production[ruleIndex][i]!=-1);
return;
}
i++;
}
}
void Grammar::addAtEnd(int ruleIndex, int toAdd)
{
int end;
end=ruleLen(ruleIndex);
production[ruleIndex][end++]=toAdd;
production[ruleIndex][end]=-1;
}
//left-recursion: A -> A a | b | c
//change to: A -> b A' | c A'
// A' -> a A' | empty
void Grammar::removeImmediate(int tIndex, int ruleIndex)
{
int newIndex=forgeToken(tIndex);
int holdRuleIndex=token[tIndex]->production[ruleIndex];
//sequence matters!
//change to: A -> b A'
for (int i=0; i<token[tIndex]->count; i++)
{
if (i!=ruleIndex)
{
addAtEnd(token[tIndex]->production[i], newIndex);
removeEpsilon(token[tIndex]->production[i]);
}
}
//shift
removeRuleFromToken(tIndex, ruleIndex);
addRuleForToken(newIndex, holdRuleIndex);
//token[newIndex]->production[token[newIndex]->count++]=holdRuleIndex;
shiftRule(holdRuleIndex, false, 1);
addAtEnd(holdRuleIndex, newIndex);
//add epsilon rule for new variable
addEpsilonForToken(newIndex);
/*
epsilonRule(++prodIndex);
addRuleForToken(newIndex, prodIndex);
*/
}
int Grammar::forgeToken(int index)
{
char str[MaxGrammarTokenLength+2], ch;
int len=strlen(token[index]->name);
int temp=0, i=0;
strcpy(str, token[index]->name);
ch=str[len-1];//get last char of name
if (ch>='0'&&ch<'9')
{
str[len-1]=ch+i+1;
}
else
{
str[len]='0'+i;
str[len+1]='\0';
}
//I won't bother to check repetitation of name
while (tokenIndex(str)!=-1)
{
i++;
if (ch>='0'&&ch<'9')
{
str[len-1]=ch+i+1;
}
else
{
str[len]='0'+i;
str[len+1]='\0';
}
}
return addToken(str, false);//it is non-terminal for sure
}
int Grammar::copyRule(int source, int target)
{
int i=0;
while (production[source][i]!=-1)
{
production[target][i]=production[source][i];
i++;
}
production[target][i]=-1;
return i;
}
void Grammar::doAddRule(int tIndex)
{
token[tIndex]->production[token[tIndex]->count++]=++prodIndex;
}
void Grammar::addRule(const char* str)
{
int index;
index=addToken(str);
production[prodIndex][curPos++]=index;
production[prodIndex][curPos]=-1;//set end
}
//if the token is already there, it return the index
//otherwise, it add new node in token array
int Grammar::addToken(const char* str, bool isTerminal)
{
int index;
index=tokenIndex(str);
if (index==-1)
{
index=tokenCount;
}
else
{
return index;
}
token[index]=createToken(str, isTerminal);
if (strcmp(str, optionBegin)==0)
{
beginIndex=index;
}
if (strcmp(str, optionEnd)==0)
{
endIndex=index;
}
tokenCount++;
if (strcmp(str, emptyStr)==0)
{
token[index]->isNull=true;
emptyIndex=index;
}
return index;
}
void Grammar::newRule(const char* str)
{
if (str!=NULL)
{
curIndex=addToken(str, false);
}
//add one pointer
token[curIndex]->production[token[curIndex]->count++]=++prodIndex;
token[curIndex]->terminal=false;
curPos=0;//reset to restart;
}
GrammarToken* Grammar::createToken(const char* theName, bool isTerminal)
{
GrammarToken* ptr=new GrammarToken;
ptr->name=new char[strlen(theName)+1];
strcpy(ptr->name, theName);
ptr->terminal=isTerminal;
ptr->count=ptr->firstCount=ptr->followCount=0;
ptr->isNull=false;
return ptr;
}
int Grammar::tokenIndex(const char* str)
{
for (int i=0; i<tokenCount; i++)
{
if (strcmp(str, token[i]->name)==0)
{
return i;
}
}
return -1;
}
int Grammar::checkRecursion(int tIndex, int curToken)
{
for (int i=0; i<token[curToken]->count; i++)
{
//token[tIndex]->production[i]=ruleIndex
//production[ruleIndex][0] is the first left-recursion one
if (production[token[curToken]->production[i]][0]<=curToken)
{
return i;
}
}
return -1;
}
void Grammar::printRule(int index, ostream& out)
{
int nodeIndex=0;
//cout<<" ~"<<index<<"~ ";
while (production[index][nodeIndex]!=-1)
{
//cout<<production[index][nodeIndex]<<" "<<
//this is old line
//for debug
out<<token[production[index][nodeIndex]]->name<<" ";
//cout<<token[production[index][nodeIndex]]->name<<" ";
//for debug:
//out<<"no."<<production[index][nodeIndex]<<" "<<
// token[production[index][nodeIndex]]->name<<" ";
nodeIndex++;
}
}
void Grammar::printRule(int tIndex, int rulePos, int dotPos, ostream& out)
{
int nodeIndex=0;
//for debug
//cout<<token[tIndex]->name<<" ==> ";
out<<token[tIndex]->name<<" ==> ";
//cout<<" ~"<<index<<"~ ";
while (production[token[tIndex]->production[rulePos]][nodeIndex]!=-1)
{
if (nodeIndex==dotPos)
{
//debug
//cout<<" . ";
out<<" . ";
}
//debug
out<<token[production[token[tIndex]->production[rulePos]][nodeIndex]]->name<<" ";
//cout<<token[production[token[tIndex]->production[rulePos]][nodeIndex]]->name<<" ";
nodeIndex++;
}
if (nodeIndex==dotPos)
{
//debug
//cout<<" . ";
out<<" . ";
}
//printRule(token[tIndex]->production[rulePos]);
}
void Grammar::initialize()
{
tokenCount=curIndex=curPos=0;
prodIndex=-1;//in order to ++ blindly
for (int i=0; i<MaxStateCount; i++)
{
for (int j=0; j<MaxTokenCount; j++)
{
LRTable[i][j]=-1;
}
}
for (i=0; i<MaxTokenCount; i++)
{
for (int j=0; j<MaxTokenCount; j++)
{
table[i][j]=-1;
}
}
}
Grammar::Grammar()
{
initialize();
}
void Grammar::removeLeftRecursion()
{
int tIndex=0, curToken=0;
bool newChange=false;
while (tIndex<tokenCount)
{
if (!token[tIndex]->terminal)
{
for (int i=0; i<token[tIndex]->count; i++)
{
curToken=production[token[tIndex]->production[i]][0];
if (curToken<=tIndex&&!token[curToken]->terminal)
{
if (curToken!=tIndex)
{
replaceRule(tIndex, i);
}
else
{
removeImmediate(tIndex, i);
}
newChange=true;
}
}
}
//whenever there is some new findings, restart
if (!newChange)
{
tIndex++;
}
else
{
tIndex=0;
newChange=false;
}
}
}
void Grammar::replaceRule(int tIndex, int ruleIndex)
{
int pos, j, targetEnd, sourceEnd, curRule;
curRule=token[tIndex]->production[ruleIndex];
int curToken=production[curRule][0];
for (int i=token[curToken]->count-1; i>=0; i--)
{
if (i>0)
{
doAddRule(tIndex);
pos=copyRule(token[curToken]->production[i], prodIndex);
j=0;
while (production[curRule][j+1]!=-1)
{
production[prodIndex][pos+j]=production[curRule][j+1];
j++;
}
production[prodIndex][pos+j]=-1;
//addRuleForToken(curToken, prodIndex);
}
else
{
targetEnd=sourceEnd=0;
//curRule=token[tIndex]->production[ruleIndex];
while (true)
{
if (production[token[curToken]->production[0]][sourceEnd]==-1&&
production[curRule][targetEnd]==-1)
{
break;
}
if (production[token[curToken]->production[0]][sourceEnd]!=-1)
{
sourceEnd++;
}
if (production[curRule][targetEnd]!=-1)
{
targetEnd++;
}
}
j=targetEnd+sourceEnd-1;
while (j>=0)
{
if (j>=sourceEnd)
{
production[curRule][j]=production[curRule][j-sourceEnd+1];
}
else
{
production[curRule][j]=production[token[curToken]->production[0]][j];
}
j--;
}
}
}
}
void Grammar::calculateLookAhead()
{
replaceMetaSymbol();
calculateNull();
cout<<setw(20)<<"The Rule "<<"Look-Ahead "<<endl;
for (int i=0; i<tokenCount; i++)
{
if (token[i]->terminal)
{
continue;
}
for (int j=0; j<token[i]->count; j++)
{
int theRule, theToken, k=0;
theRule=token[i]->production[j];
cout<<setw(5);
cout<<token[i]->name<<" -> ";
cout<<setiosflags(ios::left);
printRule(theRule);
cout<<" ";
theToken=production[theRule][k];
while (theToken!=-1)
{
if (k!=0)
{
cout<<"+";
}
cout<<"First("<<token[theToken]->name<<")";
if (!token[theToken]->isNull)
{
break;
}
k++;
theToken=production[theRule][k];
}
if (theToken==-1)
{
if (k!=0)
{
cout<<"+";
}
cout<<"Follow("<<token[i]->name<<")";
}
cout<<"\n";
}
}
}
void Grammar::LLoptimize()
{
removeLeftRecursion();
commonLeftFactor();
}
void Grammar::LRoptimize()
{
calculateProperty();
}
//the return value is the count, not the index of rule
int Grammar::ruleLen(int ruleIndex)
{
int end=0;
while (production[ruleIndex][end]!=-1)
{
end++;
}
return end;
}
//optimize sequence is first common-factor then remove left recursion
//therefore I don't have to check if for same variable if there will be
//more than one left-recursion
void Grammar::optimize(bool forLLGrammar)
{
replaceMetaSymbol();
if (forLLGrammar)
{
LLoptimize();
addBeginEnd();
}
else
{
addBeginEnd();
LRoptimize();
}
//removeRedundant();
calculateNull();
calculateFirst();
calculateFollow();
buildTable(forLLGrammar);
}
int Grammar::findCommonFactor(int tIndex, int ruleIndex)
{
for (int i=ruleIndex+1; i<token[tIndex]->count; i++)
{
//if the two rule has the same first token
if (production[token[tIndex]->production[ruleIndex]][0]==
production[token[tIndex]->production[i]][0])
{
/*
//calculate if there is epsilon
if (emptyIndex==-1)
{
emptyIndex=tokenIndex(emptyStr);
}
//if it is not epsilon
if (production[token[tIndex]->production[i]][0]!=emptyIndex)
{
return i;
}
*/
return i;
}
}
return -1;
}
void Grammar::epsilonRule(int ruleIndex)
{
production[ruleIndex][0]=addToken(emptyStr);
production[ruleIndex][1]=-1;
}
//sample: x -> Aa
// x -> Ab
//changed to: x -> Ax' //this is to change the old rule
// x' -> b //this is to change the old rule
// x' -> a //this is the new-added rule
void Grammar::doCommonFactor(int tIndex, int ruleIndex, int index)
{
int newTokenIndex=forgeToken(tIndex);//create a new variable
//move the second and after part to the new rule of new variable
//doing: x' -> a
//curPos=0;
//prepare to add one new rule
addRuleForToken(newTokenIndex, ++prodIndex);
//token[newTokenIndex]->production[token[newTokenIndex]->count++]=++prodIndex;
token[newTokenIndex]->terminal=false;
copyRule(token[tIndex]->production[ruleIndex], prodIndex);
shiftRule(prodIndex, false, 1);
/*
do
{
//do copying
production[prodIndex][curPos]=
production[token[tIndex]->production[ruleIndex]][curPos+1];
curPos++;
//even the -1 at end is copied
}while (production[token[tIndex]->production[ruleIndex]][curPos]!=-1);
*/
//in order to show an empty string, in case the string is "epsilon"
/*
if (curPos==1)
{
epsilonRule(prodIndex);
}
*/
//replace x -> Aa with x -> Ax'
production[token[tIndex]->production[ruleIndex]][1]=newTokenIndex;
production[token[tIndex]->production[ruleIndex]][2]=-1;//end
//doing: x' -> b
//curPos=0;
//prepare to add one new rule
//pointing new token to where old rule lies
addRuleForToken(newTokenIndex, token[tIndex]->production[index]);
/*
token[newTokenIndex]->production[token[newTokenIndex]->count++]=
token[tIndex]->production[index];
*/
shiftRule(token[tIndex]->production[index], false, 1);
removeRuleFromToken(tIndex, index);
}
void Grammar::removeRuleFromToken(int tIndex, int ruleIndex)
{
token[tIndex]->count--;
for (int i=ruleIndex; i<token[tIndex]->count; i++)
{
token[tIndex]->production[i]=token[tIndex]->production[i+1];
}
}
void Grammar::commonLeftFactor()
{
int index=-1, tIndex=0, ruleIndex=0;
bool flag;
//whenever a newrule is done, restart!
while (tIndex<tokenCount)
{
ruleIndex=0;
flag=false;
while (ruleIndex<token[tIndex]->count)
{
index=findCommonFactor(tIndex, ruleIndex);
if (index!=-1)
{
doCommonFactor(tIndex, ruleIndex, index);
//restart
flag=true;
break;
}
else
{
ruleIndex++;
}
}
if (flag)
{
tIndex=0;
}
else
{
tIndex++;
}
}
}
GrammarToken* Grammar::operator[](int index)
{
if (index>=0&&index<tokenCount)
{
return token[index];
}
else
{
return NULL;
}
}
void Grammar::print(ostream& out)
{
for (int i=0; i<tokenCount; i++)
{
if (!token[i]->terminal)
{
//to do: for test
//cout<<"index."<<i<<" "<<token[i]->name<<" ==> ";
//out<<"no."<<i<<" "<<token[i]->name<<" ==> ";
out<<" "<<token[i]->name<<" ==> ";
for (int j=0; j<token[i]->count; j++)
{
//rule no
//out<<"rule."<<token[i]->production[j]<<" ";
printRule(token[i]->production[j], out);
if (j!=token[i]->count-1)
{
out<<" | ";
}
}
out<<"\n";
}
}
}
void Grammar::uninitializeDFA()
{
for (int i=0; i<itemCount; i++)
{
delete items[i];
}
delete [] items;
delete [] expandFinished;
for (i=0; i<stateCount; i++)
{
delete sets[i];
}
delete []sets;
}
void Grammar::initializeDFA()
{
items=new Item*[MaxItemCount];
for (int i=0; i<MaxItemCount; i++)
{
items[i]=new Item;
items[i]->rulePos=items[i]->dotPos=items[i]->varIndex=-1;
}
sets= new BitSet*[MaxStateCount];
expandFinished=new bool[MaxStateCount];
for (i=0; i<MaxStateCount; i++)
{
sets[i]=new BitSet;
sets[i]->reset();
expandFinished[i]=false;
}
itemCount=stateCount=0;
//initialize LRTable
for (i=0; i<MaxStateCount; i++)
{
for (int j=0; j<tokenCount; j++)
{
LRTable[i][j]=-1;
}
}
}
int Grammar::addItem(Item& theItem)
{
for (int i=0; i<itemCount; i++)
{
if (*(items[i])== theItem)
{
return i;
}
}
items[i]=new Item;
*(items[i])=theItem;
itemCount++;
return i;
}
void Grammar::addSonItem(int tIndex, BitSet& theSet)
{
if (!token[tIndex]->terminal)
{
for (int i=0; i<token[tIndex]->count; i++)
{
Item temp;
int itemIndex;//easy reading
temp.varIndex=tIndex;
temp.rulePos=i;
temp.dotPos=0;
itemIndex=addItem(temp);
theSet.set(itemIndex);
//recursive
addSonItem(production[token[tIndex]->production[i]][0], theSet);
}
}
}
int Grammar::createItem(int theVar, int theNo, int position)
{
Item temp;
temp.varIndex=theVar;
temp.rulePos=theNo;
temp.dotPos=position;
return addItem(temp);
}
void Grammar::constructDFA()
{
int startIndex, itemIndex, stateIndex;
BitSet theSet;
//should initialize everything!!!
initializeDFA();
startIndex=createItem(startSymbolIndex, 0, 0);
//theSet.set(startIndex);
addClosure(startIndex, theSet);
stateIndex=itemIndex=0;
stateIndex= createState(theSet);
expandState(stateIndex);//recursion inside
//for all choices, do createState(startStart, tansition token) (
}
int Grammar::item2Var(int itemIndex)
{
int tIndex, rulePos, dotPos, theVar, theRule;
tIndex=items[itemIndex]->varIndex;
rulePos=items[itemIndex]->rulePos;
dotPos=items[itemIndex]->dotPos;
theRule=token[tIndex]->production[rulePos];
//so, get the var
theVar=production[theRule][dotPos];
return theVar;
}
void Grammar::addClosure(int itemIndex, BitSet& theSet)
{
int tIndex, rulePos, dotPos, theVar, theRule;
if (!theSet.test(itemIndex))
{
theSet.set(itemIndex);//add itself first!!!
}
else
{
//prevent looping
return;
}
tIndex=items[itemIndex]->varIndex;
rulePos=items[itemIndex]->rulePos;
dotPos=items[itemIndex]->dotPos;
theRule=token[tIndex]->production[rulePos];
//so, get the var
theVar=production[theRule][dotPos];
//to skip the epsilon
if (theVar==emptyIndex)
{
theVar=production[theRule][dotPos+1];
}
//theVar=item2Var(itemIndex);
if (theVar!=-1)
{
if (!token[theVar]->terminal)
{
for (int i=0; i<token[theVar]->count; i++)
{
Item temp;
int newItemIndex;//easy reading
temp.varIndex=theVar;
temp.rulePos=i;
temp.dotPos=0;
newItemIndex=addItem(temp);
//theSet.set(itemIndex);
//recursive
addClosure(newItemIndex, theSet);
}
}
}
}
int Grammar::createState(const BitSet& theSet)
{
for (int i=0; i<stateCount; i++)
{
if (*(sets[i])==theSet)
{
return i;
}
}
sets[i]=new BitSet;
*sets[i] = theSet;
stateCount++;
return i;
}
//indicating if it is reduceable by returning true
void Grammar::doExpandState(int itemIndex, int tIndex, BitSet& theSet)
{
int theNewItemIndex;
//int theNextVar=findNextTokenOfItem(itemIndex);
theNewItemIndex=createItem(items[itemIndex]->varIndex, items[itemIndex]->rulePos,
items[itemIndex]->dotPos+1);
/*
if (tIndex!=stackBottomIndex)
{
acceptedItemIndex=theNewItemIndex;
}
*/
addClosure(theNewItemIndex, theSet);
// return findNextTokenOfItem(theNewItemIndex)==-1;//true=reduceable
}
int Grammar::item2Rule(int itemIndex)
{
int ruleIndex, rulePos, varIndex;
//only for easy reading
varIndex=items[itemIndex]->varIndex;
rulePos=items[itemIndex]->rulePos;
ruleIndex =token[varIndex]->production[rulePos];
return ruleIndex;
}
void Grammar::expandState(int stateIndex)
{
int itemIndex, targetState, tIndex, reduceIndex=-1, shiftIndex;
BitSet theSet;//a new temp
bool findShift=false, findReduce=false, isAccepted=false;
//this will prevent looping expanding!!!???
expandFinished[stateIndex]=true;
//to see if it is to reduce
sets[stateIndex]->restart();
while ((itemIndex=sets[stateIndex]->next())!=-1)
{
tIndex=findNextTokenOfItem(itemIndex);
if (tIndex==-1)
{
findReduce=true;
/*
int ruleIndex, rulePos, varIndex;
//only for easy reading
varIndex=items[itemIndex]->varIndex;
rulePos=items[itemIndex]->rulePos;
ruleIndex =token[varIndex]->production[rulePos];
*/
int ruleIndex=item2Rule(itemIndex);
add2Table(stateIndex, ruleIndex);
if (reduceIndex!=-1)
{
errorHandle(ReduceReduceConflict,
(void*)(items[itemIndex]), (void*)(items[reduceIndex]));
}
else
{
reduceIndex=itemIndex;
}
}
else
{
shiftIndex=itemIndex;
findShift=true;
}
}
//how to do in LR(1)????
//I am currently working on LR(0). I try not to think about it
if (findReduce&&findShift)
{
//in LR(1), this is not necessarily a conflict, because you need to
//check the first and follow set? I forgot it now. :)
errorHandle(ShiftReduceConflict, (void*)(items[shiftIndex]),
(void*)(items[reduceIndex]));
//temparily
int temp1, temp2;
temp1=items[shiftIndex]->varIndex;
temp2=items[shiftIndex]->rulePos;
cout<<"\nconflict at shift:\n";
printRule(temp1, temp2, items[shiftIndex]->dotPos);
temp1=items[reduceIndex]->varIndex;
temp2=items[reduceIndex]->rulePos;
cout<<"\nconflict with reduce:\n";
printRule(temp1, temp2, items[reduceIndex]->dotPos);
cout<<endl;
return;//currently ignore LR(1) and should be improved when LR(1)
}
for (int i=0; i<tokenCount; i++)
{
if (i==emptyIndex||i==stackBottomIndex)
{
continue;
}
sets[stateIndex]->restart();
//for all items, search if there is token to match to advance the dot
while ((itemIndex=sets[stateIndex]->next())!=-1)
{
if (findNextTokenOfItem(itemIndex)== i)//
{
//do I need to add acceptance here?
if (i==stackBottomIndex)
{
}
doExpandState(itemIndex, i, theSet);//should finish addClosure
}
}
//only expandState after you find one "transition"
if (theSet.count()>0)//if you do find some
{
targetState=createState(theSet);
//here to write the LRTable
add2Table(stateIndex, targetState, i);//3 params means to reduce
findShift=true;
theSet.reset();//prepare for next token
//prevent infinite loop because DFA is cyclic graph
if (!expandFinished[targetState])
{
expandState(targetState);
}
}//no one is set
}
}
//this is for reduce of LR(0)
void Grammar::add2Table(int stateIndex, int ruleIndex)
{
int result= reduce2Mask(ruleIndex);
//currently this is for LR(0)
for (int i=0; i<tokenCount; i++)
{
if (token[i]->terminal)
{
if (LRTable[stateIndex][i]!=-1)
{
errorHandle(OverWritingLRTable);
}
LRTable[stateIndex][i]=result;
}
}
}
//this is for shift
void Grammar::add2Table(int stateIndex, int targetIndex, int tIndex)
{
int result=targetIndex;
if (token[tIndex]->terminal)
{
result= shift2Mask(targetIndex);
}
if (LRTable[stateIndex][tIndex]!=-1)
{
errorHandle(OverWritingLRTable);
}
LRTable[stateIndex][tIndex]=result;
}
int Grammar::findNextTokenOfItem(int itemIndex)
{
int tIndex, theNo, thePos, theVar;
tIndex=items[itemIndex]->varIndex;//easy for reading
theNo=items[itemIndex]->rulePos;
thePos=items[itemIndex]->dotPos;
//maybe I should write small function to wrap this kind of indexing!!!!
theVar=production[token[tIndex]->production[theNo]][thePos];
return theVar;
}
bool Grammar::isReduce(int input)
{
return (SHIFTREDUCEMASK&input)==REDUCEMASK;
}
bool Grammar::isShift(int input)
{
return (SHIFTREDUCEMASK&input)==SHIFTMASK;
}
int Grammar::mask2State(int input)
{
return STATEMASK&input;
}
//internal test purpose
void Grammar::printDFA()
{
for (int i=0; i<stateCount; i++)
{
cout<<"\nstate no."<<i<<":\n";
sets[i]->restart();
int j=0;
while ((j=sets[i]->next())!=-1)
{
printRule(items[j]->varIndex, items[j]->rulePos, items[j]->dotPos);
cout<<endl;
}
}
}
//these are trivial functions and will later be changed to be inline
int Grammar::reduce2Mask(int input)
{
return REDUCEMASK|input;
}
int Grammar::shift2Mask(int input)
{
return SHIFTMASK|input;
}
BitSet::BitSet()
{
current=-1;
reset();
}
BitSet& BitSet::operator =(const BitSet& theSet)
{
reset();
this->operator |=(theSet);
return *this;
}
int BitSet::next()
{
for (int i=current+1; i<bitset_size; i++)
{
if (at(i))
{
current=i;
return i;
}
}
current=-1;
return -1;
}
bool operator == (Item& first, Item& second)
{
return (first.dotPos==second.dotPos)&&(first.rulePos==second.rulePos)&&
(first.varIndex==second.varIndex);
}
file name: parser.h
#include "grammar.h"
#include "hash.h"
const int MaxStackLength=50;
const int DataSegmentLength=2000;
const int CodeSegmentLength=2000;
const int StackSegmentLength=2000;
class Parser
{
private:
bool isLLParser;
int insCounter;
int varCounter;
int stackCounter;
int labelCount;
int stack[MaxStackLength];
bool push(int num);
bool pop(int& num);
int top;
bool typeCheck(Node* operand1, Node* operand2);
void type2CodeState(int theRule);
void type2SemState(int theVar);
//the code generation function
void shrinkStack();
void doThen();
void doElse();
void doFinishStatement();
void doSubscript();
void mulShrinkStack();
void doLoop();
void doExit();
void doParenthesis();
void doParamCheck();
void doModuleCall();
void doReading();
void doWriting();
//the code generation function
Node* semAction(char* varName);
void codeAction(int tIndex);
void variableAction(Node* ptr);
void generateCode();
void initialize();
bool pushToken(int tIndex, int theToken);
void prepare();
void pushRule(int theRule);
void LLParse();
void LRParse();
void printInterCode();
Node* constTemp(int num);
Node* constTemp(char ch);
//Node* nextTemp();
Node* nextTemp(Node* sameType);
Node* createLabel();
void nextName(char* buffer);
void addVariable();
void loadOperand();
void storeOperand(int regIndex);
void clearRegister(int index);
void doLoadOperand(bool isFirst);
void arithGen();
void logicGen();
void assignGen();
void readGen();
void writeGen();
void labelGen();
void conJumpGen(bool isTrue);
void callGen();
void paramGen();
void jumpGen();
void moduleGen(Node* ptr);
void getParamAdd(Node* ptr);
void getVarAdd(Node* ptr);
void programGen();
void loadAdd(Node* ptr);
void moduleEndGen();
void alignStack();
void pushReg(int regIndex);
void popReg(int regIndex);
public:
Parser(bool forLL=true);
void parseFile(const char* fileName);
};
file name: parser.cpp
#include <iostream>
#include <fstream>
#include "parser.h"
#include "scanner.h"
#include "errorNo.h"
#include "hash.h"
using namespace std;
const int OpCodeCount=34;
enum OpCodes
{
op_add, op_sub, op_mul, op_div, //4
op_eq, op_neq, op_lt, op_le, op_gt, op_ge, //6
op_addi, op_subi, op_muli, op_divi, //4
op_eqi, op_nei, op_lti, op_lei, op_gti, op_gei, //6
op_get, op_put, //2
op_jl, op_jlr, //2
op_j, op_jr,
op_hlt, //1
op_nop, //1
op_bz, op_bnz,
op_lw, op_lb, op_sw, op_sb, //4
};
char* opCodeStr[OpCodeCount]=
{
"add", "sub", "mul", "div",
"ceq", "cne", "clt", "cle", "cgt", "cge",
"addi", "subi", "muli", "divi",
"ceqi", "cnei", "clti", "clei", "cgti", "cgei",
"getc", "putc",
"jl", "jlr",
"j", "jr",
"hlt",
"nop",
"bz", "bnz",
"lw", "lb", "sw", "sb"
};
enum RegisterSet
{
R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15
};
char* registerName[16]=
{
"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11",
"R12", "R13", "R14", "R15"
};
enum RegisterAlias
{
ZERO=R0, AX=R1, BX=R2, CX=R3, DX=R4, SI=R5, DI=R6, FLAG=R7, DS=R8, MEM=R9, SS=R10,
FP=R11, SP=R12, INDEX=R13, IO=R14, RETURN=R15
};
enum InstructionSet
{
//intermediate code instruction set:
INTER_ADD, INTER_SUB, INTER_MUL, INTER_DIV,
INTER_CEQ, INTER_CNE, INTER_CLT,INTER_CLE, INTER_CGT, INTER_CGE,
INTER_ASN,
INTER_READ, INTER_WRITE,
INTER_LABEL,
INTER_JTRUE, INTER_JFALSE, INTER_JUMP,
INTER_HALT,
INTER_CALL,
INTER_PARAM
};
const int InstructionSetCount=20;
char* instructionStr[InstructionSetCount]=
{
"ADD", "SUB", "MUL", "DIV",
"CEQ", "CNE", "CLT","CLE", "CGT", "CGE",
"ASN", "READ", "WRITE",
"LABEL", "JTRUE", "JFALSE", "JUMP",
"HALT", "CALL", "PARAM"
};
enum VarMap
{
M =0 ,
Dl=4 ,
B =5 ,
Dv =6 ,
Ml =7 ,
Mo =8 ,
Vl = 12 ,
V =15 ,
Il= 16 ,
T =18 ,
Ad =20 ,
L =23 ,
Ar =24 ,
Sl =30 ,
S =32 ,
E =34 ,
C =36 ,
Lp =41 ,
Ln =43 ,
Lo =45 ,
Lr =48 ,
F =50 ,
Oa =51 ,
R =54 ,
Om =55 ,
Or =58 ,
M0 =65 ,
M1 =66 ,
M2 =67,
M3 =68 ,
Ml0 =69,
Vl0=70 ,
Il0=71 ,
Sl0=72 ,
S0 =73 ,
START=74 ,
TKPROGRAM= 1,
TKID= 2,
TKMODULE=10,
TKOPENPAR=11,
TKSEMICOLON=3,
TKCLOSEPAR=13,
TKVARIABLES=14,
TKCOLON=17,
TKINTEGER=19,
TKCHAR=21,
TKCOMA=22, // no.22 ,
TKARRAY=25,
TKNUMBER=27,
TKBEGIN=29,
TKEND=31,
TKLOOP=39,
TKEXIT=40,
TKOPENBRACKET=26, //no.26[
TKCLOSEBRACKET=28, //]
TKREAD=42, //no.42read
TKWRITE=44, //no.44write
TKCONSTCHAR=49, // no.49c
TKPLUS=52, // no.52+ | no.53-
TKSUB=53,
TKMUL=56, //no.56* no.54R no.68M3 | no.9e | no.57/ no.54R no.68M3
TKDIV=57, //
TKEQ=59, //no.59= | no.60< | no.61> | no.62<= | no.63>= | no.64!=
TKLT=60,
TKLE=62,
TKGT=61,
TKGE=63,
TKNEQ=64,
TKASSIGNMENT=33, //no.33:=
TKTHEN=37, // no.37then
TKELSE=38 //no.38else
};
enum CodeState
{
CODEREADY,//NO ACTION
CODEISTATEMENT, //S -> i S0
CODEIFSTATEMENT, //S -> if C then S else S
CODELOOPSTATEMENT, //S -> loop Sl end;
CODEEXITSTATEMENT, // S -> exit;
CODEBEGINSTATEMENT, // S ->begin Sl end;
CODEREADSTATEMENT, // S -> read Ln;
CODEWRITESTATEMENT, // S -> write Lo;
CODEEMPTYSTATEMENT, // S -> e;
CODEEXPRESSION // E -> F M0
}codeState=CODEREADY;
enum SemState
{
SEMPROGRAM,
SEMPROGRAMVAR,
SEMPROGRAMTYPE,
SEMPROGRAMBODY,
SEMMODULE,
SEMMODULEPARAM,
SEMMODULEPARAMTYPE,
SEMMODULEVAR,
SEMMODULEVARTYPE,
SEMMODULEBODY,
SEMDEFAULT
}semState; //semState
struct Instruction
{
InstructionSet opcode;
Node* operand1;
Node* operand2;
Node* result;
}instruction;
Scanner scanner;
extern Grammar grammar;
const char* defaultListFile="nickListFile.txt";
extern int startSymbolIndex;
extern int stackBottomIndex;
extern int table[MaxTokenCount][MaxTokenCount];
extern int token2type[MaxTokenCount];
extern int type2token[MaxTokenCount];
extern int emptyIndex;
extern ofstream fList;
ofstream fRule;
ofstream fInterCode;
ofstream fTarget;
const int SemStackLength=20;
const int IntegerLength=4;
const int CharLength=1;
int semOperatorStack[SemStackLength];
Node* semOperandStack[SemStackLength];
int semOperatorPtr=0;
int semOperandPtr=0;
Node* modulePtr=NULL;
int paramCounter=0;
Hash mainHash, moduleHash;
Node* varList[MaxParamNo];//the first one is module
Node temps[MaxTempStackLength];
int tempCount=0;
int varListCount=0;
bool isReading=false;
bool isWriting=false;
bool isType=false;
bool firstModule=true;
bool isID=false;
int scope=0;
void printImmediate(Node* ptr)
{
if (ptr==NULL)
{
fInterCode<<"-,";
return;
}
if (ptr->immediate)
{
if (ptr->type==0)
{
fInterCode<<ptr->intVal;
}
else
{
fInterCode<<"'"<<ptr->chVal<<"'";
}
}
else
{
fInterCode<<ptr->name;
//fInterCode<<ptr->address;
}
fInterCode<<",";
}
void Parser::printInterCode()
{
void printImmediate(Node* ptr);
fInterCode<<"("<<instructionStr[instruction.opcode]<<",";
printImmediate(instruction.operand1);
printImmediate(instruction.operand2);
//printImmediate(instruction.result);
if (instruction.result!=NULL)
{
fInterCode<<instruction.result->name;
/*
if (instruction.result->isLabel)
{
fInterCode<<instruction.result->name;
}
else
{
fInterCode<<instruction.result->address;
}
*/
}
else
{
fInterCode<<"-";
}
fInterCode<<")\n";
generateCode();
}
bool Parser::typeCheck(Node* operand1, Node* operand2)
{
if (operand1->type!=operand2->type)
{
return false;
}
else
{
if (operand1->structure==1)
{
return operand1->size==operand2->size;
}
}
return true;
}
void Parser::mulShrinkStack()
{
if (semOperatorPtr==0)
{
return;
}
switch (semOperatorStack[semOperatorPtr-1])
{
case INTER_MUL:
case INTER_DIV:
instruction.operand2=semOperandStack[--semOperandPtr];
//some check
instruction.operand1=semOperandStack[--semOperandPtr];
instruction.opcode=(InstructionSet)semOperatorStack[--semOperatorPtr];
//type-check function here!!!!
if (typeCheck(instruction.operand1,instruction.operand2))
{
//donot allow array
/*
if (instruction.operand1->structure==1)
{
errorHandle(CannotOperateOnComplicatedType, (void*)instruction.operand1,
(void*)instruction.operand2);
}
*/
instruction.result=nextTemp(instruction.operand1);
printInterCode();
//Hash::pushCurTemp();
semOperandStack[semOperandPtr++]=instruction.result;
//shrinkStack();
}
else
{
errorHandle(UnmatchedOperandType, (void*)instruction.operand1,
(void*)instruction.operand2);
}
break;
}
}
void Parser::shrinkStack()
{
//mulShrinkStack();
if (semOperatorPtr==0)
{
return;
}
switch (semOperatorStack[semOperatorPtr-1])
{
case INTER_ADD:
case INTER_SUB:
instruction.operand2=semOperandStack[--semOperandPtr];
//some check
instruction.operand1=semOperandStack[--semOperandPtr];
instruction.opcode=(InstructionSet)semOperatorStack[--semOperatorPtr];
//type-check function here!!!!
if (typeCheck(instruction.operand1,instruction.operand2))
{
/*
if (instruction.operand1->structure>0)
{
errorHandle(CannotOperateOnComplicatedType, (void*)instruction.operand1,
(void*)instruction.operand2);
}
*/
instruction.result=nextTemp(instruction.operand1);
printInterCode();
//Hash::pushCurTemp();
semOperandStack[semOperandPtr++]=instruction.result;
shrinkStack();
}
else
{
errorHandle(UnmatchedOperandType, (void*)instruction.operand1,
(void*)instruction.operand2);
}
break;
/*
case TKTHEN:
Node *lbl, *elseLbl;
//sPtr=semOperandStack[--semOperandPtr];
lbl=semOperandStack[--semOperandPtr];
semOperatorPtr--;//remove the "then";
semOperandStack[semOperandPtr++]=elseLbl;//PUSH DOWN LABEL
semOperatorStack[semOperatorPtr++]=TKELSE;//ELSE IS LIKE AN OPERATOR
instruction.opcode=INTER_LABEL;
instruction.result=lbl;
instruction.operand1=NULL;
instruction.operand2=NULL;
printInterCode();
break;
*/
}
//}
}
//then what is codeReady?????????
void Parser::type2CodeState(int theRule)
{
//M3 ==> rule.61 * R M3 | rule.62 e | rule.67 / R M3
//M0 ==> rule.55 + F M0 | rule.56 e | rule.66 - F M0
if (theRule==61||theRule==67||theRule==62)
{
mulShrinkStack();
}
if (theRule==56||theRule==55||theRule==66)
{
shrinkStack();
}
/*
switch(theRule)
{
case 20: //S ==> rule.20 i S0
codeState=CODEISTATEMENT;
break;
case 21: // S ==> rule.21 if C then S else S
codeState=CODEIFSTATEMENT;
break;
case 22: //S ==> rule.22 loop Sl end ;
codeState=CODELOOPSTATEMENT;
break;
case 23: //rule.23 exit ;
codeState = CODEEXITSTATEMENT;
break;
case 25: //rule.25 begin Sl end ;
codeState = CODEBEGINSTATEMENT;
break;
case 26: //rule.26 read Ln ;
codeState=CODEREADSTATEMENT;
break;
case 27: //rule.27 write Lo ;
codeState=CODEWRITESTATEMENT;
break;
case 28: // rule.28 e ;
codeState=CODEEMPTYSTATEMENT;
break;
/*
case 38: //E ==> rule.38 F M0
codeState=CODEEXPRESSION;
break;
case 62: //M3 ==> rule.62 e
case 56: //M0 ==> rule.56 e
// Code(CODEIFSTATEMENT);
shrinkStack();
break;
}
*/
}
void doAddVariable(Node* ptr, int& counter)
{
int shift=1;
if (ptr->type==0)
{
if (counter%4!=0)
{
counter=(counter/4+1)*4; //make it align
}
shift=4;
}
ptr->address=counter;//add is offset of data segment
if (ptr->structure==1)//it is array
{
//it must be array
shift*=ptr->size;
}
counter+=shift;
}
void Parser::addVariable()
{
void doAddVariable(Node* ptr, int& counter);
for (int i=1; i<varListCount; i++)
{
if (semState==SEMPROGRAMTYPE)//it is program variable
{
doAddVariable(varList[i], varCounter);
}
if (semState==SEMMODULEVARTYPE)
{
doAddVariable(varList[i], stackCounter);
}
if (semState==SEMMODULEPARAMTYPE)
{
varList[i]->isParam=true;
//to make all parameter pointers!!!!!
varList[i]->structure=4;
//Why?????????
//2 means "return add"+"dynamic link"
//varListCount-i+1 means the index in reversed order
//*(-4) means it is integer address and it is upper to Frame
//as my stack is starting from high mem add
//varListCount includes one for "name of module"!!!
varList[i]->address=(2+varListCount-i)*(-4);
}
}
}
void Parser::type2SemState(int theRule)
{
switch (theRule)
{
//case TKPROGRAM:
case 0: //M ==> rule.0 program i ; Dl B
semState=SEMPROGRAM;//INSIDE ACTION, REMAIN UNCHANGED
break;
//case TKVARIABLES: Dv ==> rule.5 variables Vl
case 5:
if (semState==SEMPROGRAM)
{
semState=SEMPROGRAMVAR;
}
else
{
semState=SEMMODULEVAR;
}
varListCount=1;//because at that time, the varList[0] is the name of program
//or the module name
break;
//case TKCOLON: T ==> rule.10 integer Ad | rule.11 char Ad
case 10:
case 11:
{
switch (semState)
{
case SEMPROGRAMVAR:
semState=SEMPROGRAMTYPE;
break;
case SEMMODULEVAR:
semState=SEMMODULEVARTYPE;
break;
case SEMMODULEPARAM:
semState=SEMMODULEPARAMTYPE;
break;
}
isType=true;//here starts
break;
}
break;
//case TKMODULE: Ml0 ==> rule.2 module i ( Vl ) Dv B Ml0
case 2:
semState=SEMMODULE;
//fRule<<"\nprint module "<<varList[0]->name<<endl;
//moduleHash.print();
varListCount=0;
moduleHash.purge();
Hash::flushTemp();
semOperandPtr=semOperatorPtr=0;
stackCounter=0;//target code
break;
//case TKOPENPAR: Vl0 ==> rule.7 i Il0 : T ; Vl0
case 7:
switch (semState)
{
case SEMMODULE:
semState=SEMMODULEPARAM;
break;
case SEMPROGRAMTYPE: //A new variable to be declared
semState=SEMPROGRAMVAR;
break;
case SEMMODULEPARAMTYPE:
semState=SEMMODULEPARAM;
break;
case SEMMODULEVARTYPE:
semState=SEMMODULEVAR;
break;
}
break;
//case TKBEGIN: //B ==> rule.17 begin Sl end ;
case 17:
{
switch (semState)
{
case SEMMODULEVAR:
case SEMMODULE:
case SEMMODULEPARAM:
case SEMMODULEVARTYPE:
case SEMMODULEPARAMTYPE:
semState=SEMMODULEBODY;
break;
case SEMPROGRAMVAR:
case SEMPROGRAM:
//case SEMPROGRAM
semState=SEMPROGRAMBODY;
//insCounter+=4;
break;
}
}
break;
//this is new!!!//Ml0 ==> rule.2 module i ( Vl ) Dv B Ml0 | rule.63 e
case 63:
semState=SEMPROGRAM;
Hash::flushTemp();
semOperandPtr=semOperatorPtr=0;
/*
if (!firstModule)
{
moduleEndGen();
}
*/
break;
}
}
void Parser::nextName(char* buffer)
{
buffer[0]='L';
itoa(labelCount, buffer+1, 10);
labelCount++;
}
Node* Parser::createLabel()
{
Node* ptr;
char buffer[10];
nextName(buffer);
if (mainHash.insert(buffer, ptr))
{
ptr->isLabel=true;
ptr->immediate=true;
return ptr;
}
else
{
errorHandle(InternalLabelNameConflict, (void*)buffer, NULL);
return NULL;
}
}
void Parser::loadAdd(Node* ptr)
{
if (ptr->isParam)
{
getParamAdd(ptr);
}
else
{
getVarAdd(ptr);
}
}
void Parser::getParamAdd(Node* ptr)
{
//get its address into IO
fTarget<<opCodeStr[op_lw]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<ptr->address*(-1)<<"(";
fTarget<<registerName[FP]<<")\n";
insCounter+=4;
}
void Parser::doLoadOperand(bool isFirst)
{
Node* ptr;
int regIndex;
if (isFirst)
{
regIndex=BX;
clearRegister(BX);
ptr=instruction.operand1;
}
else
{
regIndex=CX;
clearRegister(CX);
ptr=instruction.operand2;
}
if (ptr->immediate)
{
//must clear register first!!!
//clearRegister(regIndex);
//opcode is different!!!!
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[regIndex]<<",";
fTarget<<registerName[ZERO]<<",";
fTarget<<(ptr->type==0?ptr->intVal:ptr->chVal)<<"\n";
}
else
{
//if it is param it cannot be an array!!!
if (ptr->isParam)
{
/*
//get its address into IO
fTarget<<opCodeStr[op_lw]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<ptr->address*(-1)<<"(";
fTarget<<registerName[FP]<<")\n";
insCounter+=4;
*/
getParamAdd(ptr);//IO hold the add
fTarget<<opCodeStr[ptr->type==0?op_lw:op_lb]<<" ";
fTarget<<registerName[regIndex]<<",";
fTarget<<0<<"("<<registerName[IO]<<")\n";
insCounter+=4;
}
else
{
//opcode
if (ptr->type==0)
{
fTarget<<opCodeStr[op_lw]<<" ";
}
else
{
fTarget<<opCodeStr[op_lb]<<" ";
}
//the result
fTarget<<registerName[regIndex]<<",";
//if it is a pointer
//because the stack bottom is at high mem
fTarget<<ptr->address*(-1)<<"(";//always address
//scope
if (ptr->scope==0||ptr->structure==4)
{
fTarget<<registerName[DS]<<")\n";
}
else
{
fTarget<<registerName[FP]<<")\n";
}
}
}
insCounter+=4;
}
void Parser::getVarAdd(Node* ptr)
{
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<registerName[ptr->scope==0?DS:FP]<<",";
fTarget<<ptr->address*(-1)<<"\n";
insCounter+=4;
}
void Parser::loadOperand()
{
//THIS MUST BE THE a[I] FORMAT
if (instruction.operand1->structure==1&&instruction.opcode==INTER_ADD)
{
loadAdd(instruction.operand1);
/*
if (instruction.operand1->isParam)
{
//get its addr first
getParamAdd(instruction.operand1);
}
else
{
//get its addr directly
getVarAdd(instruction.operand1);
}
*/
//BX := IO
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[BX]<<",";
fTarget<<registerName[ZERO]<<",";
fTarget<<registerName[IO]<<"\n";
insCounter+=4;
//so BX holds the addr of beginning of array and
//CX holds the offset
doLoadOperand(false);
/*
//now IO hold beginning addr of array, CX holds the offset
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<registerName[IO]<<",";
fTarget<<registerName[CX]<<"\n";
insCounter+=4;
//now IO holds full addr
fTarget<<opCodeStr[instruction.operand1->type==0?op_lw:op_lb];
fTarget<<registerName[BX]<<",";
fTarget<<0<<"("<<registerName[IO]<<")\n";
insCounter+=4;
clearRegister(CX);
*/
}
else
{
doLoadOperand(true);
doLoadOperand(false);
}
}
void Parser::alignStack()
{
if (stackCounter%4!=0)
{
stackCounter=(stackCounter/4+1)*4;
}
}
void Parser::storeOperand(int regIndex)
{
loadAdd(instruction.result);
//opcode if it is a pointer
//if (instruction.result->type==0||instruction.result->structure==4)
if (instruction.result->type==0)
{
fTarget<<opCodeStr[op_sw]<<" ";
//alignStack();
//stackCounter+=4;
}
else
{
fTarget<<opCodeStr[op_sb]<<" ";
//stackCounter+=1;
}
//address
fTarget<<0<<"("<<registerName[IO]<<"),";
fTarget<<registerName[regIndex]<<"\n";
insCounter+=4;
/*
fTarget<<instruction.result->address*(-1)<<"(";
//scope
if (instruction.result->scope==0)
{
fTarget<<registerName[DS]<<"),";
}
else
{
fTarget<<registerName[FP]<<"),";
}
fTarget<<registerName[regIndex]<<"\n";
insCounter++;
*/
}
void Parser::logicGen()
{
loadOperand();
fTarget<<opCodeStr[instruction.opcode]<<" ";
fTarget<<registerName[FLAG]<<",";
fTarget<<registerName[BX]<<",";
fTarget<<registerName[CX]<<"\n";
insCounter+=4;
}
void Parser::arithGen()
{
loadOperand();
fTarget<<opCodeStr[instruction.opcode]<<" ";
fTarget<<registerName[AX]<<",";
fTarget<<registerName[BX]<<",";
fTarget<<registerName[CX]<<"\n";
insCounter+=4;
storeOperand(AX);
}
void Parser::clearRegister(int index)
{
fTarget<<opCodeStr[op_sub]<<" ";
fTarget<<registerName[index]<<",";
fTarget<<registerName[index]<<",";
fTarget<<registerName[index]<<"\n";
insCounter+=4;
}
void Parser::moduleEndGen()
{
//load return address
popReg(RETURN);
popReg(BX);
//move FP upper!!!
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[BX]<<"\n";
insCounter+=4;
//because FP will move 8 more for storing ret add and stack always moves
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[FP]<<",";
fTarget<<8<<"\n";
insCounter+=4;
clearRegister(SP);
fTarget<<opCodeStr[op_jr]<<" ";
fTarget<<registerName[RETURN]<<"\n";
insCounter+=4;
/*
fTarget<<opCodeStr[op_lw]<<" ";
fTarget<<registerName[RETURN]<<",";
fTarget<<4<<"("<<registerName[FP]<<")\n";
insCounter+=4;
//load dynamic link to BX
fTarget<<opCodeStr[op_lw]<<" ";
fTarget<<registerName[BX]<<",";
fTarget<<8<<"("<<registerName[FP]<<")\n";
insCounter+=4;
//move FP upper!!!
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[BX]<<"\n";
insCounter+=4;
fTarget<<opCodeStr[op_jr]<<" ";
fTarget<<registerName[RETURN]<<"\n";
insCounter+=4;
*/
}
void Parser::assignGen()
{
doLoadOperand(true);//load operand1 into BX
/*
clearRegister(AX);
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[AX]<<",";
fTarget<<registerName[BX]<<",";
fTarget<<registerName[ZERO]<<"\n";
insCounter+=4;
*/
storeOperand(BX);
}
void Parser::readGen()
{
if (instruction.result->type==1)
{
fTarget<<opCodeStr[op_get]<<" ";
fTarget<<registerName[IO]<<"\n";
insCounter+=4;
storeOperand(IO);
}
else
{
fTarget<<opCodeStr[op_jl]<<" ";
fTarget<<registerName[RETURN]<<",";
fTarget<<" getint\n";
insCounter+=4;
storeOperand(AX);
}
}
void Parser::writeGen()
{
doLoadOperand(true);
if (instruction.operand1->type==1)
{
fTarget<<opCodeStr[op_put]<<" ";
fTarget<<registerName[BX]<<"\n";
insCounter+=4;
}
else
{
//AX:=BX
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[AX]<<",";
fTarget<<registerName[BX]<<",";
fTarget<<registerName[ZERO]<<"\n";
insCounter+=4;
fTarget<<opCodeStr[op_jl]<<" ";
fTarget<<registerName[RETURN]<<",";
fTarget<<"putint\n";
insCounter+=4;
//storeOperand(AX);
}
}
void Parser::labelGen()
{
instruction.result->address=insCounter;
//it will be immediately followed by
fTarget<<instruction.result->name<<" \n";
}
void Parser::conJumpGen(bool isTrue)
{
fTarget<<opCodeStr[isTrue?op_bnz:op_bz]<<" ";
fTarget<<registerName[FLAG]<<",";
//fTarget<<4<<"\n"; //jump to next instruction where we jump
fTarget<<instruction.result->name<<"\n";
insCounter+=4;
}
//dynamic link will be saved in beginning of module, so does the instruction
void Parser::jumpGen()
{
fTarget<<opCodeStr[op_j]<<" ";
//fTarget<<insCounter-instruction.result->address<<"\n";
fTarget<<instruction.result->name<<"\n";
insCounter+=4;
}
void Parser::programGen()
{
fTarget<<"entry\n";
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[DS]<<",";
fTarget<<registerName[ZERO]<<",";
//fTarget<<"topaddr\n";
fTarget<<32000<<"\n";
insCounter+=4;
clearRegister(SP);
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[ZERO]<<",";
fTarget<<"24000\n";
insCounter+=4;
}
void Parser::callGen()
{
//save stack counter which is the dynamic link
//copy stackcounter to IO
pushReg(SP);
//branch and link
fTarget<<opCodeStr[op_jl]<<" ";
fTarget<<registerName[RETURN]<<",";
fTarget<<instruction.result->name<<"\n";
insCounter+=4;
/*
alignStack();
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<registerName[ZERO]<<",";
fTarget<<stackCounter<<"\n";
insCounter+=4;
//write IO to stack
alignStack();
fTarget<<opCodeStr[op_sw]<<" ";
fTarget<<stackCounter*(-1)<<"(";
fTarget<<registerName[FP]<<"),";
fTarget<<registerName[IO]<<"\n";
insCounter+=4;
stackCounter+=4;
//branch and link
fTarget<<opCodeStr[op_jl]<<" ";
fTarget<<registerName[RETURN]<<",";
fTarget<<instruction.result->name<<"\n";
insCounter+=4;
*/
}
void Parser::paramGen()
{
//first get param's absolute address
//load the param's add into IO register
/*
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<registerName[ZERO]<<",";
fTarget<<instruction.result->address<<"\n";
insCounter+=4;
*/
loadAdd(instruction.result);
/*
//add IO, FP, IO
fTarget<<opCodeStr[op_add]<<" ";
fTarget<<registerName[IO]<<",";
fTarget<<registerName[IO]<<",";
if (instruction.result->scope==0)
{
fTarget<<registerName[DS]<<"\n";
}
else
{
fTarget<<registerName[FP]<<"\n";
}
insCounter+=4;
*/
//sw -stackCounter(FP), IO
//alignStack();
/*
fTarget<<opCodeStr[op_sw]<<" ";
fTarget<<stackCounter*(-1)<<"(";
fTarget<<registerName[FP]<<"),";
fTarget<<registerName[IO]<<"\n";
insCounter+=4;
//should I???
stackCounter+=4;
*/
pushReg(IO);
}
void Parser::pushReg(int regIndex)
{
//calc mem
fTarget<<opCodeStr[op_sub]<<" ";
fTarget<<registerName[MEM]<<",";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[SP]<<"\n";
insCounter+=4;
//write stack
fTarget<<opCodeStr[op_sw]<<" ";
fTarget<<0<<"("<<registerName[MEM]<<"),";
fTarget<<registerName[regIndex]<<"\n";
insCounter+=4;
//add 4 to sp
fTarget<<opCodeStr[op_addi]<<" ";
fTarget<<registerName[SP]<<",";
fTarget<<registerName[SP]<<",";
fTarget<<4<<"\n";
insCounter+=4;
}
void Parser::popReg(int regIndex)
{
//sub 4 from sp
fTarget<<opCodeStr[op_subi]<<" ";
fTarget<<registerName[SP]<<",";
fTarget<<registerName[SP]<<",";
fTarget<<4<<"\n";
insCounter+=4;
//calc mem
fTarget<<opCodeStr[op_sub]<<" ";
fTarget<<registerName[MEM]<<",";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[SP]<<"\n";
insCounter+=4;
//read stack
fTarget<<opCodeStr[op_lw]<<" ";
fTarget<<registerName[regIndex]<<",";
fTarget<<0<<"("<<registerName[MEM]<<")\n";
insCounter+=4;
}
void Parser::moduleGen(Node* ptr)
{
//save return address
fTarget<<ptr->name<<"\n";//the label
ptr->address=insCounter;//but not increase!!
//save return add
//alignStack();
/*
fTarget<<opCodeStr[op_sw]<<" ";
fTarget<<stackCounter*(-1)<<"(";
fTarget<<registerName[FP]<<"),";
fTarget<<registerName[RETURN]<<"\n";
insCounter+=4;
*/
pushReg(RETURN);
//stackCounter+=4;//remember it is now restarting!!
//move FP pointer
fTarget<<opCodeStr[op_sub]<<" ";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[FP]<<",";
fTarget<<registerName[SP]<<"\n";
//fTarget<<stackCounter*(-1)<<"\n";
insCounter+=4;
clearRegister(SP);
//now the stackcounter reset to 0
//stackCounter=0;
}
void Parser::generateCode()
{
switch (instruction.opcode)
{
case INTER_ADD:
case INTER_SUB:
case INTER_MUL:
case INTER_DIV:
arithGen();
break;
case INTER_CEQ:
case INTER_CLT:
case INTER_CLE:
case INTER_CGT:
case INTER_CGE:
case INTER_CNE:
logicGen();
break;
case INTER_ASN:
assignGen();
break;
case INTER_READ:
readGen();
break;
case INTER_WRITE:
writeGen();
break;
case INTER_LABEL:
labelGen();
break;
case INTER_JTRUE:
conJumpGen(true);
break;
case INTER_JFALSE:
conJumpGen(false);
break;
case INTER_JUMP:
jumpGen();
break;
case INTER_HALT:
fTarget<<opCodeStr[op_hlt]<<"\n";
insCounter+=4;
break;
case INTER_CALL:
callGen();
break;
case INTER_PARAM:
paramGen();
break;
}
}
void Parser::variableAction(Node* ptr)
{
/*
if (codeState!=CODEREADY)
{
semOperandStack[semOperandPtr++]=ptr;
}
*/
if (semState==SEMMODULEBODY||semState==SEMPROGRAMBODY)
{
semOperandStack[semOperandPtr++]=ptr;
}
}
void Parser::doThen()
{
Node* lbl;
doFinishStatement();
//I changed to label here!!!
//lbl=constTemp(0);
lbl=createLabel();
//createLabel???
instruction.opcode=INTER_JFALSE;
instruction.operand1=semOperandStack[--semOperandPtr];//pop condition
instruction.operand2=NULL;
instruction.result=lbl;
printInterCode();
semOperatorStack[semOperatorPtr++]=TKTHEN;//this "then" is considered to be operator
semOperandStack[semOperandPtr++]=lbl;//push down label for future updating
}
void Parser::doElse()
{
Node* elseLbl;
//should I call shrinkStack()????????
//shrinkStack();
//do jump
//shrinkStack();
doFinishStatement();
instruction.opcode=INTER_JUMP;
//changed to label
//elseLbl=constTemp(0);//the label is immediate
elseLbl=createLabel();//the label is immediate
instruction.result = elseLbl;
instruction.operand1=NULL;
instruction.operand2=NULL;
printInterCode();
if (semOperatorStack[semOperatorPtr-1]==TKTHEN)
{
semOperatorPtr--;//pop "then"
instruction.result=semOperandStack[--semOperandPtr];
instruction.opcode=INTER_LABEL;
instruction.operand1=instruction.operand2=NULL;
printInterCode();
}
semOperandStack[semOperandPtr++]=elseLbl;
semOperatorStack[semOperatorPtr++]=TKELSE;
}
void Parser::doFinishStatement()
{
if (isReading)
{
//reset the state!!
isReading=false;
doReading();
return;
}
if (isWriting)
{
isWriting=false;
doWriting();
return ;
}
shrinkStack();
switch (semOperatorStack[semOperatorPtr-1])
{
case INTER_ASN:
instruction.operand1=semOperandStack[--semOperandPtr];
instruction.result=semOperandStack[--semOperandPtr];
if (typeCheck(instruction.operand1, instruction.result))
{
/*
if (instruction.operand1->structure==1)
{
errorHandle(CannotOperateOnComplicatedType, (void*)instruction.operand1,
(void*)instruction.result);
}
*/
}
else
{
errorHandle(UnmatchedOperandType, (void*)instruction.operand1,
(void*)instruction.result);
}
instruction.operand2=NULL;
semOperatorPtr--;//pop the ":="
instruction.opcode=INTER_ASN;
printInterCode();
//semOperandPtr=semOperatorPtr=0;
break;
case TKELSE:
Node *thenLbl;
//sPtr=semOperandStack[--semOperandPtr];
thenLbl=semOperandStack[--semOperandPtr];
semOperatorPtr--;//pop "else"
instruction.result=thenLbl;
instruction.opcode=INTER_LABEL;
instruction.operand1=instruction.operand2=NULL;
printInterCode();
//Hash::flushTemp();
//semOperandPtr=semOperatorPtr=0;
break;
case INTER_CEQ:
case INTER_CLT:
case INTER_CLE:
case INTER_CGT:
case INTER_CGE:
case INTER_CNE:
instruction.operand2=semOperandStack[--semOperandPtr];
//some check
instruction.operand1=semOperandStack[--semOperandPtr];
instruction.opcode=(InstructionSet)semOperatorStack[--semOperatorPtr];
//type-check function here!!!!
if (typeCheck(instruction.operand1, instruction.operand2))
{
if (instruction.operand1->structure>0)//not simple type
{
errorHandle(CannotCompareComplicatedType, (void*)instruction.operand1,
(void*)instruction.operand2);
}
//replaced here
//instruction.result=Hash::nextTemp(instruction.operand1);
instruction.result=nextTemp(instruction.operand1);
printInterCode();
//Hash::pushCurTemp();
semOperandStack[semOperandPtr++]=instruction.result;
//shrinkStack();
}
else
{
errorHandle(UnmatchedOperandType, (void*)instruction.operand1,
(void*)instruction.operand2);
}
break;
case TKLOOP:
semOperatorPtr--;//pop loop
instruction.result=semOperandStack[--semOperandPtr];
instruction.operand1=instruction.operand2=NULL;
instruction.opcode=INTER_JUMP;
printInterCode();
break;
}
//Hash::flushTemp();
//semOperandPtr=semOperatorPtr=0;
}
void Parser::doSubscript()
{
if (semOperatorStack[semOperatorPtr-1]==TKOPENBRACKET)
{
Node* subscriptPtr, *idPtr, *temp;
subscriptPtr=semOperandStack[--semOperandPtr];
idPtr=semOperandStack[--semOperandPtr];
//remove bracket;
semOperatorPtr--;
if (idPtr->type==0)
{
temp=constTemp(IntegerLength);
}
if (idPtr->type==1)
{
temp=constTemp(CharLength);
}
instruction.opcode=INTER_MUL;
instruction.operand1=subscriptPtr;
instruction.operand2=temp;
//replaced here
instruction.result=nextTemp(subscriptPtr);//default???
printInterCode();
//here is what should be changed to be indexed!!!!!!
instruction.opcode=INTER_ADD;
instruction.operand1=idPtr;
instruction.operand2=instruction.result;
instruction.result=nextTemp(idPtr);
instruction.result->structure=0; //now it is no more array!!!
//to make it a pointer!!!!
//don't know
//instruction.result->type=2;
//it becomes a pointer!!!
instruction.result->structure=4;
//don't know
printInterCode();
semOperandStack[semOperandPtr++]=instruction.result;
}
}
void Parser::codeAction(int tIndex)
{
switch (tIndex)
{
case TKOPENBRACKET: //=26, //no.26[
semOperatorStack[semOperatorPtr++]=tIndex;
break;
case TKCLOSEBRACKET: //=28, //]
shrinkStack();
doSubscript();
break;
case TKEQ: //=59, //no.59= | no.60< | no.61> | no.62<= | no.63>= | no.64!=
semOperatorStack[semOperatorPtr++]=INTER_CEQ;
break;
case TKLT: //=60,
semOperatorStack[semOperatorPtr++]=INTER_CLT;
break;
case TKLE: //=62,
semOperatorStack[semOperatorPtr++]=INTER_CLE;
break;
case TKGT: //=61,
semOperatorStack[semOperatorPtr++]=INTER_CGT;
break;
case TKGE: //=63,
semOperatorStack[semOperatorPtr++]=INTER_CGE;
break;
case TKNEQ: //=64,
semOperatorStack[semOperatorPtr++]=INTER_CNE;
break;
case TKASSIGNMENT: //=33 //no.33:=
//shrinkStack();
semOperatorStack[semOperatorPtr++]=INTER_ASN;
break;
case TKPLUS: //=52, // no.52+ | no.53-
semOperatorStack[semOperatorPtr++]=INTER_ADD;
break;
case TKSUB: //=53,
semOperatorStack[semOperatorPtr++]=INTER_SUB;
break;
case TKMUL: //=56, //no.56* no.54R no.68M3 | no.9e | no.57/ no.54R no.68M3
semOperatorStack[semOperatorPtr++]=INTER_MUL;
break;
case TKDIV: //=57,
semOperatorStack[semOperatorPtr++]=INTER_DIV;
break;
case TKTHEN:
doThen();
break;
case TKELSE:
doElse();
break;
case TKSEMICOLON:
//shrinkStack();
doFinishStatement();
break;
case TKLOOP:
doLoop();
break;
case TKEXIT:
doExit();
break;
case TKOPENPAR:
doParenthesis();
break;
case TKCLOSEPAR:
shrinkStack();
doModuleCall();
//semOperatorStack[semOperatorPtr++]=tIndex;
break;
case TKCOMA:
//do I need to shrink stack??????
//shrinkStack();
if (isReading)
{
doReading();
}
else
{
if (isWriting)
{
doWriting();
}
else
{
doParamCheck();
}
}
//semOperatorStack[semOperatorPtr++]=tIndex;
break;
case TKREAD:
isReading=true;
break;
case TKWRITE:
isWriting=true;
break;
}
}
void Parser::doWriting()
{
instruction.opcode=INTER_WRITE;
instruction.operand1=semOperandStack[--semOperandPtr];
/*
if (!instruction.operand1->immediate&&instruction.operand1->structure>0)
{
errorHandle(CannotOperateOnComplicatedType, (void*)instruction.operand1, NULL);
}
*/
instruction.result=instruction.operand2=NULL;
printInterCode();
}
void Parser::doReading()
{
instruction.result=semOperandStack[--semOperandPtr];
/*
if (instruction.result->structure>0)
{
errorHandle(CannotOperateOnComplicatedType, (void*)instruction.result, NULL);
}
*/
instruction.operand1=instruction.operand2=NULL;
instruction.opcode=INTER_READ;
printInterCode();
}
void Parser::doModuleCall()
{
Node* temp;
if (modulePtr!=NULL)
{
temp=semOperandStack[--semOperandPtr];//pop the pointer to check
if (temp==modulePtr)
{
//no params
if (modulePtr->paramNo!=paramCounter)
{
errorHandle(MissMatchModuleParamNo, (void*)modulePtr, NULL);
}
}
else
{
//this must be the last param, otherwise error
if (paramCounter!=modulePtr->paramNo-1)
{
errorHandle(MissMatchModuleParamNo, (void*)modulePtr, NULL);
}
//has params
semOperandStack[semOperandPtr++]=temp;//fake push down,
//in order to check and it is the LAST param!!!
doParamCheck();
temp=semOperandStack[--semOperandPtr];//popthe last one
if (temp!=modulePtr)//this should be the module ptr itself
{
errorHandle(MissMatchModuleParamNo, (void*)modulePtr, NULL);
}
}
instruction.result=modulePtr;
instruction.opcode=INTER_CALL;
instruction.operand1=instruction.operand2=NULL;
printInterCode();
modulePtr=NULL;
paramCounter=0;
}
}
void Parser::doParamCheck()
{
Node*temp;
if (modulePtr!=NULL)
{
/*
if (paramCounter==modulePtr->paramNo)
{
temp=semOperandStack[semOperandPtr-1];//don't pop
if (temp!=modulePtr)
{
errorHandle(MissMatchModuleParamNo, (void*)modulePtr, NULL);
}
else
{
return;
}
}
*/
temp=semOperandStack[--semOperandPtr];//pop the param
//unexpected bottom of param stack
if (temp==modulePtr&&modulePtr->paramNo!=paramCounter)
{
errorHandle(MissMatchModuleParamNo, (void*)modulePtr, NULL);
}
//more param than expected
if (temp!=modulePtr&&modulePtr->paramNo==paramCounter)
{
errorHandle(MissMatchModuleParamNo, (void*)modulePtr, NULL);
}
//there is no param at all!
if (temp==modulePtr&&modulePtr->paramNo==0)
{
semOperandStack[semOperandPtr++]=modulePtr;//push back
return;
}
if (!typeCheck(modulePtr->paramType[paramCounter], temp))
{
errorHandle(MissMatchModuleParamType, (void*)modulePtr,
(void*)temp);
}
instruction.opcode=INTER_PARAM;
instruction.result=temp;
instruction.operand1=instruction.operand2=NULL;
printInterCode();
paramCounter++;//don't forget
}
}
void Parser::doParenthesis()
{
if (isID)
{
modulePtr=semOperandStack[semOperandPtr-1];//Don't POP!!otherwise
//we don't know if there is param or not, it is like the bottom of stack symbol
if (modulePtr->structure!=2)//2--module
{
errorHandle(IllegalModuleCalling, (void*)modulePtr, NULL);
}
//semOperatorStack[semOperatorPtr++]=TKOPENPAR;//no need for openparenthesis
paramCounter=0;
}
}
void Parser::doExit()
{
instruction.opcode=INTER_HALT;
instruction.operand1=instruction.operand2=instruction.result=NULL;
printInterCode();
}
void Parser::doLoop()
{
//create
//instruction.result=constTemp(0);
instruction.result=createLabel();
instruction.operand1=instruction.operand2=NULL;
instruction.opcode=INTER_LABEL;
printInterCode();
semOperatorStack[semOperatorPtr++]=TKLOOP;//push down the loop as operator
semOperandStack[semOperandPtr++]=instruction.result;//push down label
}
Node* Parser::semAction(char* varName)
{
Node* ptr=NULL;
switch(semState)
{
case SEMPROGRAMBODY:
if (mainHash.insert(varName, ptr))
{
//varList[varListCount++]=ptr;
ptr->declared=false;
ptr->scope=0;
errorHandle(VariableUndeclared, (void*)(ptr->name));
}
break;
case SEMPROGRAM:
if (!mainHash.insert(varName, ptr))
{
errorHandle(VariableRedeclared, (void*)(ptr->name));
}
else
{
varListCount=1;//make sure it is starting from 0!!!
varList[0]=ptr;
ptr->structure=PROGRAM;
}
break;
case SEMPROGRAMVAR:
if (!mainHash.insert(varName, ptr))
{
errorHandle(VariableRedeclared, (void*)(ptr->name));
}
else
{
ptr->scope=0;
varList[varListCount++]=ptr;
}
break;
case SEMMODULE:
if (!mainHash.insert(varName, ptr))
{
errorHandle(VariableRedeclared, (void*)(ptr->name));
}
else
{
ptr->structure=MODULE;
varListCount=1;
varList[0]=ptr;//put module at beginning of varlist
//try to see if ??????????????????????
moduleGen(ptr);
//fTarget<<ptr->name<<" \n";
}
break;
case SEMMODULEPARAM:
if (!moduleHash.insert(varName, ptr))
{
errorHandle(VariableRedeclared, (void*)(ptr->name));
}
else
{
ptr->scope=1;
varList[varListCount++]=ptr;
varList[0]->paramType[varList[0]->paramNo++]=ptr;
}
break;
case SEMMODULEVAR:
if (!moduleHash.insert(varName, ptr))
{
errorHandle(VariableRedeclared, (void*)(ptr->name));
}
else
{
ptr->scope=1;
varList[varListCount++]=ptr;
}
break;
case SEMMODULEBODY:
if (!moduleHash.search(varName, ptr))
{
if (!mainHash.search(varName, ptr))
{
//not declared, so we add it to module hash
moduleHash.insert(varName, ptr);
ptr->declared=false;
ptr->scope=1;
//varList[varListCount++]=ptr;
errorHandle(VariableUndeclared, (void*)(ptr->name));
}
else
{
//declared in program, so, add it to list
//ptr->scope=1;
//varList[varListCount++]=ptr;
//we need to insert the pointer not the string!!!
moduleHash.addPtr(ptr);
}
}
/*
if (!ptr->declared)
{
errorHandle(VariableUndeclared, (void*)varName);
}
*/
break;
}
return ptr;
}
//to do: inside typeAction, must reset varList to 1!!!
//because varlist[0] is always reserved for module and program
//no! this action is done by semState-setting!! see above
void typeAction(int theVar)
{
if (theVar==TKINTEGER||theVar==TKCHAR||theVar==TKARRAY||theVar==TKNUMBER)
{
for (int i=1; i<varListCount; i++)
{
if (theVar==TKINTEGER||theVar==TKCHAR)
{
varList[i]->type=(theVar==TKINTEGER)?0:1;
}
else
{
if (theVar==TKARRAY)
{
varList[i]->structure=ARRAY;
}
else
{
//is number
varList[i]->size=scanner.getNumber();
}
}
//add these parameter pointers into list of module in its structure
/*//no need here! it should be at place of addvariable
if (semState==SEMMODULEPARAMTYPE)
{
varList[i]->isParam=true;
}
*/
}
}
}
Node* Parser::nextTemp(Node* sameType)
{
Node* ptr=Hash::nextTemp(sameType);
int shift=1;
if (ptr->type==0)
{
shift=4;
}
if (ptr->structure==1)
{
shift*=ptr->size;
}
if (semState==SEMPROGRAMBODY)
{
ptr->address=varCounter;
varCounter+=shift;
}
else
{
ptr->address=stackCounter;
stackCounter+=shift;
}
return ptr;
}
/*
//no one is using this function!!!?????
Node* Parser::nextTemp()
{
Node* ptr=Hash::nextTemp();
return ptr;
}
*/
Node* Parser::constTemp(int num)
{
Node* ptr=Hash::nextTemp();
ptr->immediate=true;
ptr->type=0;
ptr->intVal=num;
return ptr;
}
Node* Parser::constTemp(char ch)
{
Node* ptr=Hash::nextTemp();
ptr->immediate=true;
ptr->type=1;
ptr->chVal=ch;
return ptr;
}
void Parser::LLParse()
{
//internal method
//void type2SemState(int theVar);
//Node* semAction(char* varName);
void typeAction(int theVar);
//internal method
int theToken, theVar;
bool canPop=true;
Node* ptr=NULL;
while (scanner.nextToken())
{
if (!canPop)
{
errorHandle(UnexpectedEmptyStack);
}
if (scanner.token.type==COMMENTTYPE)
{
continue;
}
//the tokentype from scanner has to be mapped to
//that of parser
theToken=type2token[scanner.token.type];
while ((canPop=pop(theVar))==true)
{
//theVar is from stack, theToken is from source
if (grammar.token[theVar]->terminal)
{
if (theToken==theVar)//match
{
//never call it here!!!
//type2SemState(theVar);
//3-add-code generation
if (theVar==TKID)
{
isID=true;//this means it is
ptr=semAction(scanner.getToken());
//THIS IS THE ID
variableAction(ptr);
}
else
{
//add non-variable semantics into stack or to reduce
if (semState==SEMPROGRAMBODY||semState==SEMMODULEBODY)
{
if (theVar==TKNUMBER)
{
ptr=constTemp(scanner.getNumber());
variableAction(ptr);
}
if (theVar==TKCONSTCHAR)
{
ptr=constTemp(scanner.getChar());
variableAction(ptr);
}
codeAction(theToken);
}
isID=false;//previous token is ID
}
//this is the end of type action
if (isType&&theVar==TKSEMICOLON)
{
addVariable();
isType=false;
varListCount=1;
}
if (isType)
{
typeAction(theVar);
}
break;
}
else
{
if (theVar==emptyIndex)
{
continue;
}
errorHandle(IllegalGrammarToken);
}
}
else
{
if (!pushToken(theVar, theToken))
{
int temp=token2type[theToken];
errorHandle(IllegalGrammarToken, (void*)(grammar.token[temp]->name));
}
}
}
}
if (pop(theVar))
{
if (theVar!=stackBottomIndex)
{
errorHandle(NotEmptyStack);
}
}
//instruction.opcode=INTER_EXIT;
doExit();
//instruction.result=
//so at end to print the main
fRule<<"print program "<<endl;
mainHash.print();
}
bool Parser::pushToken(int tIndex, int theToken)
{
//void type2SemState(int theVar);
//void type2CodeState(int theVar);
if (table[tIndex][theToken]!=-1)
{
//for debugging
//cout<<grammar.token[tIndex]->name<<" => ";
fRule<<grammar.token[tIndex]->name<<" => ";
grammar.printRule(table[tIndex][theToken], fRule);
//cout<<endl;
fRule<<endl;
pushRule(table[tIndex][theToken]);
//////////////////////////////////////////////////////////////////////////
//modified on mar. 26
//this is the production rule index, instead of token index!!!
type2SemState(table[tIndex][theToken]);//
//////////////////////////////////////////////////////////////////////////
//debug only
//the following are code action
type2CodeState(table[tIndex][theToken]);
//if (tIndex==Sl0)//&&table[tIndex][theToken]==68)
/*
if (table[tIndex][theToken]==68)
{
//
if (top!=4)
{
fRule<<"print module "<<varList[0]->name<<endl;
moduleHash.print();
}
}
*/
//this may need to modify mar.26
////////////////////////////////////////////////
//Ml0 ==> rule.2 module i ( Vl ) Dv B Ml0 | rule.63 e
if (table[tIndex][theToken]==2||table[tIndex][theToken]==63)
{
if (!firstModule)
{
fRule<<"print module "<<varList[0]->name<<endl;
moduleHash.print();
moduleEndGen();
}
if (firstModule)
{
firstModule=false;
}
//semState=SEMPROGRAMVAR;
if (table[tIndex][theToken]==63)
{
programGen();
}
}
//debug only
return true;
}
return false;
}
void Parser::pushRule(int theRule)
{
int len=0;
while (grammar.production[theRule][len]!=-1)
{
len++;
}
for (int i=len-1; i>=0; i--)
{
if (!push(grammar.production[theRule][i]))
{
errorHandle(StackOverFlow);
}
}
}
void Parser::LRParse()
{
/*
int theToken, theVar;
bool canPop=true;
while (scanner.nextToken())
{
if (!canPop)
{
errorHandle(UnexpectedEmptyStack);
}
if (scanner.token.type==COMMENTTYPE)
{
continue;
}
theToken=type2token[scanner.token.type];
//if (grammar.isShift(LRTable[
}
/*
while ((canPop=pop(theVar))==true)
{
if (grammar.token[theVar]->terminal)
{
if (theToken==theVar)//match
{
break;
}
else
{
if (theVar==emptyIndex)
{
continue;
}
errorHandle(IllegalGrammarToken);
}
}
else
{
if (!pushToken(theVar, theToken))
{
errorHandle(IllegalGrammarToken);
}
}
}
}
if (pop(theVar))
{
if (theVar!=stackBottomIndex)
{
errorHandle(NotEmptyStack);
}
/*
if (theVar!=stackBottomIndex)
{
errorHandle(NotEmptyStack);
}
}
*/
}
void Parser::parseFile(const char*fileName)
{
char buffer[50];
char bufferRule[50];
strcpy(buffer, fileName);
int temp=strlen(buffer);
buffer[temp-4]='0';
buffer[temp-3]='.';
buffer[temp-2]='t';
buffer[temp-1]='x';
buffer[temp]='t';
buffer[temp+1]='\0';
strcpy(bufferRule, buffer);
bufferRule[strlen(buffer)-5]='1';
fRule.open(bufferRule);
bufferRule[strlen(buffer)-5]='2';
fInterCode.open(bufferRule);
bufferRule[strlen(buffer)-5]='3';
fTarget.open(bufferRule);
//scanner.readFromFile(fileName, defaultListFile);
//for debugging purpose
scanner.readFromFile(fileName, buffer);
prepare();
if (isLLParser)
{
LLParse();
}
else
{
LRParse();
}
/*
while (scanner.nextToken())
{
if (!canPop)
{
errorHandle(UnexpectedEmptyStack);
}
if (scanner.token.type==COMMENTTYPE)
{
continue;
}
theToken=type2token[scanner.token.type];
while ((canPop=pop(theVar))==true)
{
if (grammar.token[theVar]->terminal)
{
if (theToken==theVar)//match
{
break;
}
else
{
if (theVar==emptyIndex)
{
continue;
}
errorHandle(IllegalGrammarToken);
}
}
else
{
if (!pushToken(theVar, theToken))
{
errorHandle(IllegalGrammarToken);
}
}
}
}
if (pop(theVar))
{
if (theVar!=stackBottomIndex)
{
errorHandle(NotEmptyStack);
}
/*
if (theVar!=stackBottomIndex)
{
errorHandle(NotEmptyStack);
}
}
*/
}
Parser::Parser(bool forLL)
{
isLLParser=forLL;
initialize();
}
void Parser::initialize()
{
labelCount=1000;
top=0;
insCounter=varCounter=0;
}
bool Parser::pop(int& num)
{
if (top==0)
{
return false;
}
num=stack[--top];
return true;
}
bool Parser::push(int num)
{
if (top==MaxStackLength-1)
{
return false;
}
stack[top++]=num;
return true;
}
void Parser::prepare()
{
top=0;
if (isLLParser)
{
pushRule(grammar.token[startSymbolIndex]->production[0]);
}
else
{
push(0);
}
}
file name: scanner.h
///////////////////////////////////////////////////////////////////////////////////////////
//Program: SLang Scanner
//Author: Qingzhe Huang
//Date: Jan. 18, 2004
//FileName: scanner.h
//Features:
// 1. I want to improve efficiency of scanning, so I used table-driven method.
// 2. I used enum to represent character of all ASCII---CharType---where "space, tab,
// end of line, end of file are all considered to be White Space.
// 3. All legal token is represented by enum TokenType.
// 4. I defined a huge amount of TokenState which is basically the state of a DFA. As
// I don't want to search reserved keyword with linear search or whatever, I have
// many states for the reserved words.
// 5. I deliberately make the sequence of first 38 TokenState elements exactly same as
// all that of TokenType, so that each final state of DFA has a 1-1 correspondence with
// type of token.
// 6. I defined a struct of Token which may be used in future parser.
// 7. I defined an errorNo variable to represent various errors. And a series error string
// for displaying information.
// 8. When class Scanner is created, it will initialize the big "state-charType" table.
// 9. When readFromFile is called, it will first read one char in advance.
// 10. When an error is encountered, the caller of Scanner should understand that no further
// char is read in. So, stop calling "nextToken()". This is a bit controvercial, and I
// plan to change it in next version.
////////////////////////////////////////////////////////////////////////////////////////////
/*////////////////////////////////////////////////////////////////////////////
Program: SLang Scanner
Author: Qingzhe Huang
Date: Jan. 21, 2004
FileName: scanner.h
Features:
1. I restructured the struct Token, to make it a union field in order to store int
value for number.
2. I restructured the function "nextToken()" in order to give out correct line no.
when error occurs.
*////////////////////////////////////////////////////////////////////////////////
#ifndef SCANNER_H
#define SCANNER_H
#include <iostream>
using namespace std;
extern enum ErrorCode;
const int TokenStateCount=138;
const int CharTypeCount=72;
const int MaxTokenLength=255;
const int MaxNumberLength=12;
enum CharType
{
//all small letters 26
SMALLA,SMALLB,SMALLC,SMALLD,SMALLE,SMALLF,SMALLG,SMALLH,SMALLI,SMALLJ,SMALLK,SMALLL,
SMALLM,SMALLN,SMALLO,SMALLP,SMALLQ,SMALLR,SMALLS,SMALLT,SMALLU,SMALLV,SMALLW,SMALLX,
SMALLY,SMALLZ,
//all big letters 26
BIGA,BIGB,BIGC,BIGD,BIGE,BIGF,BIGG,BIGH,BIGI,BIGJ,BIGK,BIGL,BIGM,BIGN,BIGO,BIGP,BIGQ,
BIGR,BIGS,BIGT,BIGU,BIGV,BIGW,BIGX,BIGY,BIGZ,
//all digit 1
DIGIT,
//all symbols 16
QUOTE, OPENPAR, CLOSEPAR, SEMICOLON,PLUS, MINUS, TIMES, SLASH, COLON,
EQUAL,SMALLER,GREATER,EXCLAIM,OPENBRACKET, CLOSEBRACKET,COMMA,
//space, tab, end of line are regarded as whitespace, 1
WHITESPACE,
//UNDERSCORE IS A SPECIAL SYMBOL 1
UNDERSCORE,
//all other ASCII is regarded as illegal 1
ILLEGAL
};
//TOTAL 38, JUST 1-1 WITH THE FIRST 38 OF TOKENSTATE
enum TokenType
{
//GENERAL TYPE 5
IDTYPE, NUMBERTYPE, CHARCONSTTYPE, COMMENTTYPE, ERRORTYPE,
//THE FOLLOWING ARE SYMBOL TYPE 18
OPENPARTYPE, CLOSEPARTYPE, SEMICOLONTYPE, PLUSTYPE, MINUSTYPE, TIMESTYPE,
SLASHTYPE, ASSIGNMENTTYPE, SMALLERTYPE, GREATERTYPE, EQUALTYPE, SMALLEREQUALTYPE,
GREATEREQUALTYPE, NOTEQUALTYPE, OPENBRACKETTYPE, CLOSEBRACKETTYPE, COMMATYPE,
COLONTYPE,
//THE FOLLOWING ARE RESERVED TYPE 15
BEGINTYPE, ENDTYPE, PROGRAMTYPE, VARIABLESTYPE,INTEGERTYPE, ARRAYTYPE, CHARTYPE,
MODULETYPE, IFTYPE, THENTYPE, ELSETYPE, LOOPTYPE, EXITTYPE, READTYPE, WRITETYPE
};
//total 138 states
enum TokenState
{
//THE FINAL STATE 38, in order to easy initialize "finalState", I put them in beginning
//5 generals
IDEND, NUMBEREND, CONSTCHAREND, COMMENTEND, ERROR,
//18 symbols
OPENPAREND, CLOSEPAREND, SEMICOLONEND, PLUSEND, MINUSEND, TIMESEND,
SLASHEND, ASSIGNMENTEND, SMALLEREND, GREATEREND, EQUALEND, SMALLEREQUALEND,
GREATEREQUALEND, NOTEQUALEND, OPENBRACKETEND, CLOSEBRACKETEND, COMMAEND,
COLONEND,
//15 reserved
BEGINEND, ENDEND, PROGRAMEND, VARIABLESEND, INTEGEREND, ARRAYEND, CHAREND,
MODULEEND, IFEND, THENEND, ELSEEND, LOOPEND, EXITEND, READEND, WRITEEND,
//THE FOLLOWING ARE ALL NON-FINAL STATES
//THE very FIRST CHAR 1
READY,
//THE FOLLOWING ARE ALL RESERVED STATE
//the first char 12
ARRAY1, BEGIN1, CHAR1, E1, I1, LOOP1, MODULE1, PROGRAM1, READ1, THEN1, VARIABLES1,
WRITE1,
//THE SECOND CHAR 15
ARRAY2, BEGIN2, CHAR2, ELSE2, END2, EXIT2, IF2, INTEGER2, LOOP2, MODULE2, PROGRAM2,
READ2, THEN2, VARIABLES2, WRITE2,
//THE THIRD CHAR 14
ARRAY3, BEGIN3, CHAR3, ELSE3, END3, EXIT3, INTEGER3, LOOP3, MODULE3, PROGRAM3, READ3,
THEN3, VARIABLES3, WRITE3,
//THE FOURTH CHAR 13
ARRAY4, BEGIN4, CHAR4, ELSE4, EXIT4, INTEGER4, LOOP4, MODULE4, PROGRAM4, READ4, THEN4,
VARIABLES4, WRITE4,
//THE FIFTH CHAR 7
ARRAY5, BEGIN5, INTEGER5, MODULE5, PROGRAM5, VARIABLES5, WRITE5,
//THE SIXTH CHAR 4
INTEGER6, MODULE6, PROGRAM6, VARIABLES6,
//THE SEVENTH CHAR 3
INTEGER7, PROGRAM7, VARIABLES7,
//THE EIGHTH CHAR 1
VARIABLES8,
//THE NINETH CHAR 1
VARIABLES9,
//THESE ARE NON-RESERVED
//THESE ARE GENERAL 9
IDBEGIN, IDUNDERSCORE, NUMBERBEGIN, CONSTCHARQUOTEBEGIN, CONSTCHARBEGIN, COMMENTSTARBEGIN,
COMMENTBEGIN, COMMENTSTAREND, COMMENTSLASHBEGIN,
//the SINGLE symbols 16
QUOTEBEGIN, OPENPARBEGIN, CLOSEPARBEGIN, SEMICOLONBEGIN,
PLUSBEGIN, MINUSBEGIN, TIMESBEGIN, SLASHBEGIN, COLONBEGIN, SMALLERBEGIN, GREATERBEGIN,
EQUALBEGIN, EXCLAIMBEGIN, OPENBRACKETBEGIN, CLOSEBRACKETBEGIN, COMMABEGIN,
//MULTI SYMBOL 4
ASSIGNMENTBEGIN, SMALLEREQUALBEGIN,
GREATEREQUALBEGIN, NOTEQUALBEGIN
};
//extern ErrorCode errorNo;
extern void errorHandle(ErrorCode errorNo);
struct Token
{
TokenType type;
union
{
char name[MaxTokenLength+1];
int number;
char charVal;
};
};
class Scanner
{
private:
int tokenCount;
unsigned char ch;
void printLineNo();
FILE* stream;
bool nextChar();
void initialize();
bool resume();
public:
Scanner();
static Token token;
bool readFromFile(const char* fileName, const char* listFileName);
char* getToken(){return token.name;}
int getNumber(){ return token.number;}
char getChar(){ return token.charVal;}
bool nextToken();
void report();
};
void initialTokenState();
#endif
file name: scanner.cpp
/*//////////////////////////////////////////////////////////////////////////// Program: SLang Scanner Author: Qingzhe Huang Date: Jan. 21, 2004 FileName: scanner.cpp Features: 1. As Dr. Optrany said, the number should be stored as int or double whatever. 2. I restructured the function "nextToken()" in order to give out correct line no. when error occurs. *//////////////////////////////////////////////////////////////////////////////// #include <iostream> #include <fstream> #include "scanner.h" #include "errorNo.h" using namespace std; ofstream fList; //ofstream fRule; //this will determine how many errors o