Reduce
A.First Edition
This is my first edition of Reduce, which has only a single usage to translate a NFA into regular expression.
I feel unsatisfied with the code. It is ugly! However, I don't have enough energy to make a perfect design.
In order to translate the "Transition Diagram" into "Regular Expression", you need to eliminate all nodes except
the start and final ones. 1. Find out all inward and outward arrows and then replace the node by combining each
inward starting node with each outward ending node. Their expressions should be concatenated.
The idea is straight-forward, the algorithm is unefficient.
C.Further improvement
#include <iostream> #include <fstream> using namespace std; enum States {InBound, OutBound, IsLoop}; struct Function { int domain; char* expr; int range; bool isStart; bool isFinal; }; class Reduce { private: Function rules[30]; Function temp[30]; int tempCount; int size; int inCount; int outCount; int loopCount; int buffer[3][30]; bool findStates(int state); bool findShortCut(int state); void replace(int state); void copyFunction(Function* target, Function* source, bool toClear=false); void generate(); void moveRule(int state); // bool notAffected(int index); void clearState(); void addFunction(int index); void getExpr(); public: void setParam(Function**theRule, int theSize); void readRules(const char* fileName); void beginReduce(); }; int main() { Reduce R; //you need to copy my input file to your local C: R.readRules("c:\\rules.txt"); R.beginReduce(); return 0; } void Reduce::addFunction(int index) { char str[256]; for (int i=0; i<tempCount; i++) { if (temp[i].domain == rules[index].domain&&temp[i].range==rules[index].range) { strcpy(str, "("); strcat(str, temp[i].expr); strcat(strcat(str, "+"), rules[index].expr); strcat(str, ")"); delete []temp[i].expr; temp[i].expr = new char[strlen(str)+1]; strcpy(temp[i].expr, str); return; } } copyFunction(&temp[tempCount], &rules[index]); tempCount++; } void Reduce::readRules(const char* fileName) { ifstream f; int flag; char str[256]; size = 0; f.open(fileName, ios::in); while (!f.eof()) { f>>rules[size].domain>>str>>rules[size].range>>flag; rules[size].isStart = flag==1?true:false; f>>flag; rules[size].isFinal = flag==1?true:false; rules[size].expr = new char[strlen(str)+1]; strcpy(rules[size].expr, str); size++; } } /* bool Reduce::notAffected(int index) { rules[index]. for (int i=0; i<inCount; i++) { if (rules[buffer[InBound][i]].range==state) { return false; } } for (i=0; i<outCount; i++) { if (rules[buffer[OutBound][i]].domain==state) { return false; } } for (i =0; i<loopCount; i++) { if (rules[buffer[IsLoop][i]].domain==state) { return false; } } return true; } */ void Reduce::clearState() { for (int i=0; i<inCount; i++) { delete [] rules[buffer[InBound][i]].expr; } for (i=0; i<outCount; i++) { delete [] rules[buffer[OutBound][i]].expr; } for (i =0; i<loopCount; i++) { delete [] rules[buffer[IsLoop][i]].expr; } } void Reduce::copyFunction(Function* target, Function* source, bool toClear) { target->domain = source->domain; target->range = source->range; if (toClear) { delete [] target->expr; } target->expr = new char[strlen(source->expr)+1]; strcpy(target->expr, source->expr); target->isFinal = source->isFinal; target->isStart = source->isStart; } void Reduce::moveRule(int state) { for (int i=0; i<size; i++) { //if not affected if (rules[i].domain!=state&&rules[i].range!=state) { addFunction(i); } } clearState(); for (i=0; i<tempCount; i++) { copyFunction(&rules[i], &temp[i]); } size = tempCount; } void Reduce::setParam(Function** theRule, int theSize) { size = theSize; for (int i=0; i<size; i++) { copyFunction(&rules[i], theRule[i]); } } void Reduce::replace(int state) { generate(); moveRule(state); } void Reduce::generate() { tempCount=0; char str[256]; int in, out, loop; for (int i=0; i<inCount; i++) { for (int j=0; j<outCount; j++) { in = buffer[InBound][i]; out = buffer[OutBound][j]; temp[tempCount].domain = rules[in].domain; temp[tempCount].isStart = rules[in].isStart;//the start from in temp[tempCount].range = rules[out].range; temp[tempCount].isFinal = rules[out].isFinal;//final from out strcpy(str, rules[in].expr); for (int k=0; k<loopCount; k++) { loop = buffer[IsLoop][k]; if (k==0) { strcat(str, ".star"); strcat(str, "("); } strcat(strcat(str, rules[loop].expr), "+"); if (k==loopCount-1) { //replace '+' with ')' str[strlen(str)-1]=')'; } } strcat(str, "."); strcat(str, rules[out].expr); temp[tempCount].expr = new char[strlen(str)+1]; strcpy(temp[tempCount].expr, str); //finished one record tempCount++; } } } bool Reduce::findStates(int state) { for (int i=0; i<size; i++) { if (rules[i].domain==state||rules[i].range==state) { return true; } } return false; } bool Reduce::findShortCut(int state) { inCount = outCount = loopCount = 0; for (int i=0; i<size; i++) { //exclusiveOr if (rules[i].domain==state&&rules[i].range==state) { if (rules[i].isFinal||rules[i].isStart) { return false; } buffer[IsLoop][loopCount]=i; loopCount++; } else { if (rules[i].domain==state) { if (rules[i].isStart) { return false; } buffer[OutBound][outCount]=i; outCount++; } if (rules[i].range==state) { if (rules[i].isFinal) { return false; } buffer[InBound][inCount]=i; inCount++; } } } //must both have in and out return (inCount!=0&&outCount!=0); } void Reduce::getExpr() { char str[256]; for (int i=0; i<size; i++) { //loop at start if (rules[i].isStart&&(rules[i].domain==rules[i].range)) { strcat(strcpy(str, "star("), rules[i].expr); strcat(str, "+"); } } str[strlen(str)-1]=')'; //the main expr for (i=0; i<size; i++) { if (rules[i].domain!=rules[i].range) { strcat(strcat(str, "."), rules[i].expr); } } for (i=0; i<size; i++) { //final loop if (rules[i].isFinal&&(rules[i].domain==rules[i].range)) { strcat(str, ".star("); strcat(str, rules[i].expr); strcat(str, "+"); } } str[strlen(str)-1]=')'; cout<<str<<endl; } void Reduce::beginReduce() { int state=0; while (findStates(state)) { if (findShortCut(state)) { replace(state); } state++; } getExpr(); }
The result is like following:
star((b.star(b).a.b+a)).b.star(b).a.a.star((a+b))
Press any key to continue
กก
**explanation**
1. I used "star" to represent "keleen star" as I cannot print the exponent-like "*"'s.
2. I used "." to show it is a concatenation for better reading.
3. "+" sign still means the "union".
4. Here is the input file which is the DFA for a language over {a,b} such that "baa" is its substring.
5. The input file format is like following:
Domain Expression Range isStart isFinal
6. Domain and Range means that the transition functions is starting from Domain and ending at Range which is
the index of states. The expression is the label. When Domain is the initial state, isState is true,
indicated by "1". Otherwise "0". When Range is the final state, isFinal is "1". Otherwise "0".
7. The input file is named "rules.txt" and stored in place of "c:\".
กก