Dependency(canonical)
A. Third Edition
This is the third edition of my "dependency" and I think it is almost the perfect version of all because it
implements almost all functions you can name---canonical cover, decomposition, lossless-join-check.
"Function dependency" is a very important issue in database theory. And what's more, it is the essence of all.
Because I think it is the key of knowledge: the world is described by relations and relations of relations.
For the function "decomposition", I don't want to export an array of sets of the decomposition. The simple reason
is who would use it this way. So, I simply "cout" them.
E.Further improvement
F.File listing
1. rules.h
2. rules.cpp
3. set.h
4. set.cpp
5. main.cpp (main)
file name: set.h
#ifndef SET_H #define SET_H #include <iostream> #include <bitset> using namespace std; const int MaxAttrNumber=20; class Set { //this is a long-pain for me, I have no other way to //let compiler recognize this "friend" function outside declaration friend ostream& operator<<(ostream& out, const Set& dummy) { for (int i=0; i<dummy.size; i++) { if (dummy.theSet.test(i)) { out<<'1'; } else { out<<'0'; } } return out; } private: bitset<MaxAttrNumber> theSet; int size; int current; public: void setSize(const Set& dummy); int getSize(){ return size;} int next(int current) const; int first() const; int count() const; Set intersection(const Set& dummy) const; Set unionSet(const Set& dummy) const; Set difference(const Set& dummy) const; //I am crazy about operator overloading!!!:) Set operator-(const Set& dummy) const; Set operator+(const Set& dummy) const; Set operator*(const Set& dummy) const; void operator=(const Set& dummy); bool operator==(const Set& dummy) const; bool operator!=(const Set& dummy) const; bool operator>(const Set& dummy) const; bool operator>=(const Set& dummy) const; bool operator<(const Set& dummy) const; bool operator<=(const Set& dummy) const; void set(int pos); void forEachSubSet(Set& dummy) const;//must be called before "eachSub()" bool eachSub(Set& dummy) const; void set(); void reset(int pos); void reset(); bool test(int pos) const; bool isIn(const Set& dummy) const; void setSize(int theSize) {size=theSize;} Set(int theSize=10); }; #endif
file name: set.cpp
#include "Set.h" bool Set::isIn(const Set& dummy) const { for (int i=0; i<size; i++) { if (theSet.test(i)) { if (!dummy.test(i))//here I use Set.test() instead of set.test() { return false; } } } return true; } bool Set::test(int pos) const { return (pos<size&&theSet.test(pos)); } //current=-1;//initialize to -1 to prepare for being called int Set::next(int current) const { for (int i=current+1; i<size; i++)//include situation current>=size { if (theSet.test(i)) { return i; } } return -1;//not found } bool Set::operator !=(const Set& dummy)const { return !(this->operator ==(dummy)); } bool Set::operator <(const Set& dummy)const { return (this->isIn(dummy)&&this->operator !=(dummy)); } bool Set::operator <=(const Set& dummy)const { return isIn(dummy); } bool Set::operator >(const Set& dummy)const { return !(this->operator <=(dummy)); } bool Set::operator >=(const Set& dummy)const { return !(this->operator <(dummy)); } bool Set::operator ==(const Set& dummy)const { for (int i=0; i<(size>dummy.size?size:dummy.size); i++) { if (test(i)^dummy.test(i)) { return false; } } return true; } void Set::setSize(const Set& dummy) { size=dummy.size; } void Set::operator =(const Set& dummy) { size=dummy.size; for (int i=0; i<size; i++) { if (dummy.test(i)) { theSet.set(i); } else { theSet.reset(i); } } } Set::Set(int theSize) { size=theSize; reset(); } void Set::reset() { for (int i=0; i<size; i++) { theSet.reset(i); } } void Set::reset(int pos) { if (pos<size) { theSet.reset(pos); } } void Set::set() { theSet.set(); } void Set::set(int pos) { theSet.set(pos); } void Set::forEachSubSet(Set& dummy) const { dummy.size=size; dummy.reset();//emptyset } bool Set::eachSub(Set& dummy) const { int index=first();//starting from very first while (index!=-1)//not exceeding boundery { if (dummy.test(index)) { dummy.reset(index); index=next(index); } else { dummy.set(index); if (dummy<*this) { return true; } } } return false; } int Set::first()const { return next(-1); } int Set::count()const { return theSet.count(); } Set Set::unionSet(const Set& dummy) const { Set result; result.size=size>dummy.size?size:dummy.size; for (int i=0; i<result.size; i++) { if (test(i)||dummy.test(i)) { result.set(i); } } return result;//this is a temparory object; } Set Set::difference(const Set& dummy) const { Set result; result.size=size>dummy.size?size:dummy.size; for (int i=0; i<result.size; i++) { if (test(i)&&!dummy.test(i)) { result.set(i); } } return result; } Set Set::operator +(const Set& dummy) const { return unionSet(dummy); } Set Set::operator -(const Set& dummy) const { return difference(dummy); } Set Set::intersection(const Set& dummy) const { Set result; result.size=size<dummy.size?size:dummy.size; for (int i=0; i<result.size; i++) { if (test(i)&&dummy.test(i)) { result.set(i); } } return result; } Set Set::operator *(const Set& dummy) const { return intersection(dummy); }
file name: rules.h
#include <iostream> #include <bitset> #include "set.h" using namespace std; //make it simple, the first line must list all variables or attributes //"relation"(A,B,C...) const int MaxNameLength=5; const int MaxRuleNumber=100; const int MaxKeyNumber=10; const int MaxComposition=10; class Rule { friend class Rules; private: Set lhs; Set rhs; public: Rule(); bool test(int pos, bool isLeft); void lhsSet(int pos); void rhsSet(int pos); void setSize(int theSize); void set(int pos, bool isLeft); void setRule(const Set& left, const Set& right); void operator=(const Rule& dummy); bool operator==(const Rule& dummy); }; class Rules { private: Set leftSet; Set rightSet; Set attrClosure[MaxAttrNumber]; char attributes[MaxAttrNumber][MaxNameLength+1]; char relationName[MaxNameLength+1]; bool needKey;//this is needed for checklossless int keyCount; int attrCount; int attrFound[MaxAttrNumber]; int numberFound; int searchByChar(char ch, int step); void ruleReading(FILE* stream); Rule rules[MaxRuleNumber]; int ruleNumber; void displayRule(int index); void removeRule(int index); void calcClosure(); void addRule(const Set& left, const Set& right); int extractNames(FILE* stream, char* delimeters, char endChar='\n'); void closure(Set& attrSet, int maskedRule) const; bool checkAttr(int index); void checkRule(int index); void split(); void combine(); //"this" relation imply "dummy" relation bool imply(const Rules& dummy) const; void showMatrix(const Set* matrix, int row); public: void canonical(); void decomposition(); Set candidateKey[MaxKeyNumber]; void addRule(const Rule& dummy); void readFile(const char* fileName); void display(); void display(const Set& attrSet); int findKey(); bool checkLossless(); bool eachKey(Set& dummy); bool operator==(const Rules& dummy) const; Rules(); };
file name: rules.cpp
#include "Rules.h" void Rules::decomposition() { bool keyFound=false; Set dummy; findKey(); canonical(); for (int i=0; i<ruleNumber; i++) { dummy=rules[i].lhs+rules[i].rhs; cout<<"\ndecomposition #"<<i+1<<":"; display(dummy); cout<<"\ndependency is:"; displayRule(i); cout<<endl; for (int j=0; j<keyCount; j++) { if (candidateKey[j]<=dummy) { keyFound=true;//some decomposition contains some key } } } if (!keyFound) { cout<<"\ndecomposition #"<<ruleNumber+1<<":"; display(candidateKey[0]); cout<<"\nthe key has no particular dependency\n"; } needKey=!keyFound; } //English can be ambiguous: this function really means: //"this" relation imply "dummy" relation bool Rules::imply(const Rules& dummy) const { Set left; //for all dependency in "dummy" relation... for (int i=0; i<dummy.ruleNumber; i++) { left=dummy.rules[i].lhs; //if the closure of that particular dependency under //dependency of "this" relation //cannot contains its rhs in that dependency in "dummy" //relation, we say... closure(left, -1); if (!(dummy.rules[i].rhs<=left)) { //we say "dummy" relation is not implied by "this" relation return false; } } //can we????? return true; } //the "secret" name for this function is "equivalent" bool Rules::operator ==(const Rules& dummy) const { return imply(dummy)&&dummy.imply(*this); } void Rules::canonical() { int i; bool found; split(); //cout<<"\nafter split"; //display(); do { i=0; found=false; while (i<ruleNumber) { if (checkAttr(i)) { found=true; } i++; } }while (found); //cout<<"\nafter checkattr"; //display(); for (i=0; i<ruleNumber; i++) { checkRule(i); } //cout<<"\nafter check rule"; //display(); combine(); } void Rules::combine() { for (int i=0; i<ruleNumber; i++) { for (int j=i+1; j<ruleNumber; j++) { if (rules[i].lhs==rules[j].lhs) { rules[i].rhs=rules[i].rhs+rules[j].rhs; removeRule(j); } } } } void Rules::removeRule(int index) { if (index<ruleNumber) { ruleNumber--; for (int i=index; i<ruleNumber; i++) { rules[i]=rules[i+1]; } } } void Rules::checkRule(int index) { Set dummy; dummy=rules[index].lhs; closure(dummy, index); if (rules[index].rhs<=dummy) { removeRule(index); } } bool Rules::checkAttr(int index) { Set dummy; //for the out parameter "dummy", you cannot modify it! rules[index].lhs.forEachSubSet(dummy); while (rules[index].lhs.eachSub(dummy)) { Set old; //so, you have to use a copy of dummy to calc closure of it old=dummy; closure(old, index); if (rules[index].rhs<=old) { rules[index].lhs=dummy; for (int i=0; i<ruleNumber; i++) { if (i!=index) { if (rules[i]==rules[index]) { //found repeat removeRule(index); } } } //otherwise do nothing return true; } } return false; } int Rules::findKey() { Set theLeft, theRight; bool isSub; leftSet.setSize(attrCount); rightSet.setSize(attrCount); leftSet.set();//the universal set rightSet.reset();//the empty set for (int i=0; i<ruleNumber; i++) { rightSet= rightSet+rules[i].rhs; } rightSet=leftSet-rightSet;//rightSet is the minimum part of candidate key leftSet=leftSet-rightSet;//leftSet is the difference of rightSet,should be added to key keyCount=0; theRight=rightSet; theLeft=leftSet; closure(theRight, -1); if (theRight.count()==attrCount)//the only key { candidateKey[keyCount++]=rightSet; } else { leftSet.forEachSubSet(theLeft); while (leftSet.eachSub(theLeft)) { isSub=false; theRight=rightSet; theRight=theRight+theLeft; for (int i=0; i<keyCount; i++) { //display(theRight); //display(candidateKey[i]); if (candidateKey[i]<theRight) { isSub=true; break; } } if (isSub) { continue;//if subset of any candidate key, no need to test } Set temp; temp=theRight; closure(temp, -1); if (temp.count()==attrCount) { candidateKey[keyCount++]=theRight; } } } return keyCount; } void Rules::showMatrix(const Set* matrix, int row) { for (int j=0; j<attrCount; j++) { cout<<"\t"<<attributes[j]; } cout<<"\n"; for (int i=0; i<row; i++) { displayRule(i); cout<<"\t"<<matrix[i]<<endl; } } //this is a standard algorithm and it makes sure //that all dependency agree with each other //or in good English that there is some common agreement //between all dependency. Is this good enough? I doubt it. bool Rules::checkLossless() { int row=needKey?ruleNumber+1:ruleNumber; Set* matrix=new Set[row]; for (int i=0; i<row; i++) { if (needKey&&i==row-1) { matrix[i]=candidateKey[0]; } else { matrix[i]=rules[i].lhs+rules[i].rhs; } } showMatrix(matrix, row); int index; bool found; do { index=0; found=false; while (index<row) { for (int j=0; j<ruleNumber; j++) { if (j!=index) { if (rules[j].lhs<=matrix[index]) { if (!(rules[j].rhs<=matrix[index])) { matrix[index]=matrix[index]+rules[j].rhs; found=true; } } } } index++; } }while(found); showMatrix(matrix, row); for (index=0; index<row; index++) { if (matrix[index].count()==attrCount) { delete []matrix; return true; } } delete [] matrix; return false; } void Rules::calcClosure() { for (int i=0; i<attrCount; i++) { attrClosure[i].setSize(attrCount); attrClosure[i].reset(); attrClosure[i].set(i); closure(attrClosure[i], -1); } } void Rules::closure(Set& attrSet, int maskedRule) const { bool found=false; int i=0; do { i=0; found=false; while (i<ruleNumber) { if (i!=maskedRule)//this rule will not be calculated { if (rules[i].lhs<=attrSet)//lhs is contained { if ((attrSet*rules[i].rhs)!=rules[i].rhs) { attrSet=attrSet+rules[i].rhs; found=true; } } } i++; } } while (found); } void Rules::display(const Set& attrSet) { bool first=true; cout<<"{"; for (int i=0; i<attrCount; i++) { if (attrSet.test(i)) { if (first) { first=false; } else { cout<<","; } cout<<attributes[i]; } } cout<<"}"; } void Rules::display() { cout<<"\nnow display\n"; cout<<relationName<<"("; for (int i=0; i<attrCount; i++) { cout<<attributes[i]; if (i!=attrCount-1) { cout<<","; } else { cout<<");\n"; } } for (i=0; i<ruleNumber; i++) { displayRule(i); } } bool Rule::test(int pos, bool isLeft) { if (isLeft) { return lhs.test(pos); } else { return rhs.test(pos); } } void Rules::displayRule(int index) { for (int i=0; i<attrCount; i++) { if (rules[index].test(i, true)) { cout<<attributes[i]; } } cout<<" -> "; for (i=0; i<attrCount; i++) { if (rules[index].test(i, false)) { cout<<attributes[i]; } } cout<<";\n"; } void Rule::set(int pos, bool isLeft) { if (isLeft) { lhsSet(pos); } else { rhsSet(pos); } } void Rule::rhsSet(int pos) { rhs.set(pos); } void Rule::lhsSet(int pos) { lhs.set(pos); } Rule::Rule() { lhs.reset(); rhs.reset(); } void Rules::readFile(const char* fileName) { FILE* stream; stream=fopen(fileName, "r"); attrCount=extractNames(stream, "=,()", ';');//ignore the first relation name ruleReading(stream); } void Rules::ruleReading(FILE* stream) { char ch; int step=0; ruleNumber=0;//initialize bool isLeft=true; rules[ruleNumber].setSize(attrCount);//do twice!! while (!feof(stream)) { ch=fgetc(stream); if (ch==';'||ch==',') { ruleNumber++; rules[ruleNumber].setSize(attrCount);//do twice!! isLeft=true; } else { if (ch=='-'||ch=='>') { isLeft=false; } else { if (isalpha(ch)) { searchByChar(ch, step); if (numberFound!=1) { if (step<MaxNameLength) { step++; } else { cout<<"illegal attribute stated!\n"; exit(1); } } else { step=0; rules[ruleNumber].set(attrFound[0], isLeft); } } } } } } void Rule::setSize(int theSize) { lhs.setSize(theSize); rhs.setSize(theSize); } int Rules::searchByChar(char ch, int step) { if (step==0)//this is first time, and all attributes are searched { numberFound=0; for (int i=0; i<attrCount; i++)// { if (ch==attributes[i][step]) { attrFound[numberFound]=i; numberFound++; } } } else { int number=0;//new 'attrFound' re-counting for (int i=0; i<numberFound; i++) { if (ch==attributes[attrFound[i]][step]) { attrFound[number]=i; number++; } } numberFound=number; } return numberFound; } int findChar(char ch, char* str) { int index=0; while (str[index]!='\0') { if (str[index]==ch) { return index; } index++; } return -1; } //extract names from a line delimetered by various chars, like ',','(',')'.... int Rules::extractNames(FILE* stream, char* delimeters, char endChar) { int findChar(char ch, char* str); char ch; int nameIndex=0; char buffer[MaxNameLength+1]; char* ptr=buffer; while (!feof(stream)) { ch=getc(stream); if (ch==endChar) { if (ptr!=buffer) { *ptr='\0'; strcpy(attributes[nameIndex], buffer); } break; } if (findChar(ch, delimeters)>0)//deli reached { //skip the consecutive deli if (ptr!=buffer) { *ptr='\0'; if (ch=='(')//the very first one { strcpy(relationName, buffer); } else { strcpy(attributes[nameIndex], buffer); nameIndex++; } ptr=buffer;//restart } } else { *ptr=ch; ptr++; } } return nameIndex; } Rules::Rules() { numberFound=attrCount=0; } void Rule::operator =(const Rule& dummy) { lhs=dummy.lhs; rhs=dummy.rhs; } void Rules::addRule(const Rule& dummy) { rules[ruleNumber++]=dummy; } void Rules::addRule(const Set& left, const Set& right) { rules[ruleNumber++].setRule(left, right); } void Rule::setRule(const Set& left, const Set& right) { lhs=left; rhs=right; } void Rules::split() { int index; for (int i=0; i<ruleNumber; i++) { if (rules[i].rhs.count()>1) { index=rules[i].rhs.first(); index=rules[i].rhs.next(index);//starting from second one while (index!=-1) { Set right; right.setSize(rules[i].rhs); right.set(index); rules[i].rhs.reset(index);//remove from old rule addRule(rules[i].lhs, right); index=rules[i].rhs.next(index); } } } } bool Rule::operator ==(const Rule& dummy) { return (lhs==dummy.lhs&&rhs==dummy.rhs); }
file name: main.cpp (main)
#include "Rules.h" #include "set.h" int main() { int number; Rules R; R.readFile("d:\\rules.txt"); R.display(); number=R.findKey(); for (int i=0; i<number; i++) { R.display(R.candidateKey[i]); cout<<"\n"; } R.canonical(); R.decomposition(); R.checkLossless(); return 0; }
Here goes the result of test:
(My little scanner is power enough. The lossless-check function just looks for a line where all element is "1".)
now display
R(A,B,C,D,E,H);
A -> B;
DE -> A;
BC -> E;
BCD -> A;
ADE -> B;
{A,C,D,H}
{B,C,D,H}
{C,D,E,H}
decomposition #1:{A,B}
dependency is:A -> B;
decomposition #2:{A,D,E}
dependency is:DE -> A;
decomposition #3:{B,C,E}
dependency is:BC -> E;
decomposition #4:{A,C,D,H}
the key has no particular dependency
A B C D E H
A -> B;
110000
DE -> A;
100110
BC -> E;
011010
BCD -> A;
101101
A B C D E H
A -> B;
110000
DE -> A;
110110
BC -> E;
011010
BCD -> A;
111111
Press any key to continue