Dependency(canonical)

A. Third Edition
This is the third edition of my "dependency" and I think it is almost the perfect version of all because it 
implements almost all functions you can name---canonical cover, decomposition, lossless-join-check.
B.The problem

"Function dependency" is a very important issue in database theory. And what's more, it is the essence of all.

Because I think it is the key of knowledge: the world is described by relations and relations of relations.

 

C.The idea of program
 
There is really little job to do when you obtain the right tool or you make it for yourself like me. After I
 
overloaded those operators of my Set class, everything become so simple.
 
There is only one thing I want to remind you---the "forEachSubSet" function expects the parameter won't be
 
modified. 
 
D.The major functions
For the function "decomposition", I don't want to export an array of sets of the decomposition. The simple reason
is who would use it this way. So, I simply "cout" them.
 
E.Further improvement
 
F.File listing
1. rules.h 
2. rules.cpp
3. set.h
4. set.cpp
5. main.cpp (main)
file name: set.h
#ifndef SET_H
#define SET_H

#include <iostream>
#include <bitset>

using namespace std;


const int MaxAttrNumber=20;

class Set
{
	//this is a long-pain for me, I have no other way to 
	//let compiler recognize this "friend" function outside declaration
	friend ostream& operator<<(ostream& out, const Set& dummy)	
	{
		for (int i=0; i<dummy.size; i++)
		{
			if (dummy.theSet.test(i))
			{
				out<<'1';
			}
			else
			{
				out<<'0';
			}
		}
		return out;
	}
private:
	bitset<MaxAttrNumber> theSet;
	int size;
	int current;
public:
	void setSize(const Set& dummy);
	int getSize(){ return size;}
	int next(int current) const;
	int first() const;	
	int count() const;
	Set intersection(const Set& dummy) const;
	Set unionSet(const Set& dummy) const;
	Set difference(const Set& dummy) const;
	
	//I am crazy about operator overloading!!!:)
	Set operator-(const Set& dummy) const;
	Set operator+(const Set& dummy) const;
	Set operator*(const Set& dummy) const;
	void operator=(const Set& dummy);
	bool operator==(const Set& dummy) const;
	bool operator!=(const Set& dummy) const;
	bool operator>(const Set& dummy) const;
	bool operator>=(const Set& dummy) const;
	bool operator<(const Set& dummy) const;
	bool operator<=(const Set& dummy) const;
	void set(int pos);
	void forEachSubSet(Set& dummy) const;//must be called before "eachSub()"
	bool eachSub(Set& dummy) const;
	void set();
	void reset(int pos);
	void reset();
	bool test(int pos) const;
	bool isIn(const Set& dummy) const;
	void setSize(int theSize) {size=theSize;}
	Set(int theSize=10);
};

#endif
 
file name: set.cpp
#include "Set.h"

bool Set::isIn(const Set& dummy) const
{
	for (int i=0; i<size; i++)
	{
		if (theSet.test(i))
		{
			if (!dummy.test(i))//here I use Set.test() instead of set.test()
			{
				return false;
			}
		}
	}
	return true;		
}

bool Set::test(int pos) const
{
	return (pos<size&&theSet.test(pos));
}

//current=-1;//initialize to -1 to prepare for being called
int Set::next(int current) const
{
	for (int i=current+1; i<size; i++)//include situation current>=size
	{
		if (theSet.test(i))
		{
			return i;
		}
	}
	return -1;//not found
}

bool Set::operator !=(const Set& dummy)const
{
	return !(this->operator ==(dummy));
}

bool Set::operator <(const Set& dummy)const
{
	return (this->isIn(dummy)&&this->operator !=(dummy));
}

bool Set::operator <=(const Set& dummy)const
{
	return isIn(dummy);
}

bool Set::operator >(const Set& dummy)const
{
	return !(this->operator <=(dummy));
}

bool Set::operator >=(const Set& dummy)const
{
	return !(this->operator <(dummy));
}

bool Set::operator ==(const Set& dummy)const
{
	for (int i=0; i<(size>dummy.size?size:dummy.size); i++)
	{
		if (test(i)^dummy.test(i))
		{
			return false;
		}
	}
	return true;
}

void Set::setSize(const Set& dummy)
{
	size=dummy.size;
}

void Set::operator =(const Set& dummy)
{
	size=dummy.size;
	for (int i=0; i<size; i++)
	{
		if (dummy.test(i))
		{
			theSet.set(i);
		}
		else
		{
			theSet.reset(i);
		}
	}
}


Set::Set(int theSize)
{
	size=theSize;
	reset();
}

void Set::reset()
{
	for (int i=0; i<size; i++)
	{
		theSet.reset(i);
	}
}

void Set::reset(int pos)
{
	if (pos<size)
	{
		theSet.reset(pos);
	}
}

void Set::set()
{
	theSet.set();
}

void Set::set(int pos)
{
	theSet.set(pos);
}
	
void Set::forEachSubSet(Set& dummy) const
{
	dummy.size=size;
	dummy.reset();//emptyset
}


bool Set::eachSub(Set& dummy) const
{
	int index=first();//starting from very first

	while (index!=-1)//not exceeding boundery
	{
		if (dummy.test(index))
		{
			dummy.reset(index);
			index=next(index);
		}
		else
		{
			dummy.set(index);
			if (dummy<*this)
			{
				return true;
			}
		}
	}
	return false;
}

int Set::first()const
{
	return next(-1);
}

int Set::count()const
{
	return theSet.count();
}

Set Set::unionSet(const Set& dummy) const
{
	Set result;
	result.size=size>dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)||dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;//this is a temparory object;
}

Set Set::difference(const Set& dummy) const
{
	Set result;
	result.size=size>dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)&&!dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;
}

Set Set::operator +(const Set& dummy) const
{
	return unionSet(dummy);
}

Set Set::operator -(const Set& dummy) const
{
	return difference(dummy);
}

Set Set::intersection(const Set& dummy) const
{
	Set result;
	result.size=size<dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)&&dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;
}

Set Set::operator *(const Set& dummy) const
{
	return intersection(dummy);
}



  
file name: rules.h 
#include <iostream>
#include <bitset>
#include "set.h"

using namespace std;

//make it simple, the first line must list all variables or attributes
//"relation"(A,B,C...)
const int MaxNameLength=5;
const int MaxRuleNumber=100;
const int MaxKeyNumber=10;
const int MaxComposition=10;




class Rule
{
	friend class Rules;
private:
	Set lhs;
	Set rhs;	
public:	
	Rule();
	bool test(int pos, bool isLeft);
	void lhsSet(int pos);
	void rhsSet(int pos);
	void setSize(int theSize);
	void set(int pos, bool isLeft);
	void setRule(const Set& left, const Set& right);
	void operator=(const Rule& dummy);
	bool operator==(const Rule& dummy);
};

class Rules
{
private:
	Set leftSet;
	Set rightSet;
	Set attrClosure[MaxAttrNumber];
	char attributes[MaxAttrNumber][MaxNameLength+1];
	char relationName[MaxNameLength+1];
	bool needKey;//this is needed for checklossless
	int keyCount;
	int attrCount;
	int attrFound[MaxAttrNumber];
	int numberFound;
	int searchByChar(char ch, int step);
	void ruleReading(FILE* stream);
	Rule rules[MaxRuleNumber];
	int ruleNumber;
	void displayRule(int index);
	void removeRule(int index);
	void calcClosure();
	void addRule(const Set& left, const Set& right);
	int extractNames(FILE* stream, char* delimeters, char endChar='\n');
	void closure(Set& attrSet, int maskedRule) const;
	bool checkAttr(int index);
	void checkRule(int index);
	void split();
	void combine();
	//"this" relation imply "dummy" relation
	bool imply(const Rules& dummy) const;
	void showMatrix(const Set* matrix, int row);
public:	
	void canonical();
	void decomposition();	
	Set candidateKey[MaxKeyNumber];
	void addRule(const Rule& dummy);
	void readFile(const char* fileName);
	void display();
	void display(const Set& attrSet);
	int findKey();
	bool checkLossless();
	bool eachKey(Set& dummy);
	bool operator==(const Rules& dummy) const;
	Rules();
};

 
file name: rules.cpp 
#include "Rules.h"

void Rules::decomposition()
{
	bool keyFound=false;
	Set dummy;
	findKey();
	canonical();
	for (int i=0; i<ruleNumber; i++)
	{
		dummy=rules[i].lhs+rules[i].rhs;
		cout<<"\ndecomposition #"<<i+1<<":";
		display(dummy);
		cout<<"\ndependency is:";
		displayRule(i);
		cout<<endl;
		for (int j=0; j<keyCount; j++)
		{
			if (candidateKey[j]<=dummy)
			{
				keyFound=true;//some decomposition contains some key
			}
		}
	}
	if (!keyFound)
	{
		cout<<"\ndecomposition #"<<ruleNumber+1<<":";
		display(candidateKey[0]);
		cout<<"\nthe key has no particular dependency\n";
	}
	needKey=!keyFound;
}
	
	
//English can be ambiguous: this function really means:
//"this" relation imply "dummy" relation
bool Rules::imply(const Rules& dummy) const
{
	Set left;
	//for all dependency in "dummy" relation...
	for (int i=0; i<dummy.ruleNumber; i++)
	{
		left=dummy.rules[i].lhs;
		//if the closure of that particular dependency under
		//dependency of "this" relation
		//cannot contains its rhs in that dependency in "dummy" 
		//relation, we say...
		closure(left, -1);
		if (!(dummy.rules[i].rhs<=left))
		{
			//we say "dummy" relation is not implied by "this" relation
			return false;
		}
	}
	//can we?????
	return true;
}

//the "secret" name for this function is "equivalent"
bool Rules::operator ==(const Rules& dummy) const
{
	return imply(dummy)&&dummy.imply(*this);
}



void Rules::canonical()
{
	int i;
	bool found;
	split();
	//cout<<"\nafter split";
	//display();
	do
	{
		i=0; 
		found=false;
		while (i<ruleNumber)
		{
			if (checkAttr(i))
			{
				found=true;				
			}
			i++;
		}
	}while (found);
	//cout<<"\nafter checkattr";
	//display();
	for (i=0; i<ruleNumber; i++)
	{
		checkRule(i);
	}
	//cout<<"\nafter check rule";
	//display();
	combine();
	
}

void Rules::combine()
{
	for (int i=0; i<ruleNumber; i++)
	{
		for (int j=i+1; j<ruleNumber; j++)
		{
			if (rules[i].lhs==rules[j].lhs)
			{
				rules[i].rhs=rules[i].rhs+rules[j].rhs;
				removeRule(j);
			}
		}
	}
}


void Rules::removeRule(int index)
{
	if (index<ruleNumber)
	{
		ruleNumber--;
		for (int i=index; i<ruleNumber; i++)
		{
			rules[i]=rules[i+1];
		}
	}	
}



void Rules::checkRule(int index)
{
	Set dummy;
	dummy=rules[index].lhs;
	closure(dummy, index);
	if (rules[index].rhs<=dummy)
	{
		removeRule(index);
	}
}


bool Rules::checkAttr(int index)
{
	Set dummy;
	//for the out parameter "dummy", you cannot modify it!
	rules[index].lhs.forEachSubSet(dummy);

	while (rules[index].lhs.eachSub(dummy))
	{
		Set old;	
		//so, you have to use a copy of dummy to calc closure of it
		old=dummy;
		closure(old, index);
	
		if (rules[index].rhs<=old)
		{		
			rules[index].lhs=dummy;
			for (int i=0; i<ruleNumber; i++)
			{
				if (i!=index)
				{
					if (rules[i]==rules[index])
					{
						//found repeat
						removeRule(index);
					}
				}
			}
			//otherwise do nothing
			return true;
		}
	}
	return false;
}


int Rules::findKey()
{
	Set theLeft, theRight;
	bool isSub;
	leftSet.setSize(attrCount);
	rightSet.setSize(attrCount);
	leftSet.set();//the universal set
	rightSet.reset();//the empty set
	for (int i=0; i<ruleNumber; i++)
	{
		rightSet= rightSet+rules[i].rhs;
	}
	rightSet=leftSet-rightSet;//rightSet is the minimum part of candidate key
	leftSet=leftSet-rightSet;//leftSet is the difference of rightSet,should be added to key
	keyCount=0;
	theRight=rightSet;
	theLeft=leftSet;

	closure(theRight, -1);
	if (theRight.count()==attrCount)//the only key
	{
		candidateKey[keyCount++]=rightSet;		
	}
	else
	{
		leftSet.forEachSubSet(theLeft);
		while (leftSet.eachSub(theLeft))
		{
			isSub=false;
			theRight=rightSet;
			theRight=theRight+theLeft;
			for (int i=0; i<keyCount; i++)
			{
				//display(theRight);
				//display(candidateKey[i]);
				if (candidateKey[i]<theRight)
				{
					isSub=true;
					break;
				}
			}
			if (isSub)
			{
				continue;//if subset of any candidate key, no need to test
			}
			Set temp;
			temp=theRight;
			closure(temp, -1);
			if (temp.count()==attrCount)
			{
				candidateKey[keyCount++]=theRight;
			}

		}
	}	
	return keyCount;
}

void Rules::showMatrix(const Set* matrix, int row)
{
	for (int j=0; j<attrCount; j++)
	{
		cout<<"\t"<<attributes[j];
	}
	cout<<"\n";
	for (int i=0; i<row; i++)
	{
		displayRule(i);
		cout<<"\t"<<matrix[i]<<endl;
	}
}

//this is a standard algorithm and it makes sure
//that all dependency agree with each other
//or in good English that there is some common agreement 
//between all dependency. Is this good enough? I doubt it.
bool Rules::checkLossless()
{
	int row=needKey?ruleNumber+1:ruleNumber;
	
	Set* matrix=new Set[row];
	for (int i=0; i<row; i++)
	{
		if (needKey&&i==row-1)
		{
			matrix[i]=candidateKey[0];	
		}
		else
		{
			matrix[i]=rules[i].lhs+rules[i].rhs;
		}
	}
	showMatrix(matrix, row);
	int index;
	bool found;
	do
	{
		index=0;
		found=false;
		while (index<row)
		{
			for (int j=0; j<ruleNumber; j++)
			{
				if (j!=index)
				{
					if (rules[j].lhs<=matrix[index])
					{
						if (!(rules[j].rhs<=matrix[index]))
						{
							matrix[index]=matrix[index]+rules[j].rhs;
							found=true;
						}
					}
				}
			}
			index++;
		}
	}while(found);
	showMatrix(matrix, row);
	for (index=0; index<row; index++)
	{
		if (matrix[index].count()==attrCount)
		{
			delete []matrix;
			return true;
		}
	}
	delete [] matrix;
	return false;
}


void Rules::calcClosure()
{
	for (int i=0; i<attrCount; i++)
	{
		attrClosure[i].setSize(attrCount);
		attrClosure[i].reset();
		attrClosure[i].set(i);
		closure(attrClosure[i], -1);
	}
}

void  Rules::closure(Set& attrSet, int maskedRule) const
{
	bool found=false;
	int i=0;
	do 
	{
		i=0;
		found=false;
		while (i<ruleNumber)
		{
			if (i!=maskedRule)//this rule will not be calculated
			{
				if (rules[i].lhs<=attrSet)//lhs is contained
				{
					if ((attrSet*rules[i].rhs)!=rules[i].rhs)
					{
						attrSet=attrSet+rules[i].rhs;
						found=true;
					}
				}
			}
			i++;
		}
	}
	while (found);
}

void Rules::display(const Set& attrSet)
{
	bool first=true;
	cout<<"{";
	for (int i=0; i<attrCount; i++)
	{
		
		if (attrSet.test(i))
		{
			if (first)
			{
				first=false;
			}
			else
			{
				cout<<",";
			}
			cout<<attributes[i];
		}
	}
	cout<<"}";
}

void Rules::display()
{
	cout<<"\nnow display\n";
	cout<<relationName<<"(";
	for (int i=0; i<attrCount; i++)
	{
		cout<<attributes[i];
		if (i!=attrCount-1)
		{
			cout<<",";
		}
		else
		{
			cout<<");\n";
		}
	}
	for (i=0; i<ruleNumber; i++)
	{
		displayRule(i);
	}
}


bool Rule::test(int pos, bool isLeft)
{
	if (isLeft)
	{
		return lhs.test(pos);
	}
	else
	{
		return rhs.test(pos);
	}
}


void Rules::displayRule(int index)
{
	for (int i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, true))
		{
			cout<<attributes[i];
		}
	}
	cout<<" -> ";
	for (i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, false))
		{
			cout<<attributes[i];
		}
	}
	cout<<";\n";
}


void Rule::set(int pos, bool isLeft)
{
	if (isLeft)
	{
		lhsSet(pos);
	}
	else
	{
		rhsSet(pos);
	}
}

void Rule::rhsSet(int pos)
{
	rhs.set(pos);
}

void Rule::lhsSet(int pos)
{
	lhs.set(pos);
}

Rule::Rule()
{
	lhs.reset();
	rhs.reset();
	
}


void Rules::readFile(const char* fileName)
{
	FILE* stream;
	stream=fopen(fileName, "r");
	attrCount=extractNames(stream, "=,()", ';');//ignore the first relation name
	ruleReading(stream);

}

void Rules::ruleReading(FILE* stream)
{
	char ch;
	int step=0;
	ruleNumber=0;//initialize
	bool isLeft=true;
	rules[ruleNumber].setSize(attrCount);//do twice!!
	while (!feof(stream))
	{		
		ch=fgetc(stream);
		if (ch==';'||ch==',')
		{
			ruleNumber++;
			rules[ruleNumber].setSize(attrCount);//do twice!!
			isLeft=true;
		}
		else
		{
			if (ch=='-'||ch=='>')
			{
				isLeft=false;
			}
			else
			{
				if (isalpha(ch))
				{
					searchByChar(ch, step);
					if (numberFound!=1)
					{
						if (step<MaxNameLength)
						{
							step++;
						}
						else
						{
							cout<<"illegal attribute stated!\n";
							exit(1);
						}
					}
					else
					{
						step=0;
						
						rules[ruleNumber].set(attrFound[0], isLeft);
					}
				}
			}
		}
	}
}

void Rule::setSize(int theSize)
{
	lhs.setSize(theSize);
	rhs.setSize(theSize);
}
	

int Rules::searchByChar(char ch, int step)
{
	if (step==0)//this is first time, and all attributes are searched
	{
		numberFound=0;
		for (int i=0; i<attrCount; i++)//
		{
			if (ch==attributes[i][step])
			{
				attrFound[numberFound]=i;
				numberFound++;
			}
		}
	}
	else
	{
		int number=0;//new 'attrFound' re-counting
		for (int i=0; i<numberFound; i++)
		{
			if (ch==attributes[attrFound[i]][step])
			{
				attrFound[number]=i;
				number++;
			}
		}
		numberFound=number;
	}
	return numberFound;
}


int findChar(char ch, char* str)
{
	int index=0;
	while (str[index]!='\0')
	{
		if (str[index]==ch)
		{
			return index;
		}
		index++;
	}
	return -1;
}

//extract names from a line delimetered by various chars, like ',','(',')'....
int Rules::extractNames(FILE* stream, char* delimeters, char endChar)
{
	int findChar(char ch, char* str);
	char ch;
	int nameIndex=0;
	char buffer[MaxNameLength+1];
	char* ptr=buffer;

	while (!feof(stream))
	{
		ch=getc(stream);
		if (ch==endChar)
		{
			if (ptr!=buffer)
			{
				*ptr='\0';
				strcpy(attributes[nameIndex], buffer);
			}
			break;
		}

		if (findChar(ch, delimeters)>0)//deli reached
		{
			//skip the consecutive deli
			if (ptr!=buffer)
			{
				*ptr='\0';	
				if (ch=='(')//the very first one
				{
					strcpy(relationName, buffer);
				}
				else
				{
					strcpy(attributes[nameIndex], buffer);
					nameIndex++;
				}
				ptr=buffer;//restart
			}
		}
		else
		{
			*ptr=ch;
			ptr++;
		}
	}
	return nameIndex;
}		

Rules::Rules()
{
	numberFound=attrCount=0;
}
	
void Rule::operator =(const Rule& dummy)
{
	lhs=dummy.lhs;
	rhs=dummy.rhs;
}

void Rules::addRule(const Rule& dummy)
{
	rules[ruleNumber++]=dummy;
}

void Rules::addRule(const Set& left, const Set& right)
{
	rules[ruleNumber++].setRule(left, right);
}

void Rule::setRule(const Set& left, const Set& right)
{
	lhs=left;
	rhs=right;
}

void Rules::split()
{
	int index;
	for (int i=0; i<ruleNumber; i++)
	{
		if (rules[i].rhs.count()>1)
		{			
			index=rules[i].rhs.first();
			index=rules[i].rhs.next(index);//starting from second one
			while (index!=-1)
			{
				Set right;				
				right.setSize(rules[i].rhs);
				right.set(index);
				rules[i].rhs.reset(index);//remove from old rule
				addRule(rules[i].lhs, right);
				index=rules[i].rhs.next(index);
			}
		}
	}
}

bool Rule::operator ==(const Rule& dummy)
{
	return (lhs==dummy.lhs&&rhs==dummy.rhs);
}
 
file name: main.cpp (main)
#include "Rules.h"
#include "set.h"

int main()
{
	int number;
	Rules R;
	R.readFile("d:\\rules.txt");
	R.display();
	
	
	number=R.findKey();
	for (int i=0; i<number; i++)
	{
		R.display(R.candidateKey[i]);
		cout<<"\n";
	}

	R.canonical();

	R.decomposition();
	R.checkLossless();

	return 0;
}
 
Here goes the result of test:
(My little scanner is power enough. The lossless-check function just looks for a line where all element is "1".)
 

now display
R(A,B,C,D,E,H);
A -> B;
DE -> A;
BC -> E;
BCD -> A;
ADE -> B;
{A,C,D,H}
{B,C,D,H}
{C,D,E,H}

decomposition #1:{A,B}
dependency is:A -> B;


decomposition #2:{A,D,E}
dependency is:DE -> A;


decomposition #3:{B,C,E}
dependency is:BC -> E;


decomposition #4:{A,C,D,H}
the key has no particular dependency
A B C D E H
A -> B;
110000
DE -> A;
100110
BC -> E;
011010
BCD -> A;
101101
A B C D E H
A -> B;
110000
DE -> A;
110110
BC -> E;
011010
BCD -> A;
111111
Press any key to continue




                                 back.gif (341 bytes)       up.gif (335 bytes)         next.gif (337 bytes)