Dependency(candidateKey)

A. Third Edition
This is the third edition of my "dependency" and I implemented the "findKey" method which calculates the 
candidate key of relation schema.
B.The problem

"Function dependency" is a very important issue in database theory. And what's more, it is the essence of all.

Because I think it is the key of knowledge: the world is described by relations and relations of relations.

 

C.The idea of program
 
Candidate key is calculated this way:
 
1. Find all attributes that doesn't appear right-hand-side of any dependency.
 
2. From '1' get its complement set, and for all its subset, union with '1' to calculate its closure. If the
 
closure is equal to whole set of attributes, it is a superkey.
 
3. In order to be a candidate key, make sure the subset of superkey is not a candidate key. So, at '2', starting
 
from the smallest subset---empty set. Whenever you get a union from subset, test if it contains any found
 
candidate key. If yes, skip it.
 
D.The major functions
Now you will find it is extremely useful for me to overload those operators for "Set".
E.Further improvement
 
F.File listing
1. rules.h 
2. rules.cpp
3. set.h
4. set.cpp
5. main.cpp (main)
file name: set.h
#ifndef SET_H
#define SET_H

#include <iostream>
#include <bitset>

using namespace std;


const int MaxAttrNumber=20;

class Set
{
	//this is a long-pain for me, I have no other way to 
	//let compiler recognize this "friend" function outside declaration
	friend ostream& operator<<(ostream& out, const Set& dummy)	
	{
		for (int i=0; i<dummy.size; i++)
		{
			if (dummy.theSet.test(i))
			{
				out<<'1';
			}
			else
			{
				out<<'0';
			}
		}
		return out;
	}
private:
	bitset<MaxAttrNumber> theSet;
	int size;
	int current;
public:
	void setSize(const Set& dummy);
	int getSize(){ return size;}
	int next(int current);
	int first();	
	int count();
	Set intersection(const Set& dummy);
	Set unionSet(const Set& dummy);
	Set difference(const Set& dummy);
	
	//I am crazy about operator overloading!!!:)
	Set operator-(const Set& dummy);
	Set operator+(const Set& dummy);
	Set operator*(const Set& dummy);
	void operator=(const Set& dummy);
	bool operator==(const Set& dummy);
	bool operator!=(const Set& dummy);
	bool operator>(const Set& dummy);
	bool operator>=(const Set& dummy);
	bool operator<(const Set& dummy);
	bool operator<=(const Set& dummy);
	void set(int pos);
	void forEachSubSet(Set& dummy);//must be called before "eachSub()"
	bool eachSub(Set& dummy);
	void set();
	void reset(int pos);
	void reset();
	bool test(int pos) const;
	bool isIn(const Set& dummy);
	void setSize(int theSize) {size=theSize;}
	Set(int theSize=10);
};

#endif
 
file name: set.cpp
#include "Set.h"

bool Set::isIn(const Set& dummy)
{
	for (int i=0; i<size; i++)
	{
		if (theSet.test(i))
		{
			if (!dummy.test(i))//here I use Set.test() instead of set.test()
			{
				return false;
			}
		}
	}
	return true;		
}

bool Set::test(int pos) const
{
	return (pos<size&&theSet.test(pos));
}

//current=-1;//initialize to -1 to prepare for being called
int Set::next(int current)
{
	for (int i=current+1; i<size; i++)//include situation current>=size
	{
		if (theSet.test(i))
		{
			return i;
		}
	}
	return -1;//not found
}

bool Set::operator !=(const Set& dummy)
{
	return !(this->operator ==(dummy));
}

bool Set::operator <(const Set& dummy)
{
	return (this->isIn(dummy)&&this->operator !=(dummy));
}

bool Set::operator <=(const Set& dummy)
{
	return isIn(dummy);
}

bool Set::operator >(const Set& dummy)
{
	return !(this->operator <=(dummy));
}

bool Set::operator >=(const Set& dummy)
{
	return !(this->operator <(dummy));
}

bool Set::operator ==(const Set& dummy)
{
	for (int i=0; i<(size>dummy.size?size:dummy.size); i++)
	{
		if (test(i)^dummy.test(i))
		{
			return false;
		}
	}
	return true;
}

void Set::setSize(const Set& dummy)
{
	size=dummy.size;
}

void Set::operator =(const Set& dummy)
{
	size=dummy.size;
	for (int i=0; i<size; i++)
	{
		if (dummy.test(i))
		{
			theSet.set(i);
		}
		else
		{
			theSet.reset(i);
		}
	}
}


Set::Set(int theSize)
{
	size=theSize;
	reset();
}

void Set::reset()
{
	for (int i=0; i<size; i++)
	{
		theSet.reset(i);
	}
}

void Set::reset(int pos)
{
	if (pos<size)
	{
		theSet.reset(pos);
	}
}

void Set::set()
{
	theSet.set();
}

void Set::set(int pos)
{
	theSet.set(pos);
}
	
void Set::forEachSubSet(Set& dummy)
{
	dummy.size=size;
	dummy.reset();//emptyset
}


bool Set::eachSub(Set& dummy)
{
	int index=first();//starting from very first

	while (index!=-1)//not exceeding boundery
	{
		if (dummy.test(index))
		{
			dummy.reset(index);
			index=next(index);
		}
		else
		{
			dummy.set(index);
			if (dummy<*this)
			{
				return true;
			}
		}
	}
	return false;
}

int Set::first()
{
	return next(-1);
}

int Set::count()
{
	return theSet.count();
}

Set Set::unionSet(const Set& dummy)
{
	Set result;
	result.size=size>dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)||dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;//this is a temparory object;
}

Set Set::difference(const Set& dummy)
{
	Set result;
	result.size=size>dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)&&!dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;
}

Set Set::operator +(const Set& dummy)
{
	return unionSet(dummy);
}

Set Set::operator -(const Set& dummy)
{
	return difference(dummy);
}

Set Set::intersection(const Set& dummy)
{
	Set result;
	result.size=size<dummy.size?size:dummy.size;
	for (int i=0; i<result.size; i++)
	{
		if (test(i)&&dummy.test(i))
		{
			result.set(i);
		}
	}
	return result;
}

Set Set::operator *(const Set& dummy)
{
	return intersection(dummy);
}



  
file name: rules.h 
#include <iostream>
#include <bitset>
#include "set.h"

using namespace std;

//make it simple, the first line must list all variables or attributes
//"relation"(A,B,C...)
const int MaxNameLength=5;
const int MaxRuleNumber=100;
const int MaxKeyNumber=10;




class Rule
{
	friend class Rules;
private:
	Set lhs;
	Set rhs;	
public:	
	Rule();
	bool test(int pos, bool isLeft);
	void lhsSet(int pos);
	void rhsSet(int pos);
	void setSize(int theSize);
	void set(int pos, bool isLeft);
	void setRule(const Set& left, const Set& right);
	void operator=(const Rule& dummy);
};

class Rules
{
private:
	Set leftSet;
	Set rightSet;
	Set attrClosure[MaxAttrNumber];
	char attributes[MaxAttrNumber][MaxNameLength+1];
	char relationName[MaxNameLength+1];
	int keyCount;
	int attrCount;
	int attrFound[MaxAttrNumber];
	int numberFound;
	int searchByChar(char ch, int step);
	void ruleReading(FILE* stream);
	Rule rules[MaxRuleNumber];
	int ruleNumber;
	void displayRule(int index);
	
	void calcClosure();
	//void doCalcClosure(Set& attrSet, int maskedRule);
	void addRule(const Set& left, const Set& right);
	int extractNames(FILE* stream, char* delimeters, char endChar='\n');
	void closure(Set& attrSet, int maskedRule);
public:
	void split();
	Set candidateKey[MaxKeyNumber];
	void addRule(const Rule& dummy);
	void readFile(const char* fileName);
	void display();
	void display(const Set& attrSet);
	int findKey();
	bool eachKey(Set& dummy);
	Rules();
};
file name: rules.cpp 
#include "Rules.h"

int Rules::findKey()
{
	Set theLeft, theRight;
	bool isSub;
	leftSet.setSize(attrCount);
	rightSet.setSize(attrCount);
	leftSet.set();//the universal set
	rightSet.reset();//the empty set
	for (int i=0; i<ruleNumber; i++)
	{
		rightSet= rightSet+rules[i].rhs;
	}
	rightSet=leftSet-rightSet;//rightSet is the minimum part of candidate key
	leftSet=leftSet-rightSet;//leftSet is the difference of rightSet,should be added to key
	keyCount=0;
	theRight=rightSet;
	theLeft=leftSet;

	closure(theRight, -1);
	if (theRight.count()==attrCount)//the only key
	{
		candidateKey[keyCount++]=rightSet;		
	}
	else
	{
		leftSet.forEachSubSet(theLeft);
		while (leftSet.eachSub(theLeft))
		{
			isSub=false;
			theRight=rightSet;
			theRight=theRight+theLeft;
			for (int i=0; i<keyCount; i++)
			{
				//display(theRight);
				//display(candidateKey[i]);
				if (candidateKey[i]<theRight)
				{
					isSub=true;
					break;
				}
			}
			if (isSub)
			{
				continue;//if subset of any candidate key, no need to test
			}
			Set temp;
			temp=theRight;
			closure(temp, -1);
			if (temp.count()==attrCount)
			{
				candidateKey[keyCount++]=theRight;
			}

		}
	}	
	return keyCount;
}

void Rules::calcClosure()
{
	for (int i=0; i<attrCount; i++)
	{
		attrClosure[i].setSize(attrCount);
		attrClosure[i].reset();
		attrClosure[i].set(i);
		closure(attrClosure[i], -1);
	}
}

void  Rules::closure(Set& attrSet, int maskedRule)
{
	bool found=false;
	int i=0;
	do 
	{
		i=0;
		found=false;
		while (i<ruleNumber)
		{
			if (i!=maskedRule)//this rule will not be calculated
			{
				if (rules[i].lhs<=attrSet)//lhs is contained
				{
					if ((attrSet*rules[i].rhs)!=rules[i].rhs)
					{
						attrSet=attrSet+rules[i].rhs;
						found=true;
					}
				}
			}
			i++;
		}
	}
	while (found);
}

void Rules::display(const Set& attrSet)
{
	bool first=true;
	cout<<"{";
	for (int i=0; i<attrCount; i++)
	{
		
		if (attrSet.test(i))
		{
			if (first)
			{
				first=false;
			}
			else
			{
				cout<<",";
			}
			cout<<attributes[i];
		}
	}
	cout<<"}";
}

void Rules::display()
{
	cout<<"\nnow display\n";
	cout<<relationName<<"(";
	for (int i=0; i<attrCount; i++)
	{
		cout<<attributes[i];
		if (i!=attrCount-1)
		{
			cout<<",";
		}
		else
		{
			cout<<");\n";
		}
	}
	for (i=0; i<ruleNumber; i++)
	{
		displayRule(i);
	}
}


bool Rule::test(int pos, bool isLeft)
{
	if (isLeft)
	{
		return lhs.test(pos);
	}
	else
	{
		return rhs.test(pos);
	}
}


void Rules::displayRule(int index)
{
	for (int i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, true))
		{
			cout<<attributes[i];
		}
	}
	cout<<" -> ";
	for (i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, false))
		{
			cout<<attributes[i];
		}
	}
	cout<<";\n";
}


void Rule::set(int pos, bool isLeft)
{
	if (isLeft)
	{
		lhsSet(pos);
	}
	else
	{
		rhsSet(pos);
	}
}

void Rule::rhsSet(int pos)
{
	rhs.set(pos);
}

void Rule::lhsSet(int pos)
{
	lhs.set(pos);
}

Rule::Rule()
{
	lhs.reset();
	rhs.reset();
	
}


void Rules::readFile(const char* fileName)
{
	FILE* stream;
	stream=fopen(fileName, "r");
	attrCount=extractNames(stream, "=,()", ';');//ignore the first relation name
	ruleReading(stream);

}

void Rules::ruleReading(FILE* stream)
{
	char ch;
	int step=0;
	ruleNumber=0;//initialize
	bool isLeft=true;
	rules[ruleNumber].setSize(attrCount);//do twice!!
	while (!feof(stream))
	{		
		ch=fgetc(stream);
		if (ch==';'||ch==',')
		{
			ruleNumber++;
			rules[ruleNumber].setSize(attrCount);//do twice!!
			isLeft=true;
		}
		else
		{
			if (ch=='-'||ch=='>')
			{
				isLeft=false;
			}
			else
			{
				if (isalpha(ch))
				{
					searchByChar(ch, step);
					if (numberFound!=1)
					{
						if (step<MaxNameLength)
						{
							step++;
						}
						else
						{
							cout<<"illegal attribute stated!\n";
							exit(1);
						}
					}
					else
					{
						step=0;
						
						rules[ruleNumber].set(attrFound[0], isLeft);
					}
				}
			}
		}
	}
}

void Rule::setSize(int theSize)
{
	lhs.setSize(theSize);
	rhs.setSize(theSize);
}
	

int Rules::searchByChar(char ch, int step)
{
	if (step==0)//this is first time, and all attributes are searched
	{
		numberFound=0;
		for (int i=0; i<attrCount; i++)//
		{
			if (ch==attributes[i][step])
			{
				attrFound[numberFound]=i;
				numberFound++;
			}
		}
	}
	else
	{
		int number=0;//new 'attrFound' re-counting
		for (int i=0; i<numberFound; i++)
		{
			if (ch==attributes[attrFound[i]][step])
			{
				attrFound[number]=i;
				number++;
			}
		}
		numberFound=number;
	}
	return numberFound;
}


int findChar(char ch, char* str)
{
	int index=0;
	while (str[index]!='\0')
	{
		if (str[index]==ch)
		{
			return index;
		}
		index++;
	}
	return -1;
}

//extract names from a line delimetered by various chars, like ',','(',')'....
int Rules::extractNames(FILE* stream, char* delimeters, char endChar)
{
	int findChar(char ch, char* str);
	char ch;
	int nameIndex=0;
	char buffer[MaxNameLength+1];
	char* ptr=buffer;

	while (!feof(stream))
	{
		ch=getc(stream);
		if (ch==endChar)
		{
			if (ptr!=buffer)
			{
				*ptr='\0';
				strcpy(attributes[nameIndex], buffer);
			}
			break;
		}

		if (findChar(ch, delimeters)>0)//deli reached
		{
			//skip the consecutive deli
			if (ptr!=buffer)
			{
				*ptr='\0';	
				if (ch=='(')//the very first one
				{
					strcpy(relationName, buffer);
				}
				else
				{
					strcpy(attributes[nameIndex], buffer);
					nameIndex++;
				}
				ptr=buffer;//restart
			}
		}
		else
		{
			*ptr=ch;
			ptr++;
		}
	}
	return nameIndex;
}		

Rules::Rules()
{
	numberFound=attrCount=0;
}
	
void Rule::operator =(const Rule& dummy)
{
	lhs=dummy.lhs;
	rhs=dummy.rhs;
}

void Rules::addRule(const Rule& dummy)
{
	rules[ruleNumber++]=dummy;
}

void Rules::addRule(const Set& left, const Set& right)
{
	rules[ruleNumber++].setRule(left, right);
}

void Rule::setRule(const Set& left, const Set& right)
{
	lhs=left;
	rhs=right;
}

void Rules::split()
{
	int index;
	for (int i=0; i<ruleNumber; i++)
	{
		if (rules[i].rhs.count()>1)
		{			
			index=rules[i].rhs.first();
			index=rules[i].rhs.next(index);//starting from second one
			while (index!=-1)
			{
				Set right;				
				right.setSize(rules[i].rhs);
				right.set(index);
				rules[i].rhs.reset(index);//remove from old rule
				addRule(rules[i].lhs, right);
				index=rules[i].rhs.next(index);
			}
		}
	}
}

file name: main.cpp (main)
#include "Rules.h"
#include "set.h"

int main()
{
	int number;
	Rules R;
	R.readFile("d:\\rules.txt");
	R.split();
	R.display();
	
	
	number=R.findKey();
	for (int i=0; i<number; i++)
	{
		R.display(R.candidateKey[i]);
		cout<<"\n";
	}
	R.display();



	return 0;
}
 
The testing rules is like this: (name of file is "rules.txt")
R(A,B,C,D,E,H);
A->B;
DE->A;
BC->E;
BCD->A;
AED->BH;
The result of program is: (Please note that "split" is for calculating canonical cover which is to be finished.)

now display
R(A,B,C,D,E,H);
A -> B;
DE -> A;
BC -> E;
BCD -> A;
ADE -> B;
ADE -> H;
{A,C,D}
{B,C,D}
{C,D,E}

now display
R(A,B,C,D,E,H);
A -> B;
DE -> A;
BC -> E;
BCD -> A;
ADE -> B;
ADE -> H;
Press any key to continue
 
 




                                 back.gif (341 bytes)       up.gif (335 bytes)         next.gif (337 bytes)