#include "stdafx.h"


extern FILE* log;

// Creates a client with no open WinINet handles and an empty download buffer.
HttpClient::HttpClient()
{
	hSession = NULL;
	hFile = NULL;
	bufferSize = 0;
}

// Releases the session handle if one is still recorded in hSession.
HttpClient::~HttpClient()
{
	if (hSession == NULL)
	{
		return;
	}
	InternetCloseHandle(hSession);
}

// Appends exactly `count` characters of `src` to `buffer` at position `offset`,
// always leaving one slot free for the terminating NUL. Returns false (and
// writes nothing) when the characters would not fit in `bufSize`.
static bool appendUrlChars(LPTSTR buffer, int bufSize, int& offset, LPCTSTR src, int count)
{
	if (src == NULL || count < 0 || offset < 0 || count > bufSize - 1 - offset)
	{
		return false;
	}
	for (int k = 0; k < count; k++)
	{
		buffer[offset + k] = src[k];
	}
	offset += count;
	return true;
}

// Builds an absolute URL for `link` into `buffer`.
//
//   url     - cracked components of the page the link was found on; only
//             lpszScheme/dwSchemeLength and lpszHostName/dwHostNameLength are read.
//   link    - link text; may be absolute (contains "://") or relative.
//   buffer  - receives the NUL-terminated result.
//   bufSize - capacity of `buffer` in characters, including the terminator.
//
// An absolute link is copied unchanged. A relative link is emitted as
// scheme "://" host "/" link, where trailing spaces of the host and the
// leading part of the link reported by skipSpaceW are dropped, and exactly one
// '/' separates host and link.
//
// Returns true on success. Returns false, leaving `buffer` empty, when an
// argument is missing or the result does not fit in `bufSize`.
bool HttpClient::combineUrl(URL_COMPONENTS& url, LPCTSTR link, LPTSTR buffer, int bufSize)
{
	if (link == NULL || buffer == NULL || bufSize <= 0)
	{
		return false;
	}

	int bufOffset = 0;
	int linkLen = (int)_tcslen(link);
	bool ok;

	buffer[0] = _T('\0');

	if (findFirstStringW(link, _tcslen(link), _T("://")) >= 0)
	{
		// The link already carries its own scheme: use it as is.
		ok = appendUrlChars(buffer, bufSize, bufOffset, link, linkLen);
	}
	else
	{
		// Trim trailing spaces from the host name; start at the last real
		// character, not at the terminator position.
		int hostLen = (url.lpszHostName != NULL) ? (int)url.dwHostNameLength : 0;
		while (hostLen > 0 && url.lpszHostName[hostLen - 1] == _T(' '))
		{
			hostLen--;
		}
		bool hostEndsWithSlash = (hostLen > 0 && url.lpszHostName[hostLen - 1] == _T('/'));

		// skipSpaceW is used as an index of the first significant character
		// of the link; clamp it so a surprising value cannot index outside it.
		int j = skipSpaceW(link, _tcslen(link));
		if (j < 0)
		{
			j = 0;
		}
		if (j > linkLen)
		{
			j = linkLen;
		}
		bool linkStartsWithSlash = (link[j] == _T('/'));

		if (hostEndsWithSlash && linkStartsWithSlash)
		{
			// Avoid "host//path": keep the host's slash, drop the link's.
			j++;
		}

		ok = appendUrlChars(buffer, bufSize, bufOffset, url.lpszScheme, (int)url.dwSchemeLength)
			&& appendUrlChars(buffer, bufSize, bufOffset, _T("://"), 3)
			&& appendUrlChars(buffer, bufSize, bufOffset, url.lpszHostName, hostLen);

		if (ok && !hostEndsWithSlash && !linkStartsWithSlash)
		{
			// Neither side supplies the separator, so add it.
			ok = appendUrlChars(buffer, bufSize, bufOffset, _T("/"), 1);
		}
		if (ok)
		{
			ok = appendUrlChars(buffer, bufSize, bufOffset, link + j, linkLen - j);
		}
	}

	if (!ok)
	{
		// Never hand a half-built URL back to the caller.
		buffer[0] = _T('\0');
		return false;
	}

	buffer[bufOffset] = _T('\0');
	return true;
}


// Downloads the page at `www`, then works through the parser's two queues:
//   1. every entry of htmlParser.linkQueue is combined with `www`; pages that
//      are not outside the current host are downloaded and parsed in turn
//      (the loop stops early once linkQueue reaches 5000 or nameQueue reaches
//      10000 entries);
//   2. every entry of htmlParser.nameQueue is combined with `www`, downloaded
//      and passed to savePictureFile.
// Progress and failures are written to the console and appended to
// "httpclient.log" through the global `log` stream.
//
// Returns -1 when the log file cannot be opened, otherwise 0.
// NOTE(review): links are always resolved against `www`, not against the page
// they were found on - confirm this is intended for relative links on subpages.
int HttpClient::searchWebSite(LPCTSTR www)
{
	DWORD bufLength = 0;
	TCHAR buf[MaxDomainNameLength];
	DomainNameType theLink, theName;

	if ((log = fopen("httpclient.log", "a")) == NULL)
	{
		_tprintf(_T("cannot open log\n"));
		return -1;
	}

	// Seed the queues from the start page.
	if (readWebSite(www))
	{
		htmlParser.retrieveCurrentHost(www);
		_ftprintf(log, _T("the host name is [%s]\n"), htmlParser.currentHost.nameBuffer);
		htmlParser.parseBuffer((LPTSTR)buffer, bufferSize);
	}

	// Follow page links while the queues stay below their size limits.
	while (!htmlParser.linkQueue.empty() && htmlParser.linkQueue.size() < 5000 && htmlParser.nameQueue.size() < 10000)
	{
		theLink = htmlParser.linkQueue.front();

		// InternetCombineUrl takes the buffer capacity in and writes the length out.
		bufLength = MaxDomainNameLength;

		_tprintf(_T("the link[%s] is going to be combined\n"), theLink.nameBuffer);
		if (InternetCombineUrl(www, theLink.nameBuffer, buf, &bufLength, ICU_BROWSER_MODE))
		{
			if (!htmlParser.isOutsideHostName(buf))
			{
				if (readWebSite(buf))
				{
					_tprintf(_T("combined[%s],link[%s]\n"), buf, theLink.nameBuffer);
					htmlParser.parseBuffer((LPTSTR)buffer, bufferSize);
				}
			}
			else
			{
				_tprintf(_T("outside host [%s]\n"), buf);
				_ftprintf(log, _T("link[%s] is outside [%s]\n"), buf, www);
			}
		}
		else
		{
			_tprintf(_T("failed combine url www=[%s]\n"), www);
			// nameBuffer is a TCHAR string, so it must go through _tprintf;
			// narrow printf would truncate it in UNICODE builds.
			_tprintf(_T("failed combine url link=[%s]\n"), theLink.nameBuffer);
			_ftprintf(log, _T("failed to combine url:\n[%s][%s]\n"), www,
				theLink.nameBuffer);
		}

		htmlParser.linkQueue.pop_front();
	}

	// Fetch every queued name and hand the downloaded bytes to savePictureFile.
	while (!htmlParser.nameQueue.empty())
	{
		theName = htmlParser.nameQueue.front();

		bufLength = MaxDomainNameLength;

		if (InternetCombineUrl(www, theName.nameBuffer, buf, &bufLength, ICU_BROWSER_MODE))
		{
			if (readWebSite(buf))
			{
				savePictureFile((LPBYTE)buffer, bufferSize);
			}
		}
		else
		{
			// Capture the error code before any other call can overwrite it.
			DWORD lastError = GetLastError();

			_tprintf(_T("failed combine url www=[%s]\n"), www);
			_tprintf(_T("failed combine url link=[%s]\n"), theName.nameBuffer);

			printf("last error %lu\n", lastError);
		}
		htmlParser.nameQueue.pop_front();
	}

	fclose(log);
	return 0;
}


/////////////////////////////////////////////////////////////////////////////////////////////
// Downloads the resource at `www` into the member `buffer`.
//
// A fresh WinINet session is opened for the call and closed again before
// returning. On success the number of bytes received is stored in `bufferSize`
// and true is returned. False is returned when the session or URL cannot be
// opened, when a read or handle close fails, or when the resource does not fit
// in the buffer; `bufferSize` is left unchanged in those cases.
//
// NOTE(review): assumes `buffer` holds BufferSize one-byte elements, as the
// original offset arithmetic did - confirm against the class declaration.
bool HttpClient::readWebSite(LPCTSTR www)
{
	DWORD size = 0;
	int offset = 0;
	bool ok = false;

	_tprintf(_T("\nread website[www]=[%s]\n"), www);
	_ftprintf(log, _T("\nread website[www]=[%s]\n"), www);

	// The last argument of InternetOpen is dwFlags; 0 requests no special
	// options (the original passed a port constant whose value is also 0).
	hSession = InternetOpen(_T("Microsoft Internet Explorer"), INTERNET_OPEN_TYPE_DIRECT, NULL, NULL, 0);
	if (hSession == NULL)
	{
		return false;
	}

	hFile = InternetOpenUrl(hSession, www, NULL, 0, INTERNET_FLAG_DONT_CACHE, NULL);
	if (hFile != NULL)
	{
		for (;;)
		{
			// Never ask for more than the space still free behind `offset`.
			DWORD room = (DWORD)(BufferSize - offset);

			if (room == 0)
			{
				// Buffer is exactly full: probe one byte into scratch storage
				// to tell "end of data" apart from "more data than fits".
				BYTE probe = 0;
				size = 0;
				if (!InternetReadFile(hFile, &probe, 1, &size))
				{
					ok = false;
				}
				else if (size == 0)
				{
					bufferSize = offset;
					ok = true;
				}
				else
				{
					printf("************************buffer overflow!***************************\n");
					ok = false;
				}
				break;
			}

			size = 0;
			if (!InternetReadFile(hFile, buffer + offset, room, &size))
			{
				ok = false;
				break;
			}
			if (size == 0)
			{
				// A successful read of zero bytes marks the end of the data.
				bufferSize = offset;
				ok = true;
				break;
			}
			offset += size;
		}

		if (InternetCloseHandle(hFile))
		{
			hFile = NULL;
		}
		else
		{
			printf("error of close handle\n");
			ok = false;
		}
	}
	else
	{
		printf("\n\n%lu\n\n", GetLastError());
	}

	if (InternetCloseHandle(hSession))
	{
		// Forget both handles so the destructor does not close a stale session.
		hSession = NULL;
		hFile = NULL;
	}

	return ok;
}



/*
	memset(&url_comp, 0, sizeof(URL_COMPONENTS));
		url_comp.dwStructSize = sizeof(URL_COMPONENTS);
		//url_comp.dwHostNameLength = 1;// url_comp.dwExtraInfoLength = url_comp.dwPasswordLength=
			//url_comp.dwSchemeLength=url_comp.dwUrlPathLength=url_comp.dwUserNameLength = 1;
		url_comp.dwSchemeLength= MaxDomainNameLength;
		url_comp.dwSchemeLength = MaxDomainNameLength;
		url_comp.lpszHostName = hostName;
		url_comp.lpszScheme = schemeBuf;
		
		if (!InternetCrackUrl(www, _tcslen(www), ICU_DECODE , &url_comp))
		{
			printf("error %d\n", GetLastError());
		}  

		//hostName[url_comp.dwHostNameLength] = _T('\0');
		//schemeBuf[url_comp.dwSchemeLength] = _T('\0');

		//_tprintf(_T("[%s][%s]\n"), hostName, schemeBuf);
		for (i =0; i< url_comp.dwHostNameLength; i++)
		{
			_tprintf(_T("%c"), hostName[i]);
		}

		for (i =0; i< url_comp.dwSchemeLength; i++)
		{
			_tprintf(_T("%c"), schemeBuf[i]);
		}

  */