/*
    Copyright (C) 2008 Rouslan Dimitrov

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "thread.h"
#include "log.h"

// Forward declarations of the file-level helpers defined further down.
// DoWork: repeatedly claims an index from the pool's shared counter and calls
// the pool's work function on it until the end index is reached.
void DoWork(CThreadPool *pool);
// ThreadScheduler: thread entry point handed to CreateThread for every worker;
// lpThreadPool is the owning CThreadPool.
DWORD WINAPI ThreadScheduler(LPVOID lpThreadPool);

// Atomically adds the second argument to the int the first argument points to
// and returns the value that int held BEFORE the addition.
// The body is naked __fastcall assembly: it reads the pointer from ecx and the
// addend from edx and returns the result in eax.
// Unfortunately __forceinline doesn't inline this function
__declspec(naked) int __fastcall AtomicXAdd(int *, int) {
	__asm {
		// Locked exchange-and-add: [ecx] += edx, and edx receives the old [ecx]
		lock xadd	[ecx], edx
		// Hand the old value back as the return value
		mov			eax, edx
		ret
	}
}

// Returns the initial APIC ID of the processor executing the call, i.e. bits
// 31..24 of EBX as reported by CPUID leaf 1.
// This is serializing (CPUID is a serializing instruction).
//
// CPUID overwrites EAX, EBX, ECX and EDX. The function is naked, so the
// compiler generates no prologue/epilogue and will not preserve anything for
// us; EBX is callee-saved under the x86 calling conventions and therefore has
// to be saved and restored by hand, otherwise the caller's EBX is corrupted.
__declspec(naked) int __fastcall GetCurrentProcessorNumberWinAll(void) {
	__asm {
		push	ebx			// preserve the caller's EBX across CPUID
		mov		eax, 1
		cpuid
		shr		ebx, 24
		mov		eax, ebx	// initial APIC ID
		pop		ebx
		ret		0
	}
}

void CThreadPool::Init(bool useCaller) {
	DWORD		dwProcAffinityMask, dwSysAffinityMask;
	HANDLE		hCurProc = GetCurrentProcess();

	nThreads = 0;

	GetProcessAffinityMask(hCurProc, &dwProcAffinityMask, &dwSysAffinityMask);

	for(int cpu=0, first=1; cpu<MAX_THREADS; cpu++) {
		if(!(dwProcAffinityMask & (1<<cpu)))
			continue; // CPU isn't available

		if(useCaller && first) {
			// Leave the first available CPU for the calling thread
			first--;
			continue;
		}

		hThreads[nThreads] = CreateThread(0, 0, ThreadScheduler, this, 0, 0);

		if(!hThreads[nThreads]) {
			g_CLog.Log(LOG_SYSTEM, "Couldn't create a thread for cpu %d. Retrying...", cpu);
			Sleep(10);
			hThreads[nThreads] = CreateThread(0, 0, ThreadScheduler, this, 0, 0);
			if(!hThreads[nThreads])
				break;
		}

		hThreadsDone[nThreads] = CreateEvent(0, false, false, 0); // Auto reset event

		SetThreadIdealProcessor(hThreads[nThreads], cpu);
		nThreads++;
	}

	hMoreWork = CreateEvent(0, true, false, 0);

	useCallingThread = useCaller;
	bRunning = true;

	g_CLog.Log(LOG_SYSTEM, "Thread pool running %d threads.", nThreads + (int)useCaller);

	WaitThreads();
}

// Publishes a new job and wakes the workers.
//   f - work function, invoked as f(index, a)
//   a - opaque argument forwarded to every call of f
//   s - first index to process
//   e - end index; processing stops at the first index that is not below e
// When the pool was initialised to use the calling thread, the caller also
// processes indices here before returning. This function itself does not wait
// for the worker threads to finish.
void CThreadPool::RunOn(WORKFUNC f, void* a, int s, int e) {
	// Describe the job before anybody is woken up
	function = f;
	argument = a;
	curIndex = s;
	endIndex = e;

	// Release the threads currently waiting on the event
	// TODO: MSDN says PulseEvent is unreliable, perhaps we need a loop with SetEvent?
	PulseEvent(hMoreWork);

	// Nothing else to do unless the caller takes part in the work
	if(!useCallingThread)
		return;

	// HANDLE hCurThread = GetCurrentThread();
	// SetThreadIdealProcessor(hCurThread, 0);

	DoWork(this);

	// SetThreadIdealProcessor(hCurThread, MAXIMUM_PROCESSORS);
}

void CThreadPool::ShutDown() {
	bRunning = false;

	SetEvent(hMoreWork);
	
	WaitThreadsToExit();

	for(int i=0; i<nThreads; i++)
		CloseHandle(hThreads[i]);
}

// Processes indices of the pool's current job until none are left.
// Each pass claims one index by atomically incrementing pool->curIndex and
// calls pool->function(index, pool->argument) for it; the loop ends with the
// first claimed index that is not below pool->endIndex.
void DoWork(CThreadPool *pool) {
	for(;;) {
		// AtomicXAdd returns the counter's value from before the increment
		const int index = AtomicXAdd(&pool->curIndex, 1);

		if(!(index < pool->endIndex))
			break;

		pool->function(index, pool->argument);
	}
}

// Signals `signal` and blocks on `wait` (no timeout) through a single
// SignalObjectAndWait call.
// If that call reports WAIT_FAILED the calling thread does not return from
// here: the handle in `signal` is closed and replaced by a manual-reset event
// created in the signaled state, the error is logged and the thread exits
// with E_FAIL.
void SignalAndWait(HANDLE& signal, HANDLE wait) {
	const DWORD result = SignalObjectAndWait(signal, wait, INFINITE, false);

	if(result != WAIT_FAILED)
		return;

	// It is not clear from the spec whether the signaling or the waiting
	// failed, so give up on this thread. Swap its event for one that stays
	// signaled.
	CloseHandle(signal);
	signal = CreateEvent(0, true, true, 0);

	g_CLog.Log(LOG_SYSTEM, "Threading error. Killing 1 thread.");
	ExitThread(E_FAIL);
}

// Entry point of every worker thread; lpThreadPool is the owning CThreadPool.
// The thread takes the next free slot number, then repeats: signal its own
// "done" event and wait for more work; once released, leave if the pool is no
// longer running, otherwise process the current job.
// Always returns 0.
DWORD WINAPI ThreadScheduler(LPVOID lpThreadPool) {
	CThreadPool* const	owner = static_cast<CThreadPool*>(lpThreadPool);

	// Slot numbers are handed out in the order the threads get here.
	// NOTE(review): the counter is a function-local static, so it is shared by
	// every pool in the process and is never reset.
	static int			nextSlot = 0;
	const int			slot = AtomicXAdd(&nextSlot, 1);

	for(;;) {
		// Report "idle" and sleep until the pool announces more work
		SignalAndWait(owner->hThreadsDone[slot], owner->hMoreWork);

		if(!owner->bRunning)
			break;

		DoWork(owner);
	}

	return 0;
}
