//-------------------------------------------------------------------------------------
//
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies.  Intel makes no representations about the
// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//

#include "StdAfx.h"
#include "nulstein.h"
#include "Mutex.h"
#include "TaskScheduler.h"

/* Stack size, in bytes, requested for each worker thread (64K).					*/ 
/* The expansion is parenthesized so the macro behaves as a single value wherever	*/ 
/* it is used (e.g. `x / STACKSIZE` or `x % STACKSIZE`).							*/ 
#define STACKSIZE (64*1024)

/* Thread Local Storage slot index (static, so shared by all CTaskPool instances).	*/ 
/* The slot stores the CWorkerThread* of the calling thread; it is allocated in		*/ 
/* CTaskPool::Start() and released in CTaskPool::Stop().							*/ 
DWORD	CTaskPool::ms_TLSindex;

//________________________________________________________________________________
CTaskPool::CTaskPool()
	: m_ThreadCount(0)
	, m_bShuttingDown(false)
{
	/* The pool is constructed empty and not shutting down; Start() sets up */ 
	/* the semaphores, the TLS slot and the worker threads.                */ 
}

unsigned CTaskPool::GetHardwareThreadsCount()
{
#if SINGLETHREADED
	return 1;
#else
	SYSTEM_INFO si;

	GetSystemInfo(&si);
	return si.dwNumberOfProcessors;
#endif
}

bool CTaskPool::Start()
{
	unsigned iThread;
	
	m_bShuttingDown		= false;
	m_bWorkersIdle		= false;
	m_pMainCompletion	= NULL;
	
	/* find hardware thread count */ 
	m_ThreadCount = GetHardwareThreadsCount();
	if (m_ThreadCount > MAX_THREADS)
		m_ThreadCount = MAX_THREADS;
	
	/* initialize Thread Local Storage */ 
	/* (TLS lets us store/query the CWorkerThread* corresponding to current thread) */ 
	ms_TLSindex = TlsAlloc();
	if (TLS_OUT_OF_INDEXES==ms_TLSindex) return false;

	/* Worker wakeup event */ 	
	m_WakeUpCall		= CreateSemaphore(NULL, 0, MAX_THREADS, NULL);
	m_SleepNotification	= CreateSemaphore(NULL, 0, MAX_THREADS, NULL);

	/* set ourselves up as thread[0] */ 
	m_Thread[0].AttachToThisThread(this);
	
	/* start worker threads */ 
	for(iThread=1; iThread<m_ThreadCount; iThread++)
	{
		m_Thread[iThread].Start(this);
	}
	
	return true;
}

bool CTaskPool::Stop()
{
	unsigned iThread;

	m_bShuttingDown = true;

	/* wait for all to finish */ 
	WaitForWorkersToBeReady();
	WakeWorkers();
	for(iThread=1; iThread<m_ThreadCount; iThread++)
	{
		while (!m_Thread[iThread].m_bFinished)
		{ 
			/* spin */; 
		}
	}
		
	TlsFree(ms_TLSindex);
	ms_TLSindex = TLS_OUT_OF_INDEXES;
	return true;
}

//________________________________________________________________________________
/* Registers the calling thread as a worker of pTaskPool without creating a new OS		*/ 
/* thread (CTaskPool::Start() uses this for thread[0]).									*/ 
/*																						*/ 
/* Resets the same per-thread state as Start() does, including m_pCurrentCompletion	*/ 
/* which DoWork() saves and restores around each task, and stores `this` in the			*/ 
/* pool's TLS slot so that GetCurrent() finds it.										*/ 
/*																						*/ 
/* Returns true on success, false if the TLS slot could not be written.				*/ 
bool CWorkerThread::AttachToThisThread(CTaskPool* pTaskPool)
{
	m_pTaskPool				= pTaskPool;
	m_hThread				= GetCurrentThread();
	
	m_TaskCount				= 0;
	m_bFinished				= false;
	m_pCurrentCompletion	= NULL;
	
	/* TlsSetValue returns zero on failure */ 
	return TlsSetValue(m_pTaskPool->ms_TLSindex, this) != 0;
}

/* Resets this worker's state and spawns its OS thread, which enters through			*/ 
/* _ThreadProc with `this` as argument. Returns true if the thread was created.		*/ 
bool CWorkerThread::Start(CTaskPool* pTaskPool)
{
	/* reset per-thread state before the new thread exists */ 
	m_pCurrentCompletion	= NULL;
	m_bFinished 			= false;
	m_TaskCount				= 0;
	m_pTaskPool				= pTaskPool;

	/* spawn the thread with a STACKSIZE stack; the thread id is not used afterwards */ 
	DWORD idThread;
	m_hThread = CreateThread(NULL, STACKSIZE, _ThreadProc, this, 0, &idThread);
	ASSERT(m_hThread);

	return (NULL != m_hThread);
}

/* Returns the CWorkerThread stored in the calling thread's TLS slot */ 
/* (written by AttachToThisThread / ThreadProc).                     */ 
CWorkerThread* CWorkerThread::GetCurrent()
{
	void* const pSlotValue = TlsGetValue( CTaskPool::ms_TLSindex );

	return static_cast<CWorkerThread*>(pSlotValue);
}

/* Static thread entry point handed to CreateThread: `p` is the CWorkerThread	*/ 
/* that was passed as thread argument; forward to its member ThreadProc().		*/ 
DWORD WINAPI CWorkerThread::_ThreadProc(void* p)
{
	return static_cast<CWorkerThread*>(p)->ThreadProc();
}

void CWorkerThread::Idle()
{
	/* Advertise we're going to sleep */ 
	ReleaseSemaphore( m_pTaskPool->m_SleepNotification, 1, NULL );
	
	/* Sleep */ 
	WaitForSingleObject(m_pTaskPool->m_WakeUpCall, INFINITE);
}

/* Body of a worker thread: sleep until woken, work for as long as the pool has	*/ 
/* a main completion, and repeat until the pool is shutting down. Sets			*/ 
/* m_bFinished just before returning 0.											*/ 
DWORD CWorkerThread::ThreadProc()
{
	CTaskPool* const pPool = m_pTaskPool;

	/* make this object reachable through GetCurrent() on this thread */ 
	TlsSetValue(pPool->ms_TLSindex, this);

	/* each round starts by going idle; a wake-up during shutdown ends the loop */ 
	for (Idle(); !pPool->m_bShuttingDown; Idle())
	{
		PROFILE_SYNC_PREPARE( this );

		while (pPool->m_pMainCompletion)
		{
			/* process own tasks and steal from others */ 
			DoWork(NULL);

			/* stop working as soon as shutdown is requested */ 
			if (pPool->m_bShuttingDown)
				break;
		}

		PROFILE_SYNC_CANCEL( this );
	}

	m_bFinished = true;
	return 0;
}

/* Runs queued tasks on this thread, stealing from other threads when the local pile	*/ 
/* is empty.																			*/ 
/*   pExpected : completion the caller is waiting on, or NULL to work until nothing	*/ 
/*               is left to do.															*/ 
void CWorkerThread::DoWork(CTaskCompletion* pExpected)
{	
	/* NOTE: 
		If pExpected is NULL, then we work until there is nothing left to do. This 
		normally happens only in a worker's thread loop (ThreadProc, above).
		
		If it isn't NULL, the caller is waiting for this particular completion 
		(and will want to carry on with something once it is done). We do our own work
		and steal some until that condition is met. This normally happens as
		part of WorkUntilDone (below).
	*/
	
	/* NOTE: This method needs to be reentrant (!)
        A task can spawn more tasks and may have to wait for their completion.
        So, as part of our pTask->Run() we can be called again, via the WorkUntilDone 
        method, below.
	*/
	
	do
	{
		CInternalTask*		pTask;
		CTaskCompletion*	pLastCompletion;
		
		pLastCompletion = NULL;
		while (PopTask(&pTask))
		{
			/* do something */ 
			PROFILE_SYNC_CANCEL( this );
			/* TASK */ /* save the completion of the enclosing task (reentrancy) */ 
			/* TASK */ pLastCompletion = m_pCurrentCompletion;
			/* TASK */ m_pCurrentCompletion = pTask->m_pCompletion;
			/* TASK */ 
			/* TASK */ pTask->Run(this);
			/* TASK */ 
			/* TASK */ /* the completion is reached through m_pCurrentCompletion, not pTask:	*/ 
			/* TASK */ /* presumably pTask may no longer be valid after Run() - TODO confirm	*/ 
			/* TASK */ m_pCurrentCompletion->MarkBusy(false);
			/* TASK */ m_pCurrentCompletion = pLastCompletion;
			PROFILE_SYNC_PREPARE( this );
			
			/* check if work we're expecting is done */ 
			if (pExpected && !pExpected->IsBusy()) return;
		}

		/* check if main work is finished (pool has no root completion any more) */ 
		if (!m_pTaskPool->m_pMainCompletion) 
			return;
			
		/* local pile is empty: keep going only if another thread gave us work */ 
	} while (StealTasks());	
	
	/* Nothing left to do, for now */ 
}

/* Keeps this thread working (own tasks and stolen ones) until pCard is no longer	*/ 
/* busy. If pCard is the pool's main completion, the pool is then marked idle.		*/ 
void CWorkerThread::WorkUntilDone(CTaskCompletion* pCard)
{
	PROFILE_SYNC_PREPARE( this );

	for ( ; pCard->IsBusy(); )
		DoWork(pCard);

	PROFILE_SYNC_CANCEL( this );

	/* anything but the root task: nothing more to do */ 
	if (m_pTaskPool->m_pMainCompletion != pCard)
		return;

	/* This was the root task. Now that it is finished, the scheduler can go idle.	*/ 
	/* What happens next: (eventually,) each worker thread sees that there is no	*/ 
	/* main completion any more and goes idle, waiting for the semaphore to signal	*/ 
	/* that new work needs to be done (see CWorkerThread::ThreadProc).				*/ 
	m_pTaskPool->m_pMainCompletion = NULL;
}

/* Takes the next piece of work from the top of this thread's pile.				*/ 
/*   ppOutTask : receives the task to run.											*/ 
/* Returns false when the pile is empty, true when *ppOutTask has been filled in.	*/ 
bool CWorkerThread::PopTask(CInternalTask** ppOutTask)
{
	CSpinMutexLock PileLock(&m_TaskMutex);

	/* empty pile: nothing to hand out */ 
	if (m_TaskCount == 0)
		return false;

	CInternalTask* const pTop = m_Task[m_TaskCount-1];

	/* Whole-task case: the top task cannot hand out a partial task, so it is	*/ 
	/* removed from the pile and returned as is.								*/ 
	if (!pTop->PartialPop(this, ppOutTask))
	{
		*ppOutTask = pTop;
		m_TaskCount--;
		return true;
	}

	/* Partial-task case (ex: one iteration of a loop): the top task stays on	*/ 
	/* the pile and its completion is marked busy once more.					*/ 
	pTop->m_pCompletion->MarkBusy(true);
	return true;
}

/* Tries to queue pTask. Returns true if the task was queued (or spread across the	*/ 
/* threads), false if it was not: single-threaded pool, or pile already full.		*/ 
bool CWorkerThread::_PushTask(CInternalTask* pTask)
{
	CTaskPool* const pPool = m_pTaskPool;

	/* a single-threaded pool never queues anything */ 
	if (pPool->m_ThreadCount<2) 
		return false;

	/* idle pool: try to spread the subtasks across all threads */ 
	if (!pPool->m_pMainCompletion)
	{
		/* only the main thread is expected here		*/ 
		/* (no worker can push job, no task is queued)	*/ 
		ASSERT( GetWorkerIndex()==0 ); 

		/* Ready ? */ 
		pPool->WaitForWorkersToBeReady();

		/* Set... */ 
		if (pTask->Spread( pPool ))
		{
			/* Go! This task becomes the root task (see WorkUntilDone) */ 
			pPool->m_pMainCompletion = pTask->m_pCompletion; 
			pPool->WakeWorkers();
			return true;
		}
	}

	/* otherwise the task goes on top of our own pile */ 
	{
		CSpinMutexLock PileLock(&m_TaskMutex);

		/* refuse when the pile is already full */ 
		if (!(m_TaskCount < MAX_TASKSPERTHREAD))
			return false;

		pTask->m_pCompletion->MarkBusy(true);
		m_Task[m_TaskCount] = pTask;
		m_TaskCount++;
	}

	/* Go ! If the pool still has no root task, this one becomes it (see WorkUntilDone) */ 
	if (!pPool->m_pMainCompletion)
	{
		pPool->m_pMainCompletion = pTask->m_pCompletion;
		pPool->WakeWorkers();
	}

	return true;
}

/* Queues pTask if possible, and runs it immediately on this thread otherwise.	*/ 
/* Returns true if the task was queued, false if it was run in place.			*/ 
bool CWorkerThread::PushTask(CInternalTask* pTask)
{
	const bool bQueued = _PushTask(pTask);

	if (!bQueued)
	{
		/* could not queue it: run it right away */ 
		/* (NOTE: the completion card is left alone: not set, not cleared) */ 
		pTask->Run(this);
	}

	return bQueued;
}

/* Called on the thread being stolen from: hands part of this thread's pile over to	*/ 
/* pIdleThread. Returns true if pIdleThread received work, false otherwise (our		*/ 
/* pile is locked by someone else, we have nothing, or the idle thread already has	*/ 
/* tasks).																			*/ 
bool CWorkerThread::GiveUpSomeWork(CWorkerThread* pIdleThread)
{
	/* our own pile: only try the lock, never wait for it */ 
	CSpinMutexLock OwnLock;
	if (!OwnLock.TryLock(&m_TaskMutex))
		return false;

	/* nothing to share */ 
	if (m_TaskCount == 0)
		return false;

	/* the receiving pile is locked for the whole transfer */ 
	CSpinMutexLock IdleLock(&pIdleThread->m_TaskMutex);

	/* can happen if we're trying to steal work while taskpool has gone idle and started again */ 
	if (pIdleThread->m_TaskCount != 0)
		return false;

	/* a single remaining task: try to split it and hand over the new part */ 
	if (m_TaskCount == 1)
	{
		CInternalTask* pSplitOff = NULL;

		if (m_Task[0]->Split(pIdleThread, &pSplitOff))
		{
			pSplitOff->m_pCompletion->MarkBusy(true);

			pIdleThread->m_Task[0] = pSplitOff;
			pIdleThread->m_TaskCount = 1;
			return true;
		}
	}

	/* hand over half of the pile (rounding up), taken from the bottom */ 
	const unsigned nGiven	= (m_TaskCount+1) /2;
	const unsigned nKept	= m_TaskCount - nGiven;
	unsigned i;

	for (i=0; i<nGiven; i++)
	{
		pIdleThread->m_Task[i] = m_Task[i];
		m_Task[i] = NULL;
	}
	pIdleThread->m_TaskCount = nGiven;

	/* slide the tasks we keep down to the bottom of our pile */ 
	for (i=0; i<nKept; i++)
	{
		m_Task[i] = m_Task[nGiven+i];
	}
	m_TaskCount = nKept;

	return true;
}

bool CWorkerThread::StealTasks()
{
	unsigned	iThread;
	int			Offset;

	/* avoid always starting with same other thread. This aims at avoiding a potential	*/ 
	/* for problematic patterns. Note: the necessity of doing is largely speculative.	*/ 
	Offset = (GetWorkerIndex() + GetTickCount()) % m_pTaskPool->m_ThreadCount;
	
	/*  */ 
	for(iThread=0; iThread<m_pTaskPool->m_ThreadCount; iThread++)
	{
		CWorkerThread*	pThread;
		
		pThread = &m_pTaskPool->m_Thread[ (iThread+Offset)% m_pTaskPool->m_ThreadCount ];
		if (pThread==this) continue;
	
		if (pThread->GiveUpSomeWork(this)) 
			return true;
			
		if (m_TaskCount) 
			return true;
	}
	
	return false;
}

//________________________________________________________________________________
