519 lines
14 KiB
C++
519 lines
14 KiB
C++
#include <stdlib.h>
|
|
#include <assert.h>
|
|
|
|
#include "ModelStructs.h"
|
|
#include "FuncCall.h"
|
|
#include "CallInfoStructs.h"
|
|
#include "Interval.h"
|
|
//====
|
|
#include <stdio.h>
|
|
// Defined in another translation unit. Within this file it is referenced only
// by commented-out code in FuncCall::dopl().
extern long TraceProcNum;
|
|
//=***
|
|
#include "LoopBlock.h"
|
|
|
|
// Lookup/removal helpers defined in other translation units; each takes the
// numeric ID of the object it refers to.
extern _ShadowInfo * GetShadowByIndex(long ID);
|
|
extern void DelShadow(long ID);
|
|
extern _AMViewInfo * GetAMViewByIndex(long ID);
|
|
extern _DArrayInfo * GetDArrayByIndex(long ID);
|
|
extern _ShdGrpInfo * GetShdGroupByIndex(long ID);
|
|
|
|
// File-level description of the parallel loop currently being processed.
// It is filled by FuncCall::crtpl()/mappl() and read by dopl()/endpl().
_ParLoopInfo ParLoopInfo;
|
|
// Definition of the static member _ParLoopInfo::count, initialised to zero.
int _ParLoopInfo::count = 0;
|
|
|
|
//grig
|
|
//LoopBlock * prevLoopBlock=NULL;
|
|
//\grig
|
|
|
|
void FuncCall::crtpl()
|
|
{
|
|
crtpl_Info* params=(crtpl_Info*) call_params;
|
|
ParLoopInfo.ID=params->ID;
|
|
ParLoopInfo.Rank=params->Rank;
|
|
ParLoopInfo.AlignType=0;
|
|
ParLoopInfo.PatternType=0;
|
|
ParLoopInfo.PatternID=0;
|
|
ParLoopInfo.exfrst = false;
|
|
ParLoopInfo.imlast = false;
|
|
ParLoopInfo.across = false;
|
|
#ifdef nodef
|
|
if(ParLoopInfo.AxisArray)
|
|
free(ParLoopInfo.AxisArray);
|
|
ParLoopInfo.AxisArray=(long*)calloc(ParLoopInfo.Rank,sizeof(long));
|
|
assert(ParLoopInfo.AxisArray != NULL);
|
|
#endif
|
|
ParLoopInfo.ParLoop_Obj = new ParLoop(ParLoopInfo.Rank);
|
|
}
|
|
|
|
void FuncCall::endpl()
|
|
{
|
|
int i;
|
|
double curr_pt;
|
|
_ShadowInfo* SHD;
|
|
|
|
if (ParLoopInfo.imlast) {
|
|
SHD=GetShadowByIndex(ParLoopInfo.imlast_SGR);
|
|
for (i=0; i<MPSProcCount(); i++) {
|
|
|
|
curr_pt = CurrProcTime(currentVM->map(i));
|
|
if(curr_pt < SHD->time_end) {
|
|
printf("Overlap = %f (%f -%f )\n", curr_pt - SHD->time_start, curr_pt, SHD->time_start);
|
|
|
|
AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start));
|
|
AddTime(__Wait_shadow,currentVM->map(i),
|
|
(SHD->time_end - curr_pt));
|
|
} else {
|
|
AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start));
|
|
}
|
|
}
|
|
ParLoopInfo.imlast = false;
|
|
|
|
DelShadow(ParLoopInfo.imlast_SGR/*params->ID*/);
|
|
|
|
}
|
|
else if (ParLoopInfo.across) {
|
|
/* íå íóæíî, ïîòîìó ÷òî åñëè èñïîëçóåòñÿ across, òî èñïîëüçóåòñÿ ñèíõðîííîå îáíîâëåíèÿ ãðàíÿìè
|
|
/* (êàê íà â âõîäå (íà ïåðâîé èòåðàöèè (ìíå êàæåòñÿ òàì íåëüçÿ çàäàòü àñèíõðîííûé ðåæèì))
|
|
/* òàê è ïðè âû÷ècëåíèè öèêëà across), ïîýòîìó íà âûõîäå èç öèêëà íå òðåáóåòñÿ ñ÷èòàòü âðåìÿ Shadow.
|
|
SHD=GetShadowByIndex(ParLoopInfo.across_SGR);
|
|
for (i=0; i<MPSProcCount(); i++) {
|
|
|
|
curr_pt = CurrProcTime(currentVM->map(i));
|
|
if(curr_pt < SHD->time_end)
|
|
{
|
|
printf("Overlap across= %f (%f - %f )\n", curr_pt - SHD->time_start, curr_pt, SHD->time_start);
|
|
|
|
AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start));
|
|
AddTime(__Wait_shadow,currentVM->map(i), (SHD->time_end - curr_pt));
|
|
} else {
|
|
|
|
//====
|
|
//printf("SHD %f %f\n",curr_pt, SHD->time_start);
|
|
//was AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start));
|
|
//=***
|
|
}
|
|
}
|
|
*/
|
|
|
|
ParLoopInfo.across = false;
|
|
|
|
DelShadow(ParLoopInfo.across_SGR);
|
|
|
|
}
|
|
delete ParLoopInfo.ParLoop_Obj;
|
|
ParLoopInfo.ParLoop_Obj=NULL;
|
|
|
|
//grig
|
|
// if(prevLoopBlock!=NULL)
|
|
// {
|
|
// delete prevLoopBlock;
|
|
// prevLoopBlock=NULL;
|
|
// }
|
|
//\grig
|
|
}
|
|
|
|
void FuncCall::mappl()
|
|
{
|
|
mappl_Info* params = (mappl_Info*) call_params;
|
|
|
|
if (params->PatternType == 1) {
|
|
// AMView
|
|
ParLoopInfo.PatternType=1;
|
|
ParLoopInfo.AlignType=1;
|
|
ParLoopInfo.PatternID=params->PatternRef;
|
|
_AMViewInfo* AMV_Info=GetAMViewByIndex(params->PatternRef);
|
|
ParLoopInfo.ParLoop_Obj->MapPL(AMV_Info->AMView_Obj, params->AxisArray,
|
|
params->CoeffArray, params->ConstArray, params->InInitIndexArray,
|
|
params->InLastIndexArray, params->InStepArray);
|
|
} else if (params->PatternType == 2) {
|
|
// DisArray
|
|
ParLoopInfo.PatternType=2;
|
|
ParLoopInfo.AlignType=2;
|
|
ParLoopInfo.PatternID=params->PatternRefPtr;
|
|
_DArrayInfo* DA_Info=GetDArrayByIndex(params->PatternRefPtr);
|
|
//RRRRRRRRRRRRRRRR
|
|
ParLoopInfo.ParLoop_Obj->MapPL(DA_Info->DArray_Obj, params->AxisArray,
|
|
params->CoeffArray, params->ConstArray, params->InInitIndexArray,
|
|
params->InLastIndexArray, params->InStepArray);
|
|
} else {
|
|
return;
|
|
}
|
|
ParLoopInfo.AxisArray = params->AxisArray;
|
|
|
|
}
|
|
|
|
void FuncCall::dopl()
|
|
{
|
|
int i, j, cnt;
|
|
double time = 0.0, ip_time = 0.0;
|
|
long loop_size = ParLoopInfo.ParLoop_Obj->GetLoopSize();
|
|
long block_size;
|
|
// long interceptj;
|
|
//====
|
|
int type_size, mode=0;
|
|
//=***
|
|
|
|
/* for(i=0;i<MPSProcCount();i++)
|
|
{ printf("vcalltime[%d]=%f * Proc %d = %f\n",i,vcall_time[i],TraceProcNum,vcall_time[i]*TraceProcNum);
|
|
vcall_time[i]*=TraceProcNum;
|
|
}
|
|
|
|
// printf("calltime=%f * Proc %d = %f\n",call_time,TraceProcNum,call_time*TraceProcNum);
|
|
call_time*=TraceProcNum; //number of processors in trace-mode execution
|
|
*/
|
|
|
|
|
|
// printf("DOPL %f\n",call_time);
|
|
if (mode)
|
|
{
|
|
printf("DOPL ");
|
|
for(i=0;i<MPSProcCount(); i++)
|
|
printf("%f ",CurrProcTime(i));
|
|
printf("\n");
|
|
}
|
|
|
|
dopl_full_Info* tmp_params = (dopl_full_Info*)this->call_params;
|
|
|
|
if (call_time==0 || loop_size==0)
|
|
return;
|
|
|
|
LoopBlock** ProcBlock=(LoopBlock**)calloc(MPSProcCount(),sizeof(LoopBlock*));
|
|
assert(ProcBlock != NULL);
|
|
|
|
//ïîñòðîåíèå âèòêîâ íà êàæäîì ïðîöåññîðå
|
|
for(i=0;i<MPSProcCount();i++)
|
|
ProcBlock[i]=new LoopBlock(ParLoopInfo.ParLoop_Obj, i,1);
|
|
|
|
|
|
if (mode)
|
|
{
|
|
printf("start DOPL\n");
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
{
|
|
int k;
|
|
printf("DOPL proc[%d]= ", i);
|
|
for (k = 0; k < ProcBlock[i]->LSDim.size(); k++)
|
|
printf("%d %d ", ProcBlock[i]->LSDim[k].Lower, ProcBlock[i]->LSDim[k].Upper);
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
//====
|
|
if(ParLoopInfo.across && tmp_params->ReturnVar==1)
|
|
{
|
|
#define max_rank 4
|
|
#define ShdWid(k) ((!invers[k])?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize)
|
|
#define PreShdWid(k) (invers[k]?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize)
|
|
#define msize(i,j) ((j<rank_mas)?(ProcBlock[i]->LSDim[j].Upper - ProcBlock[i]->LSDim[j].Lower + 1) / ProcBlock[i]->LSDim[j].Step:1)
|
|
std::vector<long> pp;
|
|
int k,d,rank,j,i,rank_mas,x;
|
|
int invers[max_rank],prev[max_rank],post[max_rank],p[max_rank],n[max_rank];
|
|
double a,sendtime,com_time,real_sync,exectime,overlap,TStart,TByte;
|
|
|
|
pp=currentVM->getSizeArray();
|
|
rank=pp.size();
|
|
|
|
|
|
for(k=0;k<rank;k++)
|
|
p[k]=pp[k];
|
|
//ïî äðóãèì èçìåðåíèÿ ðåøåòêà ïðîöîâ èìååò øèðèíó 1
|
|
for(k=rank;k<max_rank;k++)
|
|
p[k]=1;
|
|
|
|
for(k=0;k<MPSProcCount();k++)
|
|
if(!ProcBlock[k]->empty())
|
|
break;
|
|
|
|
if(k<MPSProcCount())
|
|
rank_mas=ProcBlock[0]->LSDim.size();
|
|
else
|
|
rank_mas=0; //impossible must be
|
|
|
|
// rank_mas=rank;
|
|
|
|
// printf("rank=%d rank_mas=%d\n",rank,rank_mas);
|
|
|
|
//calc invers
|
|
for(i=0;i<MPSProcCount();i++)
|
|
{
|
|
for(j=0;j<ParLoopInfo.Rank;j++)
|
|
invers[j]=ParLoopInfo.ParLoop_Obj->Invers[j];
|
|
|
|
for(k=0;k<rank_mas;k++)
|
|
n[k]=i;
|
|
for(k=rank_mas;k<max_rank;k++)
|
|
n[k]=0;
|
|
|
|
for(k=max_rank-1;k>=0;k--)
|
|
{
|
|
n[k]=n[k]%p[k];
|
|
for(x=0;x<k;x++)
|
|
n[x]=n[x]/p[k];
|
|
}
|
|
|
|
for(k=0;k<rank;k++)
|
|
{
|
|
for(j=k+1,d=1;j<rank;j++)
|
|
d*=p[j];
|
|
//íàäî prev == -1 åñëè íåò ïðåä. ïðîöåññîðà äëÿ íåãî ïî ýòîìó èçìåðåíèþ, êîò. íàäî æäàòü
|
|
if(invers[k])
|
|
if(n[k]!=p[k]-1 && i+d<MPSProcCount()) prev[k]=i+d;
|
|
else prev[k]=-1;
|
|
else
|
|
if(n[k]!=0 && i-d>=0) prev[k]=i-d;
|
|
else prev[k]=-1;
|
|
|
|
if(!invers[k])
|
|
if(n[k]!=p[k]-1 && i+d<MPSProcCount()) post[k]=i+d;
|
|
else post[k]=-1;
|
|
else
|
|
if(n[k]!=0 && i-d>=0) post[k]=i-d;
|
|
else post[k]=-1;
|
|
}
|
|
// printf("PREV %d %d\n",prev[0],prev[1],prev[2],prev[3]);
|
|
// printf("POST %d %d\n",post[0],post[1],post[2],post[3]);
|
|
|
|
|
|
for(k=0,a=1;k<rank;k++)
|
|
a*=msize(i,k);
|
|
|
|
// for(k=0;k<rank;k++)
|
|
// printf("SHAD widthNEW[%d]=%d SHAD width[%d]=%d\n",k,ShdWid(k),k,PreShdWid(k));
|
|
type_size=ParLoopInfo.ParLoop_Obj->AcrossFlag;
|
|
|
|
TStart = currentVM->getTStart();
|
|
TByte = currentVM->getTByte()*type_size;
|
|
|
|
//printf("Tstart=%.10f TByte=%.10f\n",TStart,TByte);
|
|
|
|
sendtime=0; com_time=0; real_sync=0; exectime=0; overlap=0;
|
|
for(k=0;k<rank;k++)
|
|
{
|
|
if(post[k]!=-1)
|
|
{ com_time+=TStart+a/msize(i,k)*TByte;
|
|
}
|
|
}
|
|
}
|
|
//printf("Procs[%d] comm=%f\n",i,com_time);
|
|
// AddMPSTime(__Shadow_synchronize,my_num,real_sync);
|
|
AddMPSTime(__Wait_shadow,com_time);
|
|
// AddMPSTime(__Shadow_overlap,overlap);
|
|
}
|
|
|
|
if(ParLoopInfo.across && tmp_params->ReturnVar==0)
|
|
{
|
|
double max_time;
|
|
type_size=ParLoopInfo.ParLoop_Obj->AcrossFlag;
|
|
//Åñëè îáðàòíûé îòñ÷åò â öèêëå òî Step äîëæåí áûòü < 0
|
|
for(i=0;i<MPSProcCount();i++)
|
|
for(j=0;j<ProcBlock[i]->GetRank();j++)
|
|
if(ParLoopInfo.ParLoop_Obj->Invers[j]==1) ProcBlock[i]->LSDim[j].Step=-ProcBlock[i]->LSDim[j].Step;
|
|
|
|
|
|
max_time=0;
|
|
for(i=0;i<MPSProcCount();i++)
|
|
max_time=(CurrProcTime(i)>max_time)?CurrProcTime(i):max_time;
|
|
|
|
for(i=0;i<MPSProcCount();i++)
|
|
{
|
|
// AddTimeSynchronize(__Synchronize, i, max_time-CurrProcTime(i));
|
|
AddTimeSynchronize(__Wait_shadow, i, max_time-CurrProcTime(i));
|
|
// printf("Sync %f\n",max_time-CurrProcTime(i));
|
|
}
|
|
|
|
// printf("DOPL %f ACROSS LoopSZ=%d:%d %d:%d %d:%d\n",call_time,ParLoopInfo.ParLoop_Obj->LowerIndex[0],ParLoopInfo.ParLoop_Obj->HigherIndex[0],ParLoopInfo.ParLoop_Obj->LowerIndex[1],ParLoopInfo.ParLoop_Obj->HigherIndex[1],ParLoopInfo.ParLoop_Obj->LowerIndex[2],ParLoopInfo.ParLoop_Obj->HigherIndex[2]);
|
|
// printf("DOPL ACROSS LoopInvers=%d %d %d\n",ParLoopInfo.ParLoop_Obj->Invers[0],ParLoopInfo.ParLoop_Obj->Invers[1],ParLoopInfo.ParLoop_Obj->Invers[2]);
|
|
ParLoopInfo.ParLoop_Obj->AcrossCost->Across(call_time, ParLoopInfo.ParLoop_Obj->GetLoopSize(),ProcBlock,type_size);
|
|
|
|
max_time=0;
|
|
for(i=0;i<MPSProcCount();i++)
|
|
max_time=(CurrProcTime(i)>max_time)?CurrProcTime(i):max_time;
|
|
for(i=0;i<MPSProcCount();i++)
|
|
{
|
|
AddTimeVariation(__Wait_shadow, i, max_time-CurrProcTime(i));
|
|
// printf("time[%d]=%f max=%f TimVar=%f\n",i,CurrProcTime(i),max_time,max_time-CurrProcTime(i));
|
|
}
|
|
|
|
|
|
|
|
AddMPSTime(__CPU_time_sys, vret_time);
|
|
AddMPSTime(__Insuff_parall_sys, (ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount()));
|
|
|
|
|
|
if(mode)
|
|
{
|
|
printf("DONE DOPL ");
|
|
for(i=0;i<MPSProcCount(); i++)
|
|
printf("%f ",CurrProcTime(i));
|
|
printf("\n");
|
|
}
|
|
|
|
// return;
|
|
}
|
|
else //ParLoopInfo.across && tmp_params->ReturnVar==1
|
|
{
|
|
//=***
|
|
|
|
//grig
|
|
/*
|
|
LoopBlock *minipl;
|
|
|
|
if(prevLoopBlock!=NULL)
|
|
{
|
|
minipl= prevLoopBlock;
|
|
|
|
// ïðîâåðÿåì ïåðåñå÷åíèå áëîêà minipl ñ ëîêàëüíûìè áëîêàìè ïðîöåññîðîâ,
|
|
// êîîðåêòèðóåì âðåìÿ âûïîëíååíèÿ - äëÿ êàæäîãî ïðîöåñññîðà
|
|
for(i=0;i<MPSProcCount();i++)
|
|
{
|
|
block_size=ProcBlock[i]->GetBlockSize();
|
|
|
|
if(block_size==0)
|
|
continue;
|
|
interceptj=intersection(*minipl,*ProcBlock[i]); // ÷èñëî ýëåìåíòîâ â ïåðåñå÷åíèè
|
|
|
|
|
|
|
|
|
|
time= ((double)vcall_time[i])*((double)interceptj/(double)minipl->GetBlockSize());
|
|
//\grig
|
|
|
|
|
|
//currentVM->getProcPower(); // MPSProcPower();
|
|
AddTime(__CPU_time_usr, currentVM->map(i), time);
|
|
|
|
cnt=0;
|
|
|
|
for (j=0; j<MPSProcCount(); j++)
|
|
if(*(ProcBlock[i]) == *(ProcBlock[j]))
|
|
cnt++;
|
|
|
|
if (cnt > 1)
|
|
{
|
|
ip_time = time * (((double) cnt - 1.0) / (double) cnt);
|
|
AddTime(__Insuff_parall_usr, currentVM->map(i), ip_time);
|
|
}
|
|
|
|
}
|
|
|
|
//delete minipl;
|
|
|
|
if(tmp_params->Dim.size()!=0)
|
|
{
|
|
delete minipl;
|
|
std::vector<LoopLS> lstemp;
|
|
//lstemp.resize(tmp_params->Dim.size();
|
|
for(i=0;i<tmp_params->Dim.size();i++)
|
|
{
|
|
lstemp.push_back(LoopLS(tmp_params->Lower[i],tmp_params->Upper[i],tmp_params->Step[i]));
|
|
}
|
|
// ïîñòîåíèå áëîêà âûïîëíÿþùèõñÿ íà äàííûé ìîìåíò âèòêîâ
|
|
prevLoopBlock = new LoopBlock(lstemp);
|
|
lstemp.resize(0);
|
|
}
|
|
|
|
|
|
AddMPSTime(__CPU_time_sys, vret_time);
|
|
AddMPSTime(__Insuff_parall_sys,(ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount()));
|
|
}
|
|
//grig
|
|
*/
|
|
// else
|
|
// {
|
|
|
|
//grig
|
|
/*
|
|
if(tmp_params->Dim.size()!=0)
|
|
{
|
|
std::vector<LoopLS> lstemp;
|
|
|
|
for(i=0;i<tmp_params->Dim.size();i++)
|
|
{
|
|
lstemp.push_back(LoopLS(tmp_params->Lower[i],tmp_params->Upper[i],tmp_params->Step[i]));
|
|
}
|
|
// ïîñòîåíèå áëîêà âûïîëíÿþùèõñÿ íà äàííûé ìîìåíò âèòêîâ
|
|
prevLoopBlock = new LoopBlock(lstemp);
|
|
lstemp.resize(0);
|
|
}
|
|
*/
|
|
|
|
//\grig
|
|
|
|
// LoopBlock** ProcBlock=(LoopBlock**)calloc(MPSProcCount(),sizeof(LoopBlock*));
|
|
// assert(ProcBlock != NULL);
|
|
|
|
// for(i=0;i<MPSProcCount();i++)
|
|
// ProcBlock[i]=new LoopBlock(ParLoopInfo.ParLoop_Obj, i,1);
|
|
|
|
|
|
for(i=0;i<MPSProcCount();i++)
|
|
{
|
|
block_size=ProcBlock[i]->GetBlockSize();
|
|
// printf("DOPL[%d]=%d of %d\n",i,block_size,loop_size);
|
|
|
|
if(block_size==0)
|
|
continue;
|
|
//grig
|
|
time = (vcall_time[i]*((double)block_size/(double)loop_size));//commented grig /currentVM->getProcPower(i);
|
|
//\grig
|
|
|
|
|
|
//currentVM->getProcPower()/*MPSProcPower()*/;
|
|
AddTime(__CPU_time_usr, currentVM->map(i), time);
|
|
|
|
cnt=0;
|
|
for (j=0; j<MPSProcCount(); j++)
|
|
{
|
|
// printf("i=%d j=%d [0] %d %d %d %d\n",i,j,ProcBlock[i]->LSDim[0].Lower,ProcBlock[i]->LSDim[0].Upper, ProcBlock[j]->LSDim[0].Lower, ProcBlock[j]->LSDim[0].Upper);
|
|
// printf("i=%d j=%d [1] %d %d %d %d",i,j,ProcBlock[i]->LSDim[1].Lower,ProcBlock[i]->LSDim[1].Upper, ProcBlock[j]->LSDim[1].Lower, ProcBlock[j]->LSDim[1].Upper);
|
|
if(*(ProcBlock[i]) == *(ProcBlock[j]))
|
|
cnt++;
|
|
// printf(" cnt=%d\n",cnt);
|
|
}
|
|
|
|
//printf("DOPL time=%f cnt=%d\n",time,cnt);
|
|
|
|
if (cnt > 1)
|
|
{
|
|
ip_time = time * (((double) cnt - 1.0) / (double) cnt);
|
|
AddTime(__Insuff_parall_usr, currentVM->map(i), ip_time);
|
|
}
|
|
}
|
|
|
|
// for (i=0;i<MPSProcCount();i++)
|
|
// delete ProcBlock[i];
|
|
// free(ProcBlock);
|
|
|
|
AddMPSTime(__CPU_time_sys, vret_time);
|
|
AddMPSTime(__Insuff_parall_sys, (ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount()));
|
|
}
|
|
|
|
|
|
// for (i=0;i<MPSProcCount();i++)
|
|
// delete ProcBlock[i];
|
|
free(ProcBlock);
|
|
|
|
return;
|
|
}
|
|
|
|
void FuncCall::ParLoopTime()
|
|
{
|
|
switch(func_id) {
|
|
case crtpl_ :
|
|
crtpl();
|
|
break;
|
|
case mappl_ :
|
|
mappl();
|
|
break;
|
|
case dopl_ :
|
|
dopl();
|
|
break;
|
|
case endpl_ :
|
|
endpl();
|
|
break;
|
|
}
|
|
|
|
if (func_id != dopl_)
|
|
RegularTime();
|
|
}
|
|
|