#include #include #include "ModelStructs.h" #include "FuncCall.h" #include "CallInfoStructs.h" #include "Interval.h" //==== #include extern long TraceProcNum; //=*** #include "LoopBlock.h" extern _ShadowInfo * GetShadowByIndex(long ID); extern void DelShadow(long ID); extern _AMViewInfo * GetAMViewByIndex(long ID); extern _DArrayInfo * GetDArrayByIndex(long ID); extern _ShdGrpInfo * GetShdGroupByIndex(long ID); _ParLoopInfo ParLoopInfo; int _ParLoopInfo::count = 0; //grig //LoopBlock * prevLoopBlock=NULL; //\grig /* crtpl: reads this call's parameters as crtpl_Info, copies ID and Rank into the global ParLoopInfo, zeroes AlignType/PatternType/PatternID, clears the exfrst/imlast/across flags, and stores a newly created ParLoop of that rank in ParLoopInfo.ParLoop_Obj. The AxisArray (re)allocation is compiled only when `nodef` is defined. NOTE(review): any previous ParLoop_Obj is not deleted here; endpl() deletes it, so presumably crtpl/endpl calls are always paired - confirm. */ void FuncCall::crtpl() { crtpl_Info* params=(crtpl_Info*) call_params; ParLoopInfo.ID=params->ID; ParLoopInfo.Rank=params->Rank; ParLoopInfo.AlignType=0; ParLoopInfo.PatternType=0; ParLoopInfo.PatternID=0; ParLoopInfo.exfrst = false; ParLoopInfo.imlast = false; ParLoopInfo.across = false; #ifdef nodef if(ParLoopInfo.AxisArray) free(ParLoopInfo.AxisArray); ParLoopInfo.AxisArray=(long*)calloc(ParLoopInfo.Rank,sizeof(long)); assert(ParLoopInfo.AxisArray != NULL); #endif ParLoopInfo.ParLoop_Obj = new ParLoop(ParLoopInfo.Rank); } /* endpl: finishes the current parallel loop. If ParLoopInfo.imlast is set, it fetches the shadow for imlast_SGR and, for each loop index i, adds (curr_pt - time_start) to __Shadow_overlap for currentVM->map(i); when curr_pt < time_end it also prints the overlap and adds (time_end - curr_pt) to __Wait_shadow. It then clears imlast and deletes that shadow. Otherwise, if across is set, it only clears the flag and deletes the across_SGR shadow (the timing code for that case is commented out). In every case ParLoop_Obj is deleted and reset to NULL. NOTE(review): the `for` header in the imlast branch looks truncated in this copy of the file (`for (i=0; imap(i));`) - restore it from version control before building. */ void FuncCall::endpl() { int i; double curr_pt; _ShadowInfo* SHD; if (ParLoopInfo.imlast) { SHD=GetShadowByIndex(ParLoopInfo.imlast_SGR); for (i=0; imap(i)); if(curr_pt < SHD->time_end) { printf("Overlap = %f (%f -%f )\n", curr_pt - SHD->time_start, curr_pt, SHD->time_start); AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start)); AddTime(__Wait_shadow,currentVM->map(i), (SHD->time_end - curr_pt)); } else { AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start)); } } ParLoopInfo.imlast = false; DelShadow(ParLoopInfo.imlast_SGR/*params->ID*/); } else if (ParLoopInfo.across) { /* Not needed: when across is used, the shadow edges are updated synchronously /* (both on entry (on the first iteration (it seems an asynchronous mode cannot be specified there)) /* and while the across loop is being computed), so the Shadow time does not have to be accounted on exit from the loop. 
SHD=GetShadowByIndex(ParLoopInfo.across_SGR); for (i=0; imap(i)); if(curr_pt < SHD->time_end) { printf("Overlap across= %f (%f - %f )\n", curr_pt - SHD->time_start, curr_pt, SHD->time_start); AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start)); AddTime(__Wait_shadow,currentVM->map(i), (SHD->time_end - curr_pt)); } else { //==== //printf("SHD %f %f\n",curr_pt, SHD->time_start); //was AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start)); //=*** } } */ ParLoopInfo.across = false; DelShadow(ParLoopInfo.across_SGR); } delete ParLoopInfo.ParLoop_Obj; ParLoopInfo.ParLoop_Obj=NULL; //grig // if(prevLoopBlock!=NULL) // { // delete prevLoopBlock; // prevLoopBlock=NULL; // } //\grig } /* mappl: maps the current parallel loop according to params->PatternType (call_params read as mappl_Info). Type 1: the pattern is an AMView looked up with GetAMViewByIndex(PatternRef). Type 2: the pattern is a DArray looked up with GetDArrayByIndex(PatternRefPtr). In both cases PatternType, AlignType and PatternID are recorded in ParLoopInfo and ParLoop_Obj->MapPL is called with the axis, coefficient, constant, init-index, last-index and step arrays from params. Any other type returns immediately without modifying ParLoopInfo. After a successful mapping ParLoopInfo.AxisArray is set to params->AxisArray (the pointer is stored, not a copy). NOTE(review): AMV_Info / DA_Info are dereferenced without a null check - confirm the lookups cannot fail here. */ void FuncCall::mappl() { mappl_Info* params = (mappl_Info*) call_params; if (params->PatternType == 1) { // AMView ParLoopInfo.PatternType=1; ParLoopInfo.AlignType=1; ParLoopInfo.PatternID=params->PatternRef; _AMViewInfo* AMV_Info=GetAMViewByIndex(params->PatternRef); ParLoopInfo.ParLoop_Obj->MapPL(AMV_Info->AMView_Obj, params->AxisArray, params->CoeffArray, params->ConstArray, params->InInitIndexArray, params->InLastIndexArray, params->InStepArray); } else if (params->PatternType == 2) { // DisArray ParLoopInfo.PatternType=2; ParLoopInfo.AlignType=2; ParLoopInfo.PatternID=params->PatternRefPtr; _DArrayInfo* DA_Info=GetDArrayByIndex(params->PatternRefPtr); //RRRRRRRRRRRRRRRR ParLoopInfo.ParLoop_Obj->MapPL(DA_Info->DArray_Obj, params->AxisArray, params->CoeffArray, params->ConstArray, params->InInitIndexArray, params->InLastIndexArray, params->InStepArray); } else { return; } ParLoopInfo.AxisArray = params->AxisArray; } void FuncCall::dopl() { int i, j, cnt; double time = 0.0, ip_time = 0.0; long loop_size = ParLoopInfo.ParLoop_Obj->GetLoopSize(); long block_size; // long interceptj; //==== int type_size, mode=0; //=*** /* for(i=0;icall_params; if (call_time==0 || loop_size==0) return; LoopBlock** 
ProcBlock=(LoopBlock**)calloc(MPSProcCount(),sizeof(LoopBlock*)); assert(ProcBlock != NULL); //построение витков на каждом процессоре for(i=0;iLSDim[k].Lower, ProcBlock[i]->LSDim[k].Upper); printf("\n"); } } //==== if(ParLoopInfo.across && tmp_params->ReturnVar==1) { #define max_rank 4 #define ShdWid(k) ((!invers[k])?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize) #define PreShdWid(k) (invers[k]?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize) #define msize(i,j) ((jLSDim[j].Upper - ProcBlock[i]->LSDim[j].Lower + 1) / ProcBlock[i]->LSDim[j].Step:1) std::vector pp; int k,d,rank,j,i,rank_mas,x; int invers[max_rank],prev[max_rank],post[max_rank],p[max_rank],n[max_rank]; double a,sendtime,com_time,real_sync,exectime,overlap,TStart,TByte; pp=currentVM->getSizeArray(); rank=pp.size(); for(k=0;kempty()) break; if(kLSDim.size(); else rank_mas=0; //impossible must be // rank_mas=rank; // printf("rank=%d rank_mas=%d\n",rank,rank_mas); //calc invers for(i=0;iInvers[j]; for(k=0;k=0;k--) { n[k]=n[k]%p[k]; for(x=0;x=0) prev[k]=i-d; else prev[k]=-1; if(!invers[k]) if(n[k]!=p[k]-1 && i+d=0) post[k]=i-d; else post[k]=-1; } // printf("PREV %d %d\n",prev[0],prev[1],prev[2],prev[3]); // printf("POST %d %d\n",post[0],post[1],post[2],post[3]); for(k=0,a=1;kAcrossFlag; TStart = currentVM->getTStart(); TByte = currentVM->getTByte()*type_size; //printf("Tstart=%.10f TByte=%.10f\n",TStart,TByte); sendtime=0; com_time=0; real_sync=0; exectime=0; overlap=0; for(k=0;kReturnVar==0) { double max_time; type_size=ParLoopInfo.ParLoop_Obj->AcrossFlag; //Если обратный отсчет в цикле то Step должен быть < 0 for(i=0;iGetRank();j++) if(ParLoopInfo.ParLoop_Obj->Invers[j]==1) ProcBlock[i]->LSDim[j].Step=-ProcBlock[i]->LSDim[j].Step; max_time=0; for(i=0;imax_time)?CurrProcTime(i):max_time; 
for(i=0;iLowerIndex[0],ParLoopInfo.ParLoop_Obj->HigherIndex[0],ParLoopInfo.ParLoop_Obj->LowerIndex[1],ParLoopInfo.ParLoop_Obj->HigherIndex[1],ParLoopInfo.ParLoop_Obj->LowerIndex[2],ParLoopInfo.ParLoop_Obj->HigherIndex[2]); // printf("DOPL ACROSS LoopInvers=%d %d %d\n",ParLoopInfo.ParLoop_Obj->Invers[0],ParLoopInfo.ParLoop_Obj->Invers[1],ParLoopInfo.ParLoop_Obj->Invers[2]); ParLoopInfo.ParLoop_Obj->AcrossCost->Across(call_time, ParLoopInfo.ParLoop_Obj->GetLoopSize(),ProcBlock,type_size); max_time=0; for(i=0;imax_time)?CurrProcTime(i):max_time; for(i=0;iReturnVar==1 { //=*** //grig /* LoopBlock *minipl; if(prevLoopBlock!=NULL) { minipl= prevLoopBlock; // проверяем пересечение блока minipl с локальными блоками процессоров, // кооректируем время выполнеения - для каждого процесссора for(i=0;iGetBlockSize(); if(block_size==0) continue; interceptj=intersection(*minipl,*ProcBlock[i]); // число элементов в пересечении time= ((double)vcall_time[i])*((double)interceptj/(double)minipl->GetBlockSize()); //\grig //currentVM->getProcPower(); // MPSProcPower(); AddTime(__CPU_time_usr, currentVM->map(i), time); cnt=0; for (j=0; j 1) { ip_time = time * (((double) cnt - 1.0) / (double) cnt); AddTime(__Insuff_parall_usr, currentVM->map(i), ip_time); } } //delete minipl; if(tmp_params->Dim.size()!=0) { delete minipl; std::vector lstemp; //lstemp.resize(tmp_params->Dim.size(); for(i=0;iDim.size();i++) { lstemp.push_back(LoopLS(tmp_params->Lower[i],tmp_params->Upper[i],tmp_params->Step[i])); } // постоение блока выполняющихся на данный момент витков prevLoopBlock = new LoopBlock(lstemp); lstemp.resize(0); } AddMPSTime(__CPU_time_sys, vret_time); AddMPSTime(__Insuff_parall_sys,(ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount())); } //grig */ // else // { //grig /* if(tmp_params->Dim.size()!=0) { std::vector lstemp; for(i=0;iDim.size();i++) { lstemp.push_back(LoopLS(tmp_params->Lower[i],tmp_params->Upper[i],tmp_params->Step[i])); } // постоение блока выполняющихся на 
данный момент витков prevLoopBlock = new LoopBlock(lstemp); lstemp.resize(0); } */ //\grig // LoopBlock** ProcBlock=(LoopBlock**)calloc(MPSProcCount(),sizeof(LoopBlock*)); // assert(ProcBlock != NULL); // for(i=0;iGetBlockSize(); // printf("DOPL[%d]=%d of %d\n",i,block_size,loop_size); if(block_size==0) continue; //grig time = (vcall_time[i]*((double)block_size/(double)loop_size));//commented grig /currentVM->getProcPower(i); //\grig //currentVM->getProcPower()/*MPSProcPower()*/; AddTime(__CPU_time_usr, currentVM->map(i), time); cnt=0; for (j=0; jLSDim[0].Lower,ProcBlock[i]->LSDim[0].Upper, ProcBlock[j]->LSDim[0].Lower, ProcBlock[j]->LSDim[0].Upper); // printf("i=%d j=%d [1] %d %d %d %d",i,j,ProcBlock[i]->LSDim[1].Lower,ProcBlock[i]->LSDim[1].Upper, ProcBlock[j]->LSDim[1].Lower, ProcBlock[j]->LSDim[1].Upper); if(*(ProcBlock[i]) == *(ProcBlock[j])) cnt++; // printf(" cnt=%d\n",cnt); } //printf("DOPL time=%f cnt=%d\n",time,cnt); if (cnt > 1) { ip_time = time * (((double) cnt - 1.0) / (double) cnt); AddTime(__Insuff_parall_usr, currentVM->map(i), ip_time); } } // for (i=0;i