diff --git a/dvm/fdvm/trunk/fdvm/acc.cpp b/dvm/fdvm/trunk/fdvm/acc.cpp index 81fbf61..143323c 100644 --- a/dvm/fdvm/trunk/fdvm/acc.cpp +++ b/dvm/fdvm/trunk/fdvm/acc.cpp @@ -2532,7 +2532,10 @@ void ACC_ParallelLoopEnd(SgStatement *pardo) } if (options.isOn(LOOP_ANALYSIS)) + { delete currentLoop; + currentLoop = NULL; + } } if (options.isOn(RTC)) diff --git a/dvm/fdvm/trunk/fdvm/acc_across.cpp b/dvm/fdvm/trunk/fdvm/acc_across.cpp index ac726b5..279fa0e 100644 --- a/dvm/fdvm/trunk/fdvm/acc_across.cpp +++ b/dvm/fdvm/trunk/fdvm/acc_across.cpp @@ -29,8 +29,8 @@ extern void searchIdxs(vector &allInfo, SgExpression *st); extern int warpSize; // local functions -ArgsForKernel **Create_C_Adapter_Function_Across_variants(SgSymbol*, SgSymbol*, const int, const int, const int, SageSymbols**, SageSymbols**); -ArgsForKernel **Create_C_Adapter_Function_Across_OneThread(SgSymbol*, SgSymbol*, const int, const int); +vector Create_C_Adapter_Function_Across_variants(SgSymbol*, SgSymbol*, const int, const int, const int, const vector&, const vector&); +vector Create_C_Adapter_Function_Across_OneThread(SgSymbol*, SgSymbol*, const int, const int); symb_list* AddToSymbList(symb_list*, SgSymbol*); symb_list* AddNewToSymbList(symb_list*, SgSymbol*); void CreateReductionBlocksAcross(SgStatement*, int, SgExpression*, SgSymbol*); @@ -51,8 +51,8 @@ static vector allRegNames; static unsigned countOfCopies; static vector allVariants; -static const char *funcDvmhConvXYfortVer = " attributes(device) subroutine dvmh_convert_XY_int(x,y,Rx,Ry,slash,idx)\n implicit none\n integer ,value:: x\n integer ,value:: y\n integer ,value:: Rx\n integer ,value:: Ry\n integer ,value:: slash\n integer ,device:: idx \n \n if(slash .eq. 0) then\n if(Rx .eq. Ry) then\n if(x + y .lt. Rx) then\n idx = y + (1+x+y)*(x+y)/2\n else\n idx = Rx*(Rx-1)+x-(2*Rx-x-y-1)*(2*Rx-x-y-2)/2\n endif \n elseif(Rx .lt. Ry) then\n if(x + y .lt. Rx) then\n idx = y + ((1+x+y)*(x+y)) / 2\n elseif(x + y .lt. Ry) then\n idx = ((1+Rx)*Rx) / 2 + Rx - x - 1 + Rx * (x+y-Rx)\n else\n idx = Rx*Ry-Ry+y-(((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2))/2)\n endif\n else\n if(x + y .lt. Ry) then\n idx = x + (1+x+y)*(x+y) / 2\n elseif(x + y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + (Ry-y-1) + Ry * (x+y-Ry)\n else\n idx = Rx*Ry-Rx+x-((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2)/2)\n endif\n endif\n else\n if(Rx .eq. Ry) then\n if(x + Rx-1-y .lt. Rx) then\n idx = Rx-1-y + (x+Rx-y)*(x+Rx-1-y)/2\n else\n idx = Rx*(Rx-1) + x - (Rx-x+y)*(Rx-x+y-1)/2\n endif\n elseif(Rx .lt. Ry) then\n if(x + Ry-1-y .lt. Rx) then \n idx = Ry-1-y + ((x+Ry-y)*(x+Ry-1-y)) / 2\n elseif(x + Ry-1-y .lt. Ry) then\n idx = ((1+Rx)*Rx)/2+Rx-x-1+Rx*(x+Ry-1-y-Rx)\n else\n idx = Rx*Ry-1-y-(((Rx+y-x)*(Rx+y-x-1))/2)\n endif\n else\n if(x + Ry-1-y .lt. Ry) then\n idx = x + (1+x+Ry-1-y)*(x+Ry-1-y)/2\n elseif(x + Ry-1-y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + y + Ry * (x-y-1)\n else\n idx = Rx*Ry-Rx+x-((Rx+y-x)*(Rx+y-x-1)/2)\n endif\n endif\n endif\n end subroutine\n"; -static const char *funcDvmhConvXYfortVerLong = " attributes(device) subroutine dvmh_convert_XY_llong(x,y,Rx,Ry,slash,idx)\n implicit none\n integer*8 ,value:: x\n integer*8 ,value:: y\n integer*8 ,value:: Rx\n integer*8 ,value:: Ry\n integer*8 ,value:: slash\n integer*8 ,device:: idx \n \n if(slash .eq. 0) then\n if(Rx .eq. Ry) then\n if(x + y .lt. Rx) then\n idx = y + (1+x+y)*(x+y)/2\n else\n idx = Rx*(Rx-1)+x-(2*Rx-x-y-1)*(2*Rx-x-y-2)/2\n endif \n elseif(Rx .lt. Ry) then\n if(x + y .lt. Rx) then\n idx = y + ((1+x+y)*(x+y)) / 2\n elseif(x + y .lt. Ry) then\n idx = ((1+Rx)*Rx) / 2 + Rx - x - 1 + Rx * (x+y-Rx)\n else\n idx = Rx*Ry-Ry+y-(((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2))/2)\n endif\n else\n if(x + y .lt. Ry) then\n idx = x + (1+x+y)*(x+y) / 2\n elseif(x + y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + (Ry-y-1) + Ry * (x+y-Ry)\n else\n idx = Rx*Ry-Rx+x-((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2)/2)\n endif\n endif\n else\n if(Rx .eq. Ry) then\n if(x + Rx-1-y .lt. Rx) then\n idx = Rx-1-y + (x+Rx-y)*(x+Rx-1-y)/2\n else\n idx = Rx*(Rx-1) + x - (Rx-x+y)*(Rx-x+y-1)/2\n endif\n elseif(Rx .lt. Ry) then\n if(x + Ry-1-y .lt. Rx) then \n idx = Ry-1-y + ((x+Ry-y)*(x+Ry-1-y)) / 2\n elseif(x + Ry-1-y .lt. Ry) then\n idx = ((1+Rx)*Rx)/2+Rx-x-1+Rx*(x+Ry-1-y-Rx)\n else\n idx = Rx*Ry-1-y-(((Rx+y-x)*(Rx+y-x-1))/2)\n endif\n else\n if(x + Ry-1-y .lt. Ry) then\n idx = x + (1+x+Ry-1-y)*(x+Ry-1-y)/2\n elseif(x + Ry-1-y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + y + Ry * (x-y-1)\n else\n idx = Rx*Ry-Rx+x-((Rx+y-x)*(Rx+y-x-1)/2)\n endif\n endif\n endif\n end subroutine\n" ; +static const char *funcDvmhConvXYfortVer = " attributes(device) subroutine dvmh_convert_XY_int(x,y,Rx,Ry,slash,idx)\n implicit none\n integer ,value:: x\n integer ,value:: y\n integer ,value:: Rx\n integer ,value:: Ry\n integer ,value:: slash\n integer ,device:: idx \n \n if(slash .eq. 0) then\n if(Rx .eq. Ry) then\n if(x + y .lt. Rx) then\n idx = y + (1+x+y)*(x+y)/2\n else\n idx = Rx*(Rx-1)+x-(2*Rx-x-y-1)*(2*Rx-x-y-2)/2\n endif \n elseif(Rx .lt. Ry) then\n if(x + y .lt. Rx) then\n idx = y + ((1+x+y)*(x+y)) / 2\n elseif(x + y .lt. Ry) then\n idx = ((1+Rx)*Rx) / 2 + Rx - x - 1 + Rx * (x+y-Rx)\n else\n idx = Rx*Ry-Ry+y-(((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2))/2)\n endif\n else\n if(x + y .lt. Ry) then\n idx = x + (1+x+y)*(x+y) / 2\n elseif(x + y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + (Ry-y-1) + Ry * (x+y-Ry)\n else\n idx = Rx*Ry-Rx+x-((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2)/2)\n endif\n endif\n else\n if(Rx .eq. Ry) then\n if(x + Rx-1-y .lt. Rx) then\n idx = Rx-1-y + (x+Rx-y)*(x+Rx-1-y)/2\n else\n idx = Rx*(Rx-1) + x - (Rx-x+y)*(Rx-x+y-1)/2\n endif\n elseif(Rx .lt. Ry) then\n if(x + Ry-1-y .lt. Rx) then \n idx = Ry-1-y + ((x+Ry-y)*(x+Ry-1-y)) / 2\n elseif(x + Ry-1-y .lt. Ry) then\n idx = ((1+Rx)*Rx)/2+Rx-x-1+Rx*(x+Ry-1-y-Rx)\n else\n idx = Rx*Ry-1-y-(((Rx+y-x)*(Rx+y-x-1))/2)\n endif\n else\n if(x + Ry-1-y .lt. Ry) then\n idx = x + (1+x+Ry-1-y)*(x+Ry-1-y)/2\n elseif(x + Ry-1-y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + y + Ry * (x-y-1)\n else\n idx = Rx*Ry-Rx+x-((Rx+y-x)*(Rx+y-x-1)/2)\n endif\n endif\n endif\n end subroutine\n"; +static const char *funcDvmhConvXYfortVerLong = " attributes(device) subroutine dvmh_convert_XY_llong(x,y,Rx,Ry,slash,idx)\n implicit none\n integer*8 ,value:: x\n integer*8 ,value:: y\n integer*8 ,value:: Rx\n integer*8 ,value:: Ry\n integer*8 ,value:: slash\n integer*8 ,device:: idx \n \n if(slash .eq. 0) then\n if(Rx .eq. Ry) then\n if(x + y .lt. Rx) then\n idx = y + (1+x+y)*(x+y)/2\n else\n idx = Rx*(Rx-1)+x-(2*Rx-x-y-1)*(2*Rx-x-y-2)/2\n endif \n elseif(Rx .lt. Ry) then\n if(x + y .lt. Rx) then\n idx = y + ((1+x+y)*(x+y)) / 2\n elseif(x + y .lt. Ry) then\n idx = ((1+Rx)*Rx) / 2 + Rx - x - 1 + Rx * (x+y-Rx)\n else\n idx = Rx*Ry-Ry+y-(((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2))/2)\n endif\n else\n if(x + y .lt. Ry) then\n idx = x + (1+x+y)*(x+y) / 2\n elseif(x + y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + (Ry-y-1) + Ry * (x+y-Ry)\n else\n idx = Rx*Ry-Rx+x-((Rx+Ry-y-x-1)*(Rx+Ry-y-x-2)/2)\n endif\n endif\n else\n if(Rx .eq. Ry) then\n if(x + Rx-1-y .lt. Rx) then\n idx = Rx-1-y + (x+Rx-y)*(x+Rx-1-y)/2\n else\n idx = Rx*(Rx-1) + x - (Rx-x+y)*(Rx-x+y-1)/2\n endif\n elseif(Rx .lt. Ry) then\n if(x + Ry-1-y .lt. Rx) then \n idx = Ry-1-y + ((x+Ry-y)*(x+Ry-1-y)) / 2\n elseif(x + Ry-1-y .lt. Ry) then\n idx = ((1+Rx)*Rx)/2+Rx-x-1+Rx*(x+Ry-1-y-Rx)\n else\n idx = Rx*Ry-1-y-(((Rx+y-x)*(Rx+y-x-1))/2)\n endif\n else\n if(x + Ry-1-y .lt. Ry) then\n idx = x + (1+x+Ry-1-y)*(x+Ry-1-y)/2\n elseif(x + Ry-1-y .lt. Rx) then\n idx = (1+Ry)*Ry/2 + y + Ry * (x-y-1)\n else\n idx = Rx*Ry-Rx+x-((Rx+y-x)*(Rx+y-x-1)/2)\n endif\n endif\n endif\n end subroutine\n" ; static const char* fermiPreprocDir = "CUDA_FERMI_ARCH"; // local variables @@ -79,7 +79,7 @@ static inline void mywarn(const char *str) { #if debugMode printf("%s\n", str); -#endif +#endif } static char *getLoopLine(const char *sadapter) @@ -122,20 +122,6 @@ static SgExpression *RegisterReduction_forAcross(SgSymbol *s_loop_ref, SgSymbol return fe; } -SgStatement* makeSymbolDeclarationWithInit_T(SgSymbol *Init, SgSymbol *Value) -{ - SgStatement *st; - SgExpression *e; - st = new SgStatement(VAR_DECL); - - e = new SgExpression(CLASSINIT_OP); - e->setLhs(SgMakeDeclExp(Init, Init->type())); - e->setRhs(new SgExprListExp(*new SgVarRefExp(Value))); - st->setExpression(0, *new SgExprListExp(*e)); - - return st; -} - SgExpression *CreateBlocksThreadsSpec(SgSymbol *s_shared, SgSymbol *s_blocks, SgSymbol *s_threads, SgSymbol *s_stream) { SgExprListExp *el, *ell, *elm; @@ -159,7 +145,7 @@ SgExpression* CreateBlocksThreadsSpec(int size, SgSymbol *s_blocks, SgSymbol *s_ el = new SgExprListExp(*new SgVarRefExp(s_blocks)); ell = new SgExprListExp(*new SgVarRefExp(s_threads)); el->setRhs(ell); - //size==0 - parallel loop without reduction clause + //size==0 - parallel loop without reduction clause mult = size ? &((*ThreadsGridSize(s_threads)) * (*new SgValueExp(size))) : new SgValueExp(size); elm = new SgExprListExp(*mult); ell->setRhs(elm); @@ -181,7 +167,7 @@ static void getDefaultCudaBlock(int &x, int &y, int &z, int loopDep, int loopInd { if (loopDep == 0) { - if (loopIndep == 1) { x = 256; y = 1; z = 1; } + if (loopIndep == 1) { x = 256; y = 1; z = 1; } else if (loopIndep == 2) { x = 32; y = 14; z = 1; } else { x = 32; y = 7; z = 2; } } @@ -195,12 +181,12 @@ static void getDefaultCudaBlock(int &x, int &y, int &z, int loopDep, int loopInd else if (loopDep == 2) { if (loopIndep == 0) { x = 32; y = 1; z = 1; } - else if (loopIndep == 1) { x = 32; y = 4; z = 1; } + else if (loopIndep == 1) { x = 32; y = 4; z = 1; } else { x = 16; y = 8; z = 2; } } else if (loopDep >= 3) { - if (loopIndep == 0) { x = 32; y = 5; z = 1; } + if (loopIndep == 0) { x = 32; y = 5; z = 1; } else { x = 32; y = 5; z = 2; } } } @@ -291,7 +277,7 @@ static int getSizeOf() return ret; } -SgStatement *CreateKernelProcedureDevice(SgSymbol *skernel) +static SgStatement *CreateKernelProcedureDevice(SgSymbol *skernel) { SgStatement *st, *st_end; SgExpression *e; @@ -313,7 +299,7 @@ SgStatement *CreateKernelProcedureDevice(SgSymbol *skernel) return st; } -SgStatement* AssignStatement(SgExpression &lhs, SgExpression &rhs) +static SgStatement* AssignStatement(SgExpression &lhs, SgExpression &rhs) { SgStatement *st; if (options.isOn(C_CUDA)) @@ -323,36 +309,36 @@ SgStatement* AssignStatement(SgExpression &lhs, SgExpression &rhs) return st; } -SgSymbol *createVariantOfSAdapter(SgSymbol *sadapter, char *variant) +static char* createName(const char* oldName, const char* variant) { - SgSymbol *s_adapter, *s_tmp; - char *oldName = sadapter->identifier(); - char *correctName = new char[strlen(oldName) + strlen(variant) + 1]; + char* correctName = new char[strlen(oldName) + strlen(variant) + 1]; correctName[0] = '\0'; strcat(correctName, oldName); strcat(correctName, variant); - s_adapter = new SgSymbol(FUNCTION_NAME, correctName, *C_VoidType(), *block_C); - s_tmp = new SgSymbol(PROCEDURE_NAME, correctName, *current_file->firstStatement()); + + return correctName; +} + +static SgSymbol *createVariantOfSAdapter(SgSymbol *sadapter, const char *variant) +{ + SgSymbol *s_adapter; + const char *oldName = sadapter->identifier(); + s_adapter = new SgSymbol(FUNCTION_NAME, createName(oldName, variant), *C_VoidType(), *block_C); return s_adapter; } -SgSymbol *createVariantOfKernelSymbol(SgSymbol *kernel_symb, char *variant) +static SgSymbol *createVariantOfKernelSymbol(SgSymbol *kernel_symb, const char *variant) { SgSymbol *sk; char *oldName = kernel_symb->identifier(); - char *correctName = new char[strlen(oldName) + strlen(variant) + 1]; - correctName[0] = '\0'; - strcat(correctName, oldName); - strcat(correctName, variant); - - sk = new SgSymbol(PROCEDURE_NAME, correctName, *mod_gpu); + sk = new SgSymbol(PROCEDURE_NAME, createName(oldName, variant), *mod_gpu); if (options.isOn(C_CUDA)) sk->setType(C_VoidType()); return sk; } -void createNewAdapter(SgSymbol *sadapter, ParamsForAllVariants &newVar, char *str) +static void createNewAdapter(SgSymbol *sadapter, ParamsForAllVariants &newVar, char *str) { SgSymbol *s_adapter; char *nameOfNewSAdapter; @@ -366,7 +352,7 @@ void createNewAdapter(SgSymbol *sadapter, ParamsForAllVariants &newVar, char *st newVar.s_adapter = s_adapter; } -void createNewKernel(SgSymbol *kernel_symb, ParamsForAllVariants &newVar, char *str) +static void createNewKernel(SgSymbol *kernel_symb, ParamsForAllVariants &newVar, char *str) { SgSymbol *s_ks; char *nameOfNewSK; @@ -380,30 +366,6 @@ void createNewKernel(SgSymbol *kernel_symb, ParamsForAllVariants &newVar, char * newVar.s_kernel_symb = s_ks; } -static SgSymbol* getSymbByNum(SageSymbols *allSymb, int place) -{ - SageSymbols *tmp = allSymb; - for (int i = 0; ; ++i) - { - if (i == place) - return tmp->symb; - else - tmp = tmp->next; - } -} - -static int getLongByType(int type[], int num) -{ - int ret = 0; - int p = 1; - for (int i = num - 1; i >= 0; i--) - { - ret += type[i] * p; - p = p << 1; - } - return ret; -} - static int countBit(int num) { int ret = 0; @@ -431,9 +393,10 @@ static void generateAllBitmasks(int dep, int all, vector &out) } } -static void GetAllCombinations2(vector &allVariants, SgSymbol *sadapter, SgSymbol *kernel_symb, int numAcr, int sizeOfAllSymb, SageSymbols *allSymb) +static void GetAllCombinations2(vector &allVariants, SgSymbol *sadapter, SgSymbol *kernel_symb, int numAcr, + const vector& allSymb) { - int *bitmask = new int[(unsigned)sizeOfAllSymb]; + const unsigned sizeOfAllSymb = allSymb.size(); char *tmpstrAdapter = new char[16]; char *tmpstrKernel = new char[16]; @@ -442,20 +405,13 @@ static void GetAllCombinations2(vector &allVariants, SgSym ParamsForAllVariants newVar; newVar.allDims = sizeOfAllSymb; - newVar.loopSymb = new SageSymbols*[numLoopVars]; - newVar.loopAcrossSymb = new SageSymbols*[numLoopVars]; + newVar.loopSymb.resize(numLoopVars); + newVar.loopAcrossSymb.resize(numLoopVars); newVar.nameOfNewSAdapter = NULL; newVar.s_adapter = NULL; newVar.acrossV = numAcr; newVar.loopV = newVar.allDims - newVar.acrossV; - for (int bit = 0; bit < sizeOfAllSymb; ++bit) - { - if (bit > sizeOfAllSymb - numAcr - 1) - bitmask[bit] = 1; - else - bitmask[bit] = 0; - } - newVar.type = getLongByType(bitmask, sizeOfAllSymb); + newVar.type = (1 << numAcr) - 1; sprintf(tmpstrAdapter, "%d", newVar.type); strcat(tmpstrAdapter, "_case"); @@ -470,45 +426,40 @@ static void GetAllCombinations2(vector &allVariants, SgSym { if (r < numAcr) { - newVar.loopAcrossSymb[r] = new SageSymbols(); - newVar.loopAcrossSymb[r]->across_left = newVar.loopAcrossSymb[r]->across_right = 0; - newVar.loopAcrossSymb[r]->symb = getSymbByNum(allSymb, sizeOfAllSymb - r - 1); - newVar.loopAcrossSymb[r]->len = sizeOfAllSymb - r - 1; + newVar.loopAcrossSymb[r].across_left = newVar.loopAcrossSymb[r].across_right = 0; + newVar.loopAcrossSymb[r].symb = allSymb[sizeOfAllSymb - r - 1].symb; + newVar.loopAcrossSymb[r].len = sizeOfAllSymb - r - 1; } else { - newVar.loopSymb[k] = new SageSymbols(); - newVar.loopSymb[k]->across_left = newVar.loopSymb[k]->across_right = 0; - newVar.loopSymb[k]->symb = getSymbByNum(allSymb, sizeOfAllSymb - r - 1); - newVar.loopSymb[k]->len = sizeOfAllSymb - r - 1; + newVar.loopSymb[k].across_left = newVar.loopSymb[k].across_right = 0; + newVar.loopSymb[k].symb = allSymb[sizeOfAllSymb - r - 1].symb; + newVar.loopSymb[k].len = sizeOfAllSymb - r - 1; k++; } } allVariants.push_back(newVar); - - delete[]bitmask; } static void GetAllVariants2(vector &allVariants, SgSymbol *sadapter, SgSymbol *kernel_symb) { - int allDims = 0, acrossV = 0; + int acrossV = 0; - SageAcrossInfo *Info = GetLoopsWithParAndAcrDir(); - SageSymbols *allSymb = GetSymbInParalell(&allDims, dvm_parallel_dir->expr(2)); - SageArrayIdxs *idxInfo = Info->idx->next; - while (idxInfo && (acrossV < allDims)) + SageAcrossInfo Info = GetLoopsWithParAndAcrDir(); + vector allSymb = GetSymbInParalell(dvm_parallel_dir->expr(2)); + const int allDims = allSymb.size(); + + for (int z = 0; z < Info.idxs.size() && (acrossV < allDims); ++z) { - for (int i = 0; i < idxInfo->dim && (acrossV < allDims); ++i) - { - if (idxInfo->symb[i]->across_left != 0 || idxInfo->symb[i]->across_right != 0) + SageArrayIdxs& idxInfo = Info.idxs[z]; + for (int i = 0; i < idxInfo.dim && (acrossV < allDims); ++i) + if (idxInfo.symb[i].across_left != 0 || idxInfo.symb[i].across_right != 0) acrossV++; - } - idxInfo = idxInfo->next; } // correct dependencies lvl only for ACROSS with one dep SgStatement *st = loop_body; - + SgExpression* dvmDir = dvm_parallel_dir->expr(1); vector allInfo; bool nextStep = true; @@ -532,7 +483,7 @@ static void GetAllVariants2(vector &allVariants, SgSymbol } list = list->rhs(); } - + for (int i = 0; i < toAnalyze.size(); ++i) { SgExpression* array = toAnalyze[i]; @@ -590,7 +541,7 @@ static void GetAllVariants2(vector &allVariants, SgSymbol break; } } - + if (nextStep) { SgExpression* dvmDir = dvm_parallel_dir->expr(2); @@ -644,7 +595,7 @@ static void GetAllVariants2(vector &allVariants, SgSymbol } } for (int i = 1; i <= acrossV; ++i) - GetAllCombinations2(allVariants, sadapter, kernel_symb, i, allDims, allSymb); + GetAllCombinations2(allVariants, sadapter, kernel_symb, i, allSymb); } /*void printAllVars(vector &vectorT) @@ -668,11 +619,10 @@ static void GetAllVariants2(vector &allVariants, SgSymbol ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) { - ArgsForKernel **retValueForKernel = NULL; createBodyKernel = true; // clear information - allRegNames.clear(); + allRegNames.clear(); SgStatement *st_hedr, *st_end, *first_exec, *stmt; vector cuda_kernel; @@ -694,9 +644,10 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) if (options.isOn(ONE_THREAD)) { - int num = 0; - SageSymbols *tmpStr = GetSymbInParalell(&num, dvm_parallel_dir->expr(2)); - retValueForKernel = Create_C_Adapter_Function_Across_OneThread(sadapter, kernel_symb, num, 0); + const vector tmpStr = GetSymbInParalell(dvm_parallel_dir->expr(2)); + int num = tmpStr.size(); + + vector retValueForKernel = Create_C_Adapter_Function_Across_OneThread(sadapter, kernel_symb, num, 0); for (unsigned t = 0; t < countKernels; ++t) { @@ -704,9 +655,6 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) CopyOfBody.pop(); currentLoop = new Loop(loop_body, options.isOn(OPT_EXP_COMP)); - - num = 0; - tmpStr = GetSymbInParalell(&num, dvm_parallel_dir->expr(2)); SgType *typeParams = indexTypeInKernel(rtTypes[t]); for (int i = 0; i < num; ++i) @@ -714,26 +662,25 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) char *str = new char[64]; char *addL = new char[64]; str[0] = addL[0] = '\0'; - retValueForKernel[t]->otherVarsForOneTh.push_back(tmpStr->symb); - strcat(str, tmpStr->symb->identifier()); + retValueForKernel[t].otherVarsForOneTh.push_back(tmpStr[i].symb); + strcat(str, tmpStr[i].symb->identifier()); strcat(str, "_"); strcat(addL, str); strcat(addL, "low"); - retValueForKernel[t]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, addL, typeParams, kernel_symb->scope())); + retValueForKernel[t].otherVars.push_back(new SgSymbol(VARIABLE_NAME, addL, typeParams, kernel_symb->scope())); addL[0] = '\0'; strcat(addL, str); strcat(addL, "high"); - retValueForKernel[t]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, addL, typeParams, kernel_symb->scope())); + retValueForKernel[t].otherVars.push_back(new SgSymbol(VARIABLE_NAME, addL, typeParams, kernel_symb->scope())); addL[0] = '\0'; strcat(addL, str); strcat(addL, "idx"); - retValueForKernel[t]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, addL, typeParams, kernel_symb->scope())); - tmpStr = tmpStr->next; + retValueForKernel[t].otherVars.push_back(new SgSymbol(VARIABLE_NAME, addL, typeParams, kernel_symb->scope())); } - + string kernel_symbNew = kernel_symb->identifier(); if (rtTypes[t] == rt_INT) kernel_symbNew += "_int"; @@ -741,8 +688,8 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) kernel_symbNew += "_long"; else if (rtTypes[t] == rt_LLONG) kernel_symbNew += "_llong"; - - cuda_kernel[t] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symbNew.c_str(), *C_VoidType(), *block_C), retValueForKernel[t], indexTypeInKernel(rtTypes[t])); + + cuda_kernel[t] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symbNew.c_str(), *C_VoidType(), *block_C), &retValueForKernel[t], indexTypeInKernel(rtTypes[t])); if (options.isOn(RTC)) { acc_call_list = ACC_RTC_ExpandCallList(acc_call_list); @@ -755,6 +702,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) } delete currentLoop; + currentLoop = NULL; } if (options.isOn(RTC)) ACC_RTC_CompleteAllParams(); @@ -784,7 +732,8 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) printf("%d case\n", allVariants[i].type); #endif ParamsForAllVariants tmp = allVariants[i]; - + vector retValueForKernel; + for (unsigned k = 0; k < countKernels; ++k) { loop_body = CopyOfBody.top(); @@ -807,7 +756,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) { if (k == 0) // create CUDA handler once retValueForKernel = Create_C_Adapter_Function_Across_variants(tmp.s_adapter, tmp.s_kernel_symb, tmp.loopV, tmp.acrossV, tmp.allDims, tmp.loopSymb, tmp.loopAcrossSymb); - cuda_kernel[k] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symb.c_str(), *C_VoidType(), *block_C), retValueForKernel[k], tmp.acrossV, indexTypeInKernel(rtTypes[k])); + cuda_kernel[k] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symb.c_str(), *C_VoidType(), *block_C), &retValueForKernel[k], tmp.acrossV, indexTypeInKernel(rtTypes[k])); if (options.isOn(RTC)) acc_call_list = ACC_RTC_ExpandCallList(acc_call_list); } @@ -819,7 +768,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) if (k == 0) // create CUDA handler once retValueForKernel = Create_C_Adapter_Function_Across_variants(tmp.s_adapter, tmp.s_kernel_symb, tmp.loopV, tmp.acrossV, tmp.allDims, tmp.loopSymb, tmp.loopAcrossSymb); - cuda_kernel[k] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symb.c_str(), *C_VoidType(), *block_C), retValueForKernel[k], tmp.acrossV, indexTypeInKernel(rtTypes[k])); + cuda_kernel[k] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symb.c_str(), *C_VoidType(), *block_C), &retValueForKernel[k], tmp.acrossV, indexTypeInKernel(rtTypes[k])); if (options.isOn(RTC)) { acc_call_list = ACC_RTC_ExpandCallList(acc_call_list); @@ -828,7 +777,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) if (strstr(kernel_symb.c_str(), "_llong") != NULL) acc_call_list = AddNewToSymbList(acc_call_list, createNewFunctionSymbol("dvmh_convert_XY_llong")); else if (strstr(kernel_symb.c_str(), "_int") != NULL) - acc_call_list = AddNewToSymbList(acc_call_list, createNewFunctionSymbol("dvmh_convert_XY_int")); + acc_call_list = AddNewToSymbList(acc_call_list, createNewFunctionSymbol("dvmh_convert_XY_int")); } } } @@ -839,7 +788,10 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) newVars.clear(); } if (contitionOfOptimization) + { delete currentLoop; + currentLoop = NULL; + } } if (options.isOn(RTC)) { @@ -858,7 +810,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) kernel_symb += "_long"; else if (rtTypes[k] == rt_LLONG) kernel_symb += "_llong"; - + if (options.isOn(C_CUDA)) ACC_RTC_ConvertCudaKernel(cuda_kernel[k], kernel_symb.c_str()); else @@ -869,7 +821,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) } } - + mywarn(" end: create all VARIANTS"); //create new control function @@ -906,7 +858,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) arg_list = arg_list->rhs(); } - for (el = uses_list; el; el = el->rhs()) // + for (el = uses_list; el; el = el->rhs()) // { s = el->lhs()->symbol(); typ = C_PointerType(C_Type(s->type())); @@ -971,9 +923,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) allVarForIfBlock.push_back(tmp); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol(allVariants[k].nameOfNewSAdapter)); for (size_t i = 0; i < argsForVariantFunction.size(); ++i) - { funcCall->addArg(*new SgVarRefExp(argsForVariantFunction[i])); - } funcCall->addArg(*new SgVarRefExp(which_run)); allFuncCalls.push_back(funcCall); } @@ -1011,7 +961,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) SgFunctionCallExp *eventF = new SgFunctionCallExp(*createNewFunctionSymbol("cudaEventCreate")); eventF->addArg(SgAddrOp(*new SgVarRefExp(cudaEventStart))); st_end->insertStmtBefore(*new SgCExpStmt(*eventF), *st_hedr); - + eventF = new SgFunctionCallExp(*createNewFunctionSymbol("cudaEventCreate")); eventF->addArg(SgAddrOp(*new SgVarRefExp(cudaEventStop))); st_end->insertStmtBefore(*new SgCExpStmt(*eventF), *st_hedr); @@ -1078,7 +1028,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) whileSt1->lastExecutable()->insertStmtBefore(*if_st); } } - + tmpF2 = new SgFunctionCallExp(*createNewFunctionSymbol("printf")); tmpF2->addArg(*new SgValueExp("It may be wrong!!\\n")); @@ -1159,7 +1109,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter) return NULL; } -ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, SgSymbol *kernel_symb, const int loopV, const int acrossV) +vector Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, SgSymbol *kernel_symb, const int loopV, const int acrossV) { #if debugMode warn("PARALLEL directive with ACROSS clause in region", 420, dvm_parallel_dir); @@ -1190,7 +1140,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S // end of init block mywarn("start: create fuction header "); - // create fuction header + // create fuction header st_hedr = Create_C_Function(sadapter); st_hedr->addComment(Cuda_LoopHandlerComment()); st_end = st_hedr->lexNext(); @@ -1201,7 +1151,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S mywarn(" end: create fuction header "); mywarn("start: create dummy argument list "); - // create dummy argument list: loop_ref, , + // create dummy argument list: loop_ref, , typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef)); s_loop_ref = new SgSymbol(VARIABLE_NAME, "loop_ref", *typ, *st_hedr); @@ -1228,7 +1178,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S h_first = sarg; } - for (el = uses_list, ln = 0; el; el = el->rhs(), ++ln) // + for (el = uses_list, ln = 0; el; el = el->rhs(), ++ln) // { s = el->lhs()->symbol(); typ = C_PointerType(C_Type(s->type())); @@ -1272,10 +1222,10 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S s_loc_var = NULL; is_array = 0; ered = er->lhs(); // reduction (variant==ARRAY_OP) - ev = ered->rhs(); // reduction variable reference for reduction operations except MINLOC,MAXLOC + ev = ered->rhs(); // reduction variable reference for reduction operations except MINLOC,MAXLOC if (isSgExprListExp(ev)) { - ev = ev->lhs(); // reduction variable reference + ev = ev->lhs(); // reduction variable reference loc_var_ref = ered->rhs()->rhs()->lhs(); //location array reference en = ered->rhs()->rhs()->rhs()->lhs(); // number of elements in location array loc_el_num = LocElemNumber(en); @@ -1308,7 +1258,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S btype = loc_type->baseType(); else btype = loc_type; - //!printf("__112\n"); + //!printf("__112\n"); SgArrayType *typearray = new SgArrayType(*C_Type(btype)); typearray->addRange(*new SgValueExp(loc_el_num)); s_loc_var->setType(*typearray); @@ -1321,7 +1271,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S st_hedr->insertStmtAfter(*stmt, *st_hedr); } - //!printf("__113\n"); + //!printf("__113\n"); /*--- executable statements: register reductions in RTS ---*/ e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1)); stmt = new SgCExpStmt(*e); @@ -1542,7 +1492,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S funcCall->addArg(*toAdd); } else - funcCall->addArg(*new SgVarRefExp(s)); + funcCall->addArg(*new SgVarRefExp(s)); } else { @@ -1617,20 +1567,17 @@ ArgsForKernel** Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadapter, S stmt->addComment("// Free temporary variables"); } } - // create args for kernel and return it - ArgsForKernel **argsKernel = new ArgsForKernel*[countKernels]; + // create args for kernel and return it + vector argsKernel(countKernels); for (unsigned i = 0; i < countKernels; ++i) - { - argsKernel[i] = new ArgsForKernel(); - argsKernel[i]->st_header = st_hedr; - } + argsKernel[i].st_header = st_hedr; delete[]reduction_ptr; mywarn(" end Adapter Function"); return argsKernel; } -static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **reduction_loc_ptr, +static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **reduction_loc_ptr, SgSymbol **reduction_symb, SgSymbol **reduction_loc_symb, SgFunctionCallExp *funcCallKernel, SgSymbol* numBlocks, int &has_red_array) { @@ -1650,7 +1597,7 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu int i = 0; for (rsl = red_struct_list, s = red_first; rsl; rsl = rsl->next, ++i) //s!=s_blocks_info { - if (rsl->redvar_size == 0) //reduction variable is scalar + if (rsl->redvar_size == 0) //reduction variable is scalar { if (options.isOn(RTC)) { @@ -1663,7 +1610,7 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu } else //TODO!! { - has_red_array = 1; + has_red_array = 1; for (int k = 0; k < rsl->redvar_size; ++k) funcCallKernel->addArg(*new SgArrayRefExp(*reduction_symb[i], *new SgValueExp(k))); } @@ -1687,13 +1634,13 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu } -static void createArgsForKernelForTwodeps(SgFunctionCallExp*& funcCallKernel, SgSymbol* kernel_symb, SgExpression* espec, SgSymbol*& sg, SgSymbol* hgpu_first, - SgSymbol*& sb, SgSymbol* base_first, symb_list*& sl, int& ln, int num, SgExpression*& e, SgSymbol** reduction_ptr, - SgSymbol** reduction_loc_ptr, SgSymbol** reduction_symb, SgSymbol** reduction_loc_symb, SgSymbol* red_blocks, int& has_red_array, - SgSymbol* diag, const int& loopV, SgSymbol** num_elems, const int& acrossV, SgSymbol* acrossBase[16], SgSymbol* loopBase[16], - SgSymbol* idxI, SageSymbols** loopAcrossSymb, SageSymbols** loopSymb, SgSymbol*& s, SgSymbol* uses_first, SgSymbol*& sdev, - SgSymbol* scalar_first, int uses_num, vector& dvm_array_headers, SgSymbol** addressingParams, SgSymbol** outTypeOfTransformation, - SgSymbol* type_of_run, SgSymbol* bIdxs) +static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, SgSymbol* kernel_symb, SgExpression* espec, SgSymbol*& sg, SgSymbol* hgpu_first, + SgSymbol*& sb, SgSymbol* base_first, symb_list*& sl, int& ln, int num, SgExpression*& e, SgSymbol** reduction_ptr, + SgSymbol** reduction_loc_ptr, SgSymbol** reduction_symb, SgSymbol** reduction_loc_symb, SgSymbol* red_blocks, int& has_red_array, + SgSymbol* diag, const int& loopV, SgSymbol** num_elems, const int& acrossV, SgSymbol* acrossBase[16], SgSymbol* loopBase[16], + SgSymbol* idxI, const vector& loopAcrossSymb, const vector& loopSymb, SgSymbol*& s, SgSymbol* uses_first, + SgSymbol*& sdev, SgSymbol* scalar_first, int uses_num, vector& dvm_array_headers, + SgSymbol** addressingParams, SgSymbol** outTypeOfTransformation, SgSymbol* type_of_run, SgSymbol* bIdxs) { funcCallKernel = CallKernel(kernel_symb, espec); @@ -1740,9 +1687,9 @@ static void createArgsForKernelForTwodeps(SgFunctionCallExp*& funcCallKernel, Sg for (int i = 0; i < loopV; ++i) funcCallKernel->addArg(*new SgVarRefExp(loopBase[i])); for (int i = 0; i < acrossV; ++i) - funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i].len))); for (int i = 0; i < loopV; ++i) - funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[i]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[i].len))); for (s = uses_first, sdev = scalar_first, ln = 0; ln < uses_num; s = s->next(), ln++) // uses { @@ -1781,8 +1728,8 @@ static void createArgsForKernelForTwodeps(SgFunctionCallExp*& funcCallKernel, Sg funcCallKernel->addArg(*new SgArrayRefExp(*bIdxs, *new SgValueExp(i))); } -ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, SgSymbol *kernel_symb, const int loopV, const int acrossV, - const int allDims, SageSymbols **loopSymb, SageSymbols **loopAcrossSymb) +vector Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, SgSymbol *kernel_symb, const int loopV, const int acrossV, + const int allDims, const vector& loopSymb, const vector& loopAcrossSymb) { #if debugMode warn("PARALLEL directive with ACROSS clause in region", 420, dvm_parallel_dir); @@ -1825,14 +1772,14 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg //end of init block mywarn("start: create fuction header "); - // create fuction header + // create fuction header st_hedr = Create_C_Function(sadapter); st_hedr->addComment(Cuda_LoopHandlerComment()); st_end = st_hedr->lexNext(); fe = st_hedr->expr(0); first_exec = st_end; if (declaration_include) - { + { declaration_cmnt = "#include \n#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))\n#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))"; declaration_include = false; } @@ -1840,7 +1787,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg mywarn(" end: create fuction header "); mywarn("start: create dummy argument list "); - // create dummy argument list: loop_ref, , + // create dummy argument list: loop_ref, , typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef)); s_loop_ref = new SgSymbol(VARIABLE_NAME, "loop_ref", *typ, *st_hedr); @@ -1867,7 +1814,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg h_first = sarg; } - for (el = uses_list, ln = 0; el; el = el->rhs(), ++ln) // + for (el = uses_list, ln = 0; el; el = el->rhs(), ++ln) // { s = el->lhs()->symbol(); typ = C_PointerType(C_Type(s->type())); @@ -1938,10 +1885,10 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg s_loc_var = NULL; is_array = 0; ered = er->lhs(); // reduction (variant==ARRAY_OP) - ev = ered->rhs(); // reduction variable reference for reduction operations except MINLOC,MAXLOC + ev = ered->rhs(); // reduction variable reference for reduction operations except MINLOC,MAXLOC if (isSgExprListExp(ev)) { - ev = ev->lhs(); // reduction variable reference + ev = ev->lhs(); // reduction variable reference loc_var_ref = ered->rhs()->rhs()->lhs(); //location array reference en = ered->rhs()->rhs()->rhs()->lhs(); // number of elements in location array loc_el_num = LocElemNumber(en); @@ -1975,7 +1922,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg btype = loc_type->baseType(); else btype = loc_type; - //!printf("__112\n"); + //!printf("__112\n"); SgArrayType *typearray = new SgArrayType(*C_Type(btype)); typearray->addRange(*new SgValueExp(loc_el_num)); s_loc_var->setType(*typearray); @@ -1988,7 +1935,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg st_hedr->insertStmtAfter(*stmt, *st_hedr); } - //!printf("__113\n"); + //!printf("__113\n"); /*--- executable statements: register reductions in RTS ---*/ e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1)); stmt = new SgCExpStmt(*e); @@ -2049,7 +1996,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg s->setType(tpArr); addDeclExpList(s, stmt->expr(0)); - idxTypeInKernel = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("idxTypeInKernel"), *LongT, *st_hedr); + idxTypeInKernel = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("idxTypeInKernel"), *LongT, *st_hedr); addDeclExpList(s, stmt->expr(0)); if (options.isOn(GPU_O0)) @@ -2246,7 +2193,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < acrossV; ++i) { acrossBase[i] = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName(strcat(strcpy(new char[20], "base_"), - loopAcrossSymb[i]->symb->identifier())), *LongT, *st_hedr); + loopAcrossSymb[i].symb->identifier())), *LongT, *st_hedr); if (i == 0) { stmt = makeSymbolDeclaration(s); @@ -2258,7 +2205,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < loopV; ++i) { loopBase[i] = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName(strcat(strcpy(new char[20], "base_"), - loopSymb[i]->symb->identifier())), *LongT, *st_hedr); + loopSymb[i].symb->identifier())), *LongT, *st_hedr); addDeclExpList(s, stmt->expr(0)); } // end @@ -2438,11 +2385,11 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg st_end->insertStmtBefore(*stmt, *st_hedr); stmt = new SgIfStmt(SgEqOp(*new SgVarRefExp(idxTypeInKernel), *new SgVarRefExp(*new SgSymbol(VARIABLE_NAME, "rt_LONG"))) - && + && SgEqOp(*sizeofL, *sizeofLL), *new SgCExpStmt(SgAssignOp(*new SgVarRefExp(idxTypeInKernel), *new SgVarRefExp(*new SgSymbol(VARIABLE_NAME, "rt_LLONG"))))); st_end->insertStmtBefore(*stmt, *st_hedr); - + /* -------- call loop_cuda_get_config_(loop_ref, &shared_mem, ®_per_th, &threads, &stream, &shared_mem); ------------*/ SgFunctionCallExp *tmpFunc = new SgFunctionCallExp(*createNewFunctionSymbol("dim3")); int x = 0, y = 0, z = 0; @@ -2475,7 +2422,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg std::string preproc = std::string("#ifdef ") + fermiPreprocDir; char* tmp = new char[preproc.size() + 1]; strcpy(tmp, preproc.data()); - + st_end->insertStmtBefore(*PreprocessorDirective(tmp), *st_hedr); e = &SgAssignOp(*new SgVarRefExp(shared_mem), *new SgValueExp(shared_mem_count)); stmt = new SgCExpStmt(*e); @@ -2497,7 +2444,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg string define_name_int = kernel_symb->identifier(); string define_name_long = kernel_symb->identifier(); - + define_name_int += "_int_regs"; define_name_long += "_llong_regs"; @@ -2584,7 +2531,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg st_end->insertStmtBefore(*stmt, *st_hedr); stmt->addComment("//Start method"); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); @@ -2594,7 +2541,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int k = 0; k < MIN(2, loopV); ++k) { SgStatement *st1; - idx[k] = loopSymb[k]->len; + idx[k] = loopSymb[k].len; e = &SgAssignOp(*new SgVarRefExp(loopBase[k]), *new SgArrayRefExp(*lowI, *new SgValueExp(idx[k]))); stmt = new SgCExpStmt(*e); @@ -2623,7 +2570,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int k = 2; k < loopV; ++k) { SgStatement *st1; - idx[k] = loopSymb[k]->len; + idx[k] = loopSymb[k].len; e = &SgAssignOp(*new SgVarRefExp(loopBase[k]), *new SgArrayRefExp(*lowI, *new SgValueExp(idx[k]))); stmt = new SgCExpStmt(*e); @@ -2650,7 +2597,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { SgStatement *st1; int k = 2; - idx[k] = loopSymb[k]->len; + idx[k] = loopSymb[k].len; e = &SgAssignOp(*new SgVarRefExp(loopBase[k]), *new SgArrayRefExp(*lowI, *new SgValueExp(idx[k]))); stmt = new SgCExpStmt(*e); @@ -2688,8 +2635,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg if (loopV != 0) { // (blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z) / warpSize) - e = &SgAssignOp(*new SgVarRefExp(*red_blocks), - (*new SgRecordRefExp(*s_blocks, "x") * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * + e = &SgAssignOp(*new SgVarRefExp(*red_blocks), + (*new SgRecordRefExp(*s_blocks, "x") * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z")) / *new SgValueExp(warpSize)); stmt = new SgCExpStmt(*e); @@ -2719,7 +2666,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg } e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) - + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))); + + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))); stmt = new SgCExpStmt(*e); @@ -2727,17 +2674,17 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { SgFunctionCallExp *f1 = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); SgFunctionCallExp *f2 = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); - f1->addArg(*new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0]->len)) - *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len))); - f2->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))); + f1->addArg(*new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len)) - *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len))); + f2->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))); - e = &SgAssignOp(*new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0]->len)), (*f1 + *f2) / *f2); + e = &SgAssignOp(*new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len)), (*f1 + *f2) / *f2); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } - + if (options.isOn(GPU_O0)) { - e = &SgAssignOp(*new SgArrayRefExp(*steps, *new SgArrayRefExp(*bIdxs, *new SgValueExp(0))), *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))); + e = &SgAssignOp(*new SgArrayRefExp(*steps, *new SgArrayRefExp(*bIdxs, *new SgValueExp(0))), *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); @@ -2787,9 +2734,9 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < loopV; ++i) funcCallKernel->addArg(*new SgVarRefExp(loopBase[i])); for (int i = 0; i < acrossV; ++i) - funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i].len))); for (int i = 0; i < loopV; ++i) - funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[i]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[i].len))); for (s = uses_first, sdev = scalar_first, ln = 0; ln < uses_num; s = s->next(), ln++) // uses { @@ -2811,22 +2758,22 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < acrossV + loopV; ++i) funcCallKernel->addArg(*new SgArrayRefExp(*bIdxs, *new SgValueExp(i))); - char *cond_ = new char[strlen("cond_") + strlen(loopAcrossSymb[0]->symb->identifier()) + 1]; + char *cond_ = new char[strlen("cond_") + strlen(loopAcrossSymb[0].symb->identifier()) + 1]; cond_[0] = '\0'; strcat(cond_, "cond_"); - strcat(cond_, loopAcrossSymb[0]->symb->identifier()); - + strcat(cond_, loopAcrossSymb[0].symb->identifier()); + if (options.isOn(GPU_O0)) { - funcCallKernel->addArg(*new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len))); for (int i = loopV - 1; i >= 0; i--) - funcCallKernel->addArg(*new SgArrayRefExp(*steps, *new SgValueExp(loopSymb[i]->len))); - funcCallKernel->addArg(*new SgArrayRefExp(*steps, *new SgValueExp(loopAcrossSymb[0]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*steps, *new SgValueExp(loopSymb[i].len))); + funcCallKernel->addArg(*new SgArrayRefExp(*steps, *new SgValueExp(loopAcrossSymb[0].len))); } - + } mywarn(" end: out adding args section"); - + stmt = createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks); if (options.isOn(GPU_O0)) @@ -2834,13 +2781,13 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg else { SgSymbol *tmpV = new SgSymbol(VARIABLE_NAME, "int tmpV"); - SgSymbol *tmpV1 = new SgSymbol(VARIABLE_NAME, "tmpV"); + SgSymbol *tmpV1 = new SgSymbol(VARIABLE_NAME, "tmpV"); SgExprListExp *expr = new SgExprListExp(); - expr->setLhs(SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len)))); + expr->setLhs(SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len)))); expr->setRhs(new SgExprListExp()); expr->rhs()->setLhs(SgAssignOp(*new SgVarRefExp(tmpV1), *new SgVarRefExp(tmpV1) + *new SgValueExp(1))); SgForStmt *simple; - simple = new SgForStmt(&SgAssignOp(*new SgVarRefExp(tmpV), *new SgValueExp(0)), &(*new SgVarRefExp(tmpV1) < *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0]->len))), expr, stmt); + simple = new SgForStmt(&SgAssignOp(*new SgVarRefExp(tmpV), *new SgValueExp(0)), &(*new SgVarRefExp(tmpV1) < *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len))), expr, stmt); st_end->insertStmtBefore(*simple); } } @@ -2862,8 +2809,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { int idx[2]; SgStatement *st1, *st2; - idx[1] = loopAcrossSymb[1]->len; - idx[0] = loopAcrossSymb[0]->len; + idx[1] = loopAcrossSymb[1].len; + idx[0] = loopAcrossSymb[0].len; SgFunctionCallExp *f1 = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); @@ -2889,8 +2836,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { SgFunctionCallExp *tempF = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); - funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[0]->len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[0]->len)))); - tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[0]->len))); + funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[0].len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[0].len)))); + tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[0].len))); e = &SgAssignOp(*new SgVarRefExp(num_elems[1]), (*funcCall + *new SgValueExp(1)) / *tempF + SgNeqOp((*funcCall + *new SgValueExp(1)) % *tempF, *new SgValueExp(0))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); @@ -2900,8 +2847,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { SgFunctionCallExp *tempF = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); - funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[k + 1]->len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[k + 1]->len)))); - tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[k + 1]->len))); + funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[k + 1].len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[k + 1].len)))); + tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[k + 1].len))); e_z[k] = &((*funcCall + *new SgValueExp(1)) / *tempF + SgNeqOp((*funcCall + *new SgValueExp(1)) % *tempF, *new SgValueExp(0))); } if (loopV > 2) @@ -2976,7 +2923,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { e = &SgAssignOp(*new SgVarRefExp(*red_blocks), (*new SgVarRefExp(q) / *new SgVarRefExp(nums[0]) + SgNeqOp(*new SgVarRefExp(q) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) * - *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * + *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize)); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); @@ -3003,7 +2950,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg // init bases for (int i = 0; i < acrossV; ++i) { - e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); if (i == 0) @@ -3011,7 +2958,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg } for (int i = 0; i < loopV; ++i) { - e = &SgAssignOp(*new SgVarRefExp(loopBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(loopBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[i].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } @@ -3030,16 +2977,16 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg st_end->insertStmtBefore(*while_st, *st_hedr); e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) + - *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))); + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))); stmt = new SgCExpStmt(*e); while_st->insertStmtAfter(*stmt); /* --------- add argument list to kernel call ----*/ - createArgsForKernelForTwodeps(funcCallKernel, kernel_symb, espec, sg, hgpu_first, sb, base_first, sl, ln, num, e, - reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, - has_red_array, diag, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, - loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, + createArgsForKernelForTwoDeps(funcCallKernel, kernel_symb, espec, sg, hgpu_first, sb, base_first, sl, ln, num, e, + reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, + has_red_array, diag, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, + loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, addressingParams, outTypeOfTransformation, type_of_run, bIdxs); stmt = createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks); @@ -3074,7 +3021,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg if_st->falseBody()->insertStmtBefore(*new SgCExpStmt(SgAssignOp(*new SgVarRefExp(diag), *new SgValueExp(0)))); e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) - - *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))); + - *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))); stmt = new SgCExpStmt(*e); if_st->insertStmtAfter(*stmt); if_st->falseBody()->insertStmtBefore(stmt->copy()); @@ -3083,8 +3030,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg stmt = new SgCExpStmt(*e); if_st->lexNext()->insertStmtAfter(*stmt); if_st->falseBody()->lexNext()->lexNext()->lexNext()->insertStmtAfter(stmt->copy(), *if_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1]->len)) + - *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1].len)) + + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len))); stmt = new SgCExpStmt(*e); if_st->insertStmtAfter(*stmt); if_st->falseBody()->insertStmtBefore(stmt->copy()); @@ -3094,12 +3041,12 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) + - *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))); + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))); stmt = new SgCExpStmt(*e); while_st1->insertStmtAfter(*stmt); e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + - *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len))); + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len))); stmt = new SgCExpStmt(*e); while_st2->insertStmtAfter(*stmt); @@ -3115,7 +3062,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg while_st4->lastExecutable()->insertStmtAfter(stmt->copy()); /* --------- add argument list to kernel call ----*/ - createArgsForKernelForTwodeps(funcCallKernel, kernel_symb, espec, sg, hgpu_first, sb, base_first, sl, ln, num, e, + createArgsForKernelForTwoDeps(funcCallKernel, kernel_symb, espec, sg, hgpu_first, sb, base_first, sl, ln, num, e, reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, has_red_array, q, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, @@ -3127,7 +3074,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg mywarn(" end: block3"); /* --------- add argument list to kernel call ----*/ - createArgsForKernelForTwodeps(funcCallKernel, kernel_symb, espec, sg, hgpu_first, sb, base_first, sl, ln, num, e, + createArgsForKernelForTwoDeps(funcCallKernel, kernel_symb, espec, sg, hgpu_first, sb, base_first, sl, ln, num, e, reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, has_red_array, elem, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, @@ -3164,8 +3111,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg s_cuda_var[2] = tmpS; } - SgExpression* firstElem = new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len)); - SgExpression* secondElem = new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)); + SgExpression* firstElem = new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len)); + SgExpression* secondElem = new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)); SgIfStmt* if_stSwap = new SgIfStmt(*new SgVarRefExp(M1) > *new SgVarRefExp(M2), *new SgCExpStmt(*firstElem ^= *secondElem ^= *firstElem ^= *secondElem)); @@ -3196,9 +3143,9 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < loopV; ++i) funcCallKernel->addArg(*new SgVarRefExp(loopBase[i])); for (int i = 0; i < acrossV; ++i) - funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i].len))); for (int i = 0; i < loopV; ++i) - funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[i]->len))); + funcCallKernel->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[i].len))); for (s = uses_first, sdev = scalar_first, ln = 0; ln < uses_num; s = s->next(), ln++) // uses { @@ -3271,7 +3218,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg int idx[3]; SgStatement *st1; for (int i = 0; i < 3; ++i) - idx[i] = loopAcrossSymb[i]->len; + idx[i] = loopAcrossSymb[i].len; for (int i = 0; i < 3; ++i) { @@ -3316,8 +3263,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { SgFunctionCallExp *tempF = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); - funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[0]->len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[0]->len)))); - tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[0]->len))); + funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[0].len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[0].len)))); + tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[0].len))); e = &SgAssignOp(*new SgVarRefExp(num_elems[0]), (*funcCall + *new SgValueExp(1)) / *tempF + SgNeqOp((*funcCall + *new SgValueExp(1)) % *tempF, *new SgValueExp(0))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); @@ -3329,8 +3276,8 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { SgFunctionCallExp *tempF = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); - funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[k + 1]->len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[k + 1]->len)))); - tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[k + 1]->len))); + funcCall->addArg((*new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[k + 1].len)) - *new SgArrayRefExp(*highI, *new SgValueExp(loopSymb[k + 1].len)))); + tempF->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopSymb[k + 1].len))); e_z[k] = &((*funcCall + *new SgValueExp(1)) / *tempF + SgNeqOp((*funcCall + *new SgValueExp(1)) % *tempF, *new SgValueExp(0))); } @@ -3422,7 +3369,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg int flag_comment = 0; for (int i = 3; i < acrossV; ++i) { - e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); if (i - 3 == 0) @@ -3436,7 +3383,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg { for (int i = 0; i < MIN(3, acrossV); ++i) { - e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); if (i == 0 && flag_comment == 0) @@ -3445,7 +3392,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < loopV; ++i) { - e = &SgAssignOp(*new SgVarRefExp(loopBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(loopBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[i].len))); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } @@ -3458,20 +3405,20 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg SgWhileStmt *tmp; for (int i = 3; i < acrossV; ++i) { - e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgVarRefExp(acrossBase[i]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgVarRefExp(acrossBase[i]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i].len))); stmt = new SgCExpStmt(*e); SgExpression *e1 = NULL; SgFunctionCallExp *func = new SgFunctionCallExp(*createNewFunctionSymbol("abs")); - func->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i]->len))); - e1 = &(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i]->len)) / *func); + func->addArg(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i].len))); + e1 = &(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[i].len)) / *func); if (first) { - main_while_st = new SgWhileStmt(*e1 * *new SgVarRefExp(acrossBase[i]) <= *e1 * *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[i]->len)), *stmt); + main_while_st = new SgWhileStmt(*e1 * *new SgVarRefExp(acrossBase[i]) <= *e1 * *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[i].len)), *stmt); first = false; } else { - tmp = new SgWhileStmt(*new SgVarRefExp(acrossBase[i]) <= *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[i]->len)), *stmt); + tmp = new SgWhileStmt(*new SgVarRefExp(acrossBase[i]) <= *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[i].len)), *stmt); main_while_st->insertStmtAfter(*tmp); main_while_st = tmp; } @@ -3492,14 +3439,14 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg for (int i = 0; i < MIN(3, acrossV); ++i) { - e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[i].len))); stmt = new SgCExpStmt(*e); main_stmt->insertStmtBefore(*stmt, *main_while_st); } for (int i = 0; i < loopV; ++i) { - e = &SgAssignOp(*new SgVarRefExp(loopBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[i]->len))); + e = &SgAssignOp(*new SgVarRefExp(loopBase[i]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopSymb[i].len))); stmt = new SgCExpStmt(*e); main_stmt->insertStmtBefore(*stmt, *main_while_st); } @@ -3514,9 +3461,9 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg else main_stmt->insertStmtBefore(*while_st, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[2]), *new SgVarRefExp(acrossBase[2]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[2]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[2]), *new SgVarRefExp(acrossBase[2]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[2].len))); stmt = new SgCExpStmt(*e); - while_st->insertStmtAfter(*stmt); + while_st->insertStmtAfter(*stmt); while_st->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks)); e = &SgAssignOp(*new SgRecordRefExp(*s_blocks, (char*)s_cuda_var[1]), *new SgVarRefExp(diag) / *new SgVarRefExp(nums[1]) + SgNeqOp(*new SgVarRefExp(diag) % *new SgVarRefExp(nums[1]), *new SgValueExp(0))); @@ -3554,7 +3501,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg SgWhileStmt *while_st = new SgWhileStmt(SgNeqOp(*new SgVarRefExp(diag) - *new SgValueExp(1), *new SgVarRefExp(M3)), *stmt); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[2]), *new SgVarRefExp(acrossBase[2]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[2]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[2]), *new SgVarRefExp(acrossBase[2]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[2].len))); stmt = new SgCExpStmt(*e); while_st->insertStmtAfter(*stmt, *while_st); @@ -3580,11 +3527,11 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg stmt = new SgCExpStmt(*e); if_st->insertStmtAfter(*stmt); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1]->len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1].len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); stmt = new SgCExpStmt(*e); if_st->insertStmtAfter(*stmt); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1]->len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1].len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); stmt = new SgCExpStmt(*e); if_st->insertStmtAfter(*stmt); } @@ -3617,21 +3564,21 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg else main_stmt->insertStmtBefore(*stmt, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), (*new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len))) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + (*new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1]->len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len))) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), (*new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len))) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + (*new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1].len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len))) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); stmt = new SgCExpStmt(*e); if (acrossV == 3) st_end->insertStmtBefore(*stmt, *st_hedr); else main_stmt->insertStmtBefore(*stmt, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1]->len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1].len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); stmt = new SgCExpStmt(*e); if (acrossV == 3) st_end->insertStmtBefore(*stmt, *st_hedr); else main_stmt->insertStmtBefore(*stmt, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[2]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[2]->len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[2]->len)) * (*new SgVarRefExp(M3) - *new SgValueExp(1))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[2]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[2].len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[2].len)) * (*new SgVarRefExp(M3) - *new SgValueExp(1))); stmt = new SgCExpStmt(*e); if (acrossV == 3) st_end->insertStmtBefore(*stmt, *st_hedr); @@ -3647,7 +3594,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg else main_stmt->insertStmtBefore(*while_st, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgVarRefExp(acrossBase[0]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len)) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2))); stmt = new SgCExpStmt(*e); while_st->insertStmtAfter(*stmt, *while_st); @@ -3683,14 +3630,14 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg else main_stmt->insertStmtBefore(*stmt, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0]->len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0]->len)) * (*new SgVarRefExp(M1) - *new SgValueExp(1))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[0].len)) * (*new SgVarRefExp(M1) - *new SgValueExp(1))); stmt = new SgCExpStmt(*e); if (acrossV == 3) st_end->insertStmtBefore(*stmt, *st_hedr); else main_stmt->insertStmtBefore(*stmt, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1]->len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2)) + *new SgVarRefExp(acrossBase[1]) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[1].len)) * (*new SgVarRefExp(M1) > *new SgVarRefExp(M2)) + *new SgVarRefExp(acrossBase[1]) * (*new SgVarRefExp(M1) <= *new SgVarRefExp(M2))); stmt = new SgCExpStmt(*e); if (acrossV == 3) st_end->insertStmtBefore(*stmt, *st_hedr); @@ -3703,31 +3650,31 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg SgExpression *e1 = NULL, *e2 = NULL; SgIfStmt *if_st1 = NULL; - e1 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) + *new SgVarRefExp(*Emax) - *new SgVarRefExp(*Emin) - *new SgValueExp(1)); - e2 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) - *new SgVarRefExp(*Emax) + *new SgVarRefExp(*Emin) + *new SgValueExp(1)); + e1 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) + *new SgVarRefExp(*Emax) - *new SgVarRefExp(*Emin) - *new SgValueExp(1)); + e2 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) - *new SgVarRefExp(*Emax) + *new SgVarRefExp(*Emin) + *new SgValueExp(1)); - if_st1 = new SgIfStmt(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) > *new SgValueExp(0), *new SgCExpStmt(*e1), *new SgCExpStmt(*e2)); + if_st1 = new SgIfStmt(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) > *new SgValueExp(0), *new SgCExpStmt(*e1), *new SgCExpStmt(*e2)); SgIfStmt *if_st = new SgIfStmt(*new SgVarRefExp(*M1) <= *new SgVarRefExp(*M2) && *new SgVarRefExp(*M3) > *new SgVarRefExp(*Emin), *if_st1); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len))); stmt = new SgCExpStmt(*e); if_st = new SgIfStmt(*new SgVarRefExp(*M1) > *new SgVarRefExp(*M2) && *new SgVarRefExp(*M3) > *new SgVarRefExp(*Emin), *stmt, *if_st); - e1 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) + *new SgVarRefExp(*Emax) - *new SgVarRefExp(*Emin) - *new SgValueExp(1) + *funcCall); - e2 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) - *new SgVarRefExp(*Emax) + *new SgVarRefExp(*Emin) + *new SgValueExp(1) + *new SgVarRefExp(M3) - *new SgVarRefExp(*Emin)); + e1 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) + *new SgVarRefExp(*Emax) - *new SgVarRefExp(*Emin) - *new SgValueExp(1) + *funcCall); + e2 = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) - *new SgVarRefExp(*Emax) + *new SgVarRefExp(*Emin) + *new SgValueExp(1) + *new SgVarRefExp(M3) - *new SgVarRefExp(*Emin)); + + if_st1 = new SgIfStmt(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) > *new SgValueExp(0), *new SgCExpStmt(*e1), *new SgCExpStmt(*e2)); - if_st1 = new SgIfStmt(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) > *new SgValueExp(0), *new SgCExpStmt(*e1), *new SgCExpStmt(*e2)); - if_st = new SgIfStmt(*new SgVarRefExp(*M1) <= *new SgVarRefExp(*M2) && *new SgVarRefExp(*M3) <= *new SgVarRefExp(*Emin), *if_st1, *if_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) + *funcCall); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) + *funcCall); stmt = new SgCExpStmt(*e); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) + *funcCall * *new SgValueExp(-1)); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) + *funcCall * *new SgValueExp(-1)); SgStatement* stmtElse = new SgCExpStmt(*e); - if_st1 = new SgIfStmt(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len)) > *new SgValueExp(0), *stmt, *stmtElse); + if_st1 = new SgIfStmt(*new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len)) > *new SgValueExp(0), *stmt, *stmtElse); if_st = new SgIfStmt(*new SgVarRefExp(*M1) > *new SgVarRefExp(*M2) && *new SgVarRefExp(*M3) <= *new SgVarRefExp(*Emin), *if_st1, *if_st); @@ -3746,7 +3693,7 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg else main_stmt->insertStmtBefore(*while_st, *main_while_st); - e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1]->len))); + e = &SgAssignOp(*new SgVarRefExp(acrossBase[1]), *new SgVarRefExp(acrossBase[1]) + *new SgArrayRefExp(*idxI, *new SgValueExp(loopAcrossSymb[1].len))); stmt = new SgCExpStmt(*e); while_st->insertStmtAfter(*stmt, *while_st); @@ -3781,40 +3728,38 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg st_end->insertStmtBefore(*stmt, *st_hedr); } } - - // create args for kernel and return it - ArgsForKernel **argsKernel = new ArgsForKernel*[countKernels]; + + // create args for kernel and return it + vector argsKernel(countKernels); const int rtTypes[] = { rt_INT, rt_LLONG }; for (unsigned ck = 0; ck < countKernels; ++ck) { - argsKernel[ck] = new ArgsForKernel(); - - argsKernel[ck]->st_header = st_hedr; - argsKernel[ck]->cond_ = NULL; + argsKernel[ck].st_header = st_hedr; + argsKernel[ck].cond_ = NULL; SgType *typeParams = indexTypeInKernel(rtTypes[ck]); if (acrossV == 1) { - char *cond_ = new char[strlen("cond_") + strlen(loopAcrossSymb[0]->symb->identifier()) + 1]; + char *cond_ = new char[strlen("cond_") + strlen(loopAcrossSymb[0].symb->identifier()) + 1]; cond_[0] = '\0'; strcat(cond_, "cond_"); - strcat(cond_, loopAcrossSymb[0]->symb->identifier()); - argsKernel[ck]->cond_ = new SgSymbol(VARIABLE_NAME, cond_, typeParams, st_hedr); + strcat(cond_, loopAcrossSymb[0].symb->identifier()); + argsKernel[ck].cond_ = new SgSymbol(VARIABLE_NAME, cond_, typeParams, st_hedr); - char *st = new char[strlen("steps_") + strlen(loopAcrossSymb[0]->symb->identifier()) + 1]; + char *st = new char[strlen("steps_") + strlen(loopAcrossSymb[0].symb->identifier()) + 1]; st[0] = '\0'; strcat(st, "steps_"); - strcat(st, loopAcrossSymb[0]->symb->identifier()); - argsKernel[ck]->steps.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(st), typeParams, st_hedr)); + strcat(st, loopAcrossSymb[0].symb->identifier()); + argsKernel[ck].steps.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(st), typeParams, st_hedr)); for (int i = 0; i < loopV; ++i) { - st = new char[strlen("steps_") + strlen(loopSymb[i]->symb->identifier()) + 1]; + st = new char[strlen("steps_") + strlen(loopSymb[i].symb->identifier()) + 1]; st[0] = '\0'; strcat(st, "steps_"); - strcat(st, loopSymb[i]->symb->identifier()); - argsKernel[ck]->steps.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(st), typeParams, st_hedr)); + strcat(st, loopSymb[i].symb->identifier()); + argsKernel[ck].steps.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(st), typeParams, st_hedr)); } } @@ -3826,75 +3771,75 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_x_axis"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_offset_x"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_Rx"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_y_axis"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_offset_y"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_Ry"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); tmpS[0] = '\0'; strcat(tmpS, dvm_array_headers[i]); strcat(tmpS, "_slash"); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(outTypeOfTransformation[i]->identifier()), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(tmpS), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(outTypeOfTransformation[i]->identifier()), typeParams, st_hedr)); } - argsKernel[ck]->arrayNames = dvm_array_headers; + argsKernel[ck].arrayNames = dvm_array_headers; } if (acrossV == 2) - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("num_elem_across"), typeParams, st_hedr)); + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("num_elem_across"), typeParams, st_hedr)); else if (acrossV >= 3) { - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("max_z"), typeParams, st_hedr)); - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("SE"), typeParams, st_hedr)); // SE - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("var1"), typeParams, st_hedr)); // var1 - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("var2"), typeParams, st_hedr)); // var2 - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("var3"), typeParams, st_hedr)); // var3 - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("Emax"), typeParams, st_hedr)); // Emax - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("Emin"), typeParams, st_hedr)); // Emin - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("min_ij"), typeParams, st_hedr)); - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("swap_ij"), typeParams, st_hedr)); + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("max_z"), typeParams, st_hedr)); + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("SE"), typeParams, st_hedr)); // SE + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("var1"), typeParams, st_hedr)); // var1 + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("var2"), typeParams, st_hedr)); // var2 + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("var3"), typeParams, st_hedr)); // var3 + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("Emax"), typeParams, st_hedr)); // Emax + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("Emin"), typeParams, st_hedr)); // Emin + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("min_ij"), typeParams, st_hedr)); + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("swap_ij"), typeParams, st_hedr)); } char *str = new char[32]; for (int i = 0; i < acrossV; ++i) { - argsKernel[ck]->acrossS.push_back(new SgSymbol(VARIABLE_NAME, acrossBase[i]->identifier(), typeParams, st_hedr)); // acrossBase[i] - argsKernel[ck]->symb.push_back(loopAcrossSymb[i]); + argsKernel[ck].acrossS.push_back(new SgSymbol(VARIABLE_NAME, acrossBase[i]->identifier(), typeParams, st_hedr)); // acrossBase[i] + argsKernel[ck].symb.push_back(loopAcrossSymb[i]); strcpy(str, "step"); strcat(str, strchr(acrossBase[i]->identifier(), '_')); - argsKernel[ck]->idxAcross.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(str), typeParams, st_hedr)); + argsKernel[ck].idxAcross.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(str), typeParams, st_hedr)); } for (int i = 0; i < loopV; ++i) { - argsKernel[ck]->notAcrS.push_back(new SgSymbol(VARIABLE_NAME, loopBase[i]->identifier(), typeParams, st_hedr)); // loopBase[i] - argsKernel[ck]->nSymb.push_back(loopSymb[i]); + argsKernel[ck].notAcrossS.push_back(new SgSymbol(VARIABLE_NAME, loopBase[i]->identifier(), typeParams, st_hedr)); // loopBase[i] + argsKernel[ck].nSymb.push_back(loopSymb[i]); strcpy(str, "step"); strcat(str, strchr(loopBase[i]->identifier(), '_')); - argsKernel[ck]->idxNotAcross.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(str), typeParams, st_hedr)); + argsKernel[ck].idxNotAcross.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(str), typeParams, st_hedr)); strcpy(str, "num_elem"); strcat(str, strchr(loopBase[i]->identifier(), '_')); - argsKernel[ck]->sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(str), typeParams, st_hedr)); + argsKernel[ck].sizeVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(str), typeParams, st_hedr)); } if (acrossV == 1 || acrossV == 2 || acrossV >= 3) { - argsKernel[ck]->otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("type_of_run"), typeParams, st_hedr)); + argsKernel[ck].otherVars.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName("type_of_run"), typeParams, st_hedr)); char *t = new char[32]; for (int i = 0; i < acrossV + loopV; ++i) { @@ -3903,13 +3848,13 @@ ArgsForKernel** Create_C_Adapter_Function_Across_variants(SgSymbol *sadapter, Sg t[0] = '\0'; strcat(t, "idxs_"); strcat(t, p); - argsKernel[ck]->baseIdxsInKer.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(t), typeParams, st_hedr)); + argsKernel[ck].baseIdxsInKer.push_back(new SgSymbol(VARIABLE_NAME, TestAndCorrectName(t), typeParams, st_hedr)); } delete[]t; } delete[]str; - + } // end of creation args for kernel @@ -3928,11 +3873,11 @@ void MakeDeclarationsForKernel_On_C_Across(SgType *indexType) // declare do_variables DeclareDoVars(indexType); - // declare private(local in kernel) variables + // declare private(local in kernel) variables DeclarePrivateVars(); // declare variables, used in loop and passed by reference: - // & = *p_; + // & = *p_; DeclareUsedVars(); } @@ -3941,11 +3886,11 @@ void MakeDeclarationsForKernelAcross(SgType *indexType) #if debugMode mywarn("strat: MakeDeclarations Function"); #endif - + // declare do_variables DeclareDoVars(); - // declare private(local in kernel) variables + // declare private(local in kernel) variables DeclarePrivateVars(); // declare dummy arguments: @@ -3956,7 +3901,7 @@ void MakeDeclarationsForKernelAcross(SgType *indexType) // declare array coefficients DeclareArrayCoeffsInKernel(indexType); - // declare bases for arrays + // declare bases for arrays DeclareArrayBases(); // declare variables, used in loop @@ -3978,63 +3923,33 @@ SgExpression *CreateKernelDummyListAcross(ArgsForKernel *argsKer, SgType *idxTyp arg_list = AddListToList(CreateArrayDummyList(idxTypeInKernel), CreateRedDummyList(idxTypeInKernel)); // base_ref + ... - // + [+red_var_2+...+red_var_M] + _grid [ + ...] + // + [+red_var_2+...+red_var_M] + _grid [ + ...] // + 'blocks' if (argsKer->symb.size() < 3) { - for (list::iterator it = argsKer->sizeVars.begin(); it != argsKer->sizeVars.end(); ++it) - { - ae = new SgExprListExp(*new SgVarRefExp(*it)); - arg_list = AddListToList(arg_list, ae); - } + for (int it = 0; it < argsKer->sizeVars.size(); ++it) + arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->sizeVars[it]))); } - for (list::iterator it = argsKer->acrossS.begin(); it != argsKer->acrossS.end(); ++it) - { - ae = new SgExprListExp(*new SgVarRefExp(*it)); - arg_list = AddListToList(arg_list, ae); - } - for (list::iterator it = argsKer->notAcrS.begin(); it != argsKer->notAcrS.end(); ++it) - { - ae = new SgExprListExp(*new SgVarRefExp(*it)); - arg_list = AddListToList(arg_list, ae); - } - - for (list::iterator it = argsKer->idxAcross.begin(); it != argsKer->idxAcross.end(); ++it) - { - ae = new SgExprListExp(*new SgVarRefExp(*it)); - arg_list = AddListToList(arg_list, ae); - } - - for (list::iterator it = argsKer->idxNotAcross.begin(); it != argsKer->idxNotAcross.end(); ++it) - { - ae = new SgExprListExp(*new SgVarRefExp(*it)); - arg_list = AddListToList(arg_list, ae); - } - //ae = options.isOn(C_CUDA) ? new SgExprListExp(*new SgArrayRefExp(*s_blocks_k,*eln)) : new SgExprListExp(*new SgArrayRefExp(*s_blocks_k)); // + 'blocks' - // //ae = options.isOn(C_CUDA) ? new SgExprListExp(*new SgPointerDerefExp(*new SgVarRefExp(s_blocks_k))) : new SgExprListExp(*new SgVarRefExp(s_blocks_k)); - //arg_list = AddListToList(arg_list,ae); - //if(s_red_count_k) //[+ 'red_count'] - //{ ae = new SgExprListExp(*new SgVarRefExp(s_red_count_k)); - // arg_list = AddListToList(arg_list,ae); - //} - // //[+ 'overall_blocks'] - //if(s_overall_blocks) - //{ ae = new SgExprListExp(*new SgVarRefExp(s_overall_blocks)); - // arg_list = AddListToList(arg_list,ae); - //} + for (int it = 0; it < argsKer->acrossS.size(); ++it) + arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->acrossS[it]))); + + for (int it = 0; it < argsKer->notAcrossS.size(); ++it) + arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->notAcrossS[it]))); + + for (int it = 0; it < argsKer->idxAcross.size(); ++it) + arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->idxAcross[it]))); + + for (int it = 0; it < argsKer->idxNotAcross.size(); ++it) + arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->idxNotAcross[it]))); + if (uses_list) arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ ] if (argsKer->symb.size() >= 3) - { - for (list::iterator it = argsKer->sizeVars.begin(); it != argsKer->sizeVars.end(); ++it) - { - ae = new SgExprListExp(*new SgVarRefExp(*it)); - arg_list = AddListToList(arg_list, ae); - } - } + for (int it = 0; it < argsKer->sizeVars.size(); ++it) + arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->sizeVars[it]))); if (argsKer->acrossS.size() != 1) { @@ -4075,7 +3990,7 @@ SgExpression *CreateKernelDummyListAcross(ArgsForKernel *argsKer, SgType *idxTyp return arg_list; } -SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, SgType *idxTypeInKernel) +SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, SgType *idxTypeInKernel) { #if debugMode mywarn("strat: CreateLoopKernelAcross"); @@ -4098,7 +4013,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, S return(NULL); nloop = ParLoopRank(); - // create kernel procedure for loop in Fortran-Cuda language or kernel function in C_Cuda + // create kernel procedure for loop in Fortran-Cuda language or kernel function in C_Cuda // creating Header and End Statement of Kernel if (options.isOn(C_CUDA)) { @@ -4121,12 +4036,12 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, S // create dummy argument list of kernel: if (options.isOn(C_CUDA)) - fe->setLhs(CreateKernelDummyListAcross(argsKer, longType)); //s_red_count_k, + fe->setLhs(CreateKernelDummyListAcross(argsKer, longType)); //s_red_count_k, else // create dummy argument list and add it to kernel header statement (Fortran-Cuda) - kernel_st->setExpression(0, *CreateKernelDummyListAcross(argsKer, longType)); //s_red_count_k, + kernel_st->setExpression(0, *CreateKernelDummyListAcross(argsKer, longType)); //s_red_count_k, - // generating block of index variables calculation + // generating block of index variables calculation #if debugMode mywarn("start: block4"); @@ -4154,7 +4069,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, S if (options.isOn(C_CUDA)) for_st = new SgForStmt(&SgAssignOp(*new SgVarRefExp(argsKer->otherVarsForOneTh[size - 1]), *new SgVarRefExp(argsKer->otherVars[size1 - 3])), &(*sign * *new SgVarRefExp(argsKer->otherVarsForOneTh[size - 1]) <= *sign * *new SgVarRefExp(argsKer->otherVars[size1 - 2])), &SgAssignOp(*new SgVarRefExp(argsKer->otherVarsForOneTh[size - 1]), *new SgVarRefExp(argsKer->otherVarsForOneTh[size - 1]) + *new SgVarRefExp(argsKer->otherVars[size1 - 1])), NULL); else - for_st = new SgForStmt(argsKer->otherVarsForOneTh[size - 1], new SgVarRefExp(argsKer->otherVars[size1 - 3]), new SgVarRefExp(argsKer->otherVars[size1 - 2]), new SgVarRefExp(argsKer->otherVars[size1 - 1]), NULL); + for_st = new SgForStmt(argsKer->otherVarsForOneTh[size - 1], new SgVarRefExp(argsKer->otherVars[size1 - 3]), new SgVarRefExp(argsKer->otherVars[size1 - 2]), new SgVarRefExp(argsKer->otherVars[size1 - 1]), NULL); inner_for_st = for_st; for (int i = size - 2; i >= 0; i--) @@ -4172,7 +4087,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, S } if_st = new SgIfStmt(SgEqOp(*new SgVarRefExp(*tid), *new SgValueExp(0)), *for_st); - cur_in_kernel->insertStmtAfter(*if_st, *kernel_st); + cur_in_kernel->insertStmtAfter(*if_st, *kernel_st); #if debugMode mywarn(" end: block4"); @@ -4215,7 +4130,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, S if (stk->variant() == CONTROL_END) { - if (stk->hasLabel() || stk == loop_body) // when body of DO_ENDDO loop is empty, stk == loop_body + if (stk->hasLabel() || stk == loop_body) // when body of DO_ENDDO loop is empty, stk == loop_body stk->setVariant(CONT_STAT); else { @@ -4238,11 +4153,11 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, S vector < stack < SgStatement*> > zero = vector < stack < SgStatement*> >(0); Translate_Fortran_To_C(inner_for_st, inner_for_st->lastNodeOfStmt(), zero, 0); } - + cur_st = last; createBodyKernel = false; } - + #if debugMode mywarn(" end: inserting loop body"); mywarn("start: create reduction block"); @@ -4309,7 +4224,20 @@ static SgStatement* makeBlockIdxAssigment(SgSymbol* tid, const char* XYZ) return st; } -SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, int acrossNum, SgType *idxTypeInKernel) +static void createDeclaration(SgSymbol* toDecl) +{ + SgStatement* st = toDecl->makeVarDeclStmt(); + st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); + kernel_st->insertStmtAfter(*st); +} + +static void createDeclaration(const vector& toDecl) +{ + for (int it = 0; it < toDecl.size(); ++it) + createDeclaration(toDecl[it]); +} + +SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, int acrossNum, SgType *idxTypeInKernel) { #if debugMode mywarn("strat: CreateLoopKernelAcross"); @@ -4327,15 +4255,15 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i SgSymbol *tid = NULL, *tid1 = NULL, *tid2 = NULL, *s_red_count_k = NULL, *coords = NULL; SgIfStmt *if_st = NULL, *if_st1 = NULL, *if_st2 = NULL; SgForStmt *mainFor = NULL; - SgSymbol *tmpvar1 = NULL; + SgSymbol *tmpvar1 = NULL; SgExpression **leftExprs, **rightExprs; SgType *longType = idxTypeInKernel; - + if (!skernel) return(NULL); nloop = ParLoopRank(); - // create kernel procedure for loop in Fortran-Cuda language or kernel function in C_Cuda + // create kernel procedure for loop in Fortran-Cuda language or kernel function in C_Cuda // creating Header and End Statement of Kernel if (options.isOn(C_CUDA)) { @@ -4358,7 +4286,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i st_end = kernel_st->lexNext(); cur_in_kernel = st = kernelScope = kernel_st; - // !!creating variables and making structures for reductions + // !!creating variables and making structures for reductions CompleteStructuresForReductionInKernel(); //CompleteStructuresForReductionInKernelAcross(); if (red_list) @@ -4371,7 +4299,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i // create dummy argument list and add it to kernel header statement (Fortran-Cuda) kernel_st->setExpression(0, *CreateKernelDummyListAcross(argsKer, idxTypeInKernel)); // s_red_count_k, - // generating block of index variables calculation + // generating block of index variables calculation #if debugMode mywarn("start: block4"); @@ -4380,7 +4308,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i SgArrayType *tpArr = new SgArrayType(*longType); SgValueExp *dimSize = new SgValueExp((int)(argsKer->symb.size() + argsKer->nSymb.size())); tpArr->addDimension(dimSize); - + coords = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("coords"), *longType, *cur_in_kernel); coords->setType(tpArr); @@ -4468,38 +4396,38 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i int idx_exprs = 0; int count_of_dims = argsKer->nSymb.size() + argsKer->symb.size(); - list::iterator itAcr = argsKer->symb.begin(); - list::iterator it = argsKer->nSymb.begin(); - list::iterator itAcrS = argsKer->acrossS.begin(); - list::iterator itS = argsKer->notAcrS.begin(); - list::iterator it_sizeV = argsKer->sizeVars.begin(); - list::iterator itIdxAcr = argsKer->idxAcross.begin(); - list::iterator itIdx = argsKer->idxNotAcross.begin(); + vector::iterator itAcr = argsKer->symb.begin(); + vector::iterator it = argsKer->nSymb.begin(); + vector::iterator itAcrS = argsKer->acrossS.begin(); + vector::iterator itS = argsKer->notAcrossS.begin(); + vector::iterator it_sizeV = argsKer->sizeVars.begin(); + vector::iterator itIdxAcr = argsKer->idxAcross.begin(); + vector::iterator itIdx = argsKer->idxNotAcross.begin(); leftExprs = new SgExpression*[count_of_dims]; rightExprs = new SgExpression*[count_of_dims]; e = &(*new SgVarRefExp(*itAcrS)); - st = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *e); + st = AssignStatement(*new SgVarRefExp((*itAcr).symb), *e); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*itAcr)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*itAcr).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS)); idx_exprs++; if (argsKer->nSymb.size() == 1) { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); idx_exprs++; } else if (argsKer->nSymb.size() == 2) { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4507,9 +4435,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itIdx++; itS++; - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4519,9 +4447,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i } else if (argsKer->nSymb.size() >= 3) { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4529,9 +4457,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itIdx++; itS++; - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4551,9 +4479,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i funCall->addArg(*e_z1); tmp_exp = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); - st = AssignStatement(*new SgVarRefExp((*it)->symb), *tmp_exp); + st = AssignStatement(*new SgVarRefExp((*it).symb), *tmp_exp); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4575,9 +4503,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i tmp_exp = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); } - st = AssignStatement(*new SgVarRefExp((*it)->symb), *tmp_exp); + st = AssignStatement(*new SgVarRefExp((*it).symb), *tmp_exp); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(tmp_exp->copy()); idx_exprs++; @@ -4591,9 +4519,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i } else { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid2) * *new SgVarRefExp(*itIdx)); + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid2) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = &(*new SgVarRefExp((*it)->symb)); + leftExprs[idx_exprs] = &(*new SgVarRefExp((*it).symb)); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid2) * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4694,7 +4622,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i if (!options.isOn(C_CUDA) && options.isOn(GPU_O0)) { for (size_t i = 0; i < argsKer->baseIdxsInKer.size(); ++i) - mainFor->lastExecutable()->insertStmtAfter(*AssignStatement(*&leftExprs[i]->copy(), (*(&leftExprs[i]->copy())) + *new SgVarRefExp(argsKer->steps[i])), *mainFor); + mainFor->lastExecutable()->insertStmtAfter(*AssignStatement(*&leftExprs[i]->copy(), (*(&leftExprs[i]->copy())) + *new SgVarRefExp(argsKer->steps[i])), *mainFor); } delete []leftExprs; @@ -4716,17 +4644,17 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i leftExprs = new SgExpression*[count_of_dims]; rightExprs = new SgExpression*[count_of_dims]; - list::iterator itAcr = argsKer->symb.begin(); - list::iterator it = argsKer->nSymb.begin(); - list::iterator itAcrS = argsKer->acrossS.begin(); - list::iterator itS = argsKer->notAcrS.begin(); - list::iterator it_sizeV = argsKer->sizeVars.begin(); - list::iterator itIdxAcr = argsKer->idxAcross.begin(); - list::iterator itIdx = argsKer->idxNotAcross.begin(); + vector::iterator itAcr = argsKer->symb.begin(); + vector::iterator it = argsKer->nSymb.begin(); + vector::iterator itAcrS = argsKer->acrossS.begin(); + vector::iterator itS = argsKer->notAcrossS.begin(); + vector::iterator it_sizeV = argsKer->sizeVars.begin(); + vector::iterator itIdxAcr = argsKer->idxAcross.begin(); + vector::iterator itIdx = argsKer->idxNotAcross.begin(); e = &(*new SgVarRefExp(*itAcrS) - *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdxAcr)); - st = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *e); - leftExprs[idx_exprs] = new SgVarRefExp((*itAcr)->symb); + st = AssignStatement(*new SgVarRefExp((*itAcr).symb), *e); + leftExprs[idx_exprs] = new SgVarRefExp((*itAcr).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS) - *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdxAcr)); idx_exprs++; @@ -4735,8 +4663,8 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itIdxAcr++; e = &(*new SgVarRefExp(*itAcrS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdxAcr)); - st = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *e); - leftExprs[idx_exprs] = new SgVarRefExp((*itAcr)->symb); + st = AssignStatement(*new SgVarRefExp((*itAcr).symb), *e); + leftExprs[idx_exprs] = new SgVarRefExp((*itAcr).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS) + *new SgVarRefExp(*tid) * *new SgVarRefExp(*itIdxAcr)); idx_exprs++; @@ -4746,17 +4674,17 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i if (argsKer->nSymb.size() == 1) { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); idx_exprs++; } else if (argsKer->nSymb.size() >= 2) { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4776,8 +4704,8 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i funCall->addArg(*e_z1); tmp_exp = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); - st = AssignStatement(*new SgVarRefExp((*it)->symb), *tmp_exp); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + st = AssignStatement(*new SgVarRefExp((*it).symb), *tmp_exp); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -4805,8 +4733,8 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i } it--; - st = AssignStatement(*new SgVarRefExp((*it)->symb), *tmp_exp); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + st = AssignStatement(*new SgVarRefExp((*it).symb), *tmp_exp); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); idx_exprs++; e_z1 = &(*e_z1 * *e_z2); @@ -4822,9 +4750,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i else for (; it != argsKer->nSymb.end(); it++, itS++, itIdx++) { - st = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid2) * + st = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(*tid2) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(*tid2) * *new SgVarRefExp(*itIdx)); idx_exprs++; } @@ -4887,7 +4815,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i } else if (argsKer->symb.size() >= 3) // body for >3 dependence { - // attention!! adding to support all variants!! не проверено + // attention!! adding to support all variants!!не проверено if (argsKer->nSymb.size() >= 1) { @@ -4900,13 +4828,14 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i SgSymbol *max_z, *se, *emax, *emin, *v1, *v2, *v3, *min_ij, *swap_ij, *i, *j; SgSymbol **num_elems; SgIfStmt *if_st3; - list::iterator itAcr = argsKer->symb.begin(); - list::iterator it = argsKer->nSymb.begin(); - list::iterator itAcrS = argsKer->acrossS.begin(); - list::iterator itS = argsKer->notAcrS.begin(); - list::iterator it_sizeV = argsKer->sizeVars.begin(); - list::iterator itIdxAcr = argsKer->idxAcross.begin(); - list::iterator itIdx = argsKer->idxNotAcross.begin(); + + vector::iterator itAcr = argsKer->symb.begin(); + vector::iterator it = argsKer->nSymb.begin(); + vector::iterator itAcrS = argsKer->acrossS.begin(); + vector::iterator itS = argsKer->notAcrossS.begin(); + vector::iterator it_sizeV = argsKer->sizeVars.begin(); + vector::iterator itIdxAcr = argsKer->idxAcross.begin(); + vector::iterator itIdx = argsKer->idxNotAcross.begin(); SgExpression **leftExprs, **rightExprs; int idx_exprs = 0; @@ -4959,7 +4888,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itAcrS--; itIdxAcr--; - if_st = new SgIfStmt(*new SgVarRefExp(*tid) < *new SgVarRefExp((*itAcr)->symb), *if_st2); + if_st = new SgIfStmt(*new SgVarRefExp(*tid) < *new SgVarRefExp((*itAcr).symb), *if_st2); if (argsKer->nSymb.size() == 0) if_st3 = new SgIfStmt(*new SgVarRefExp(*tid1) < *new SgVarRefExp(*max_z), *if_st); else @@ -4973,23 +4902,23 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i cur_in_kernel->insertStmtAfter(*if_st3, *kernel_st); cur_in_kernel = if_st->lexNext(); - st1 = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *new SgVarRefExp(*min_ij)); + st1 = AssignStatement(*new SgVarRefExp((*itAcr).symb), *new SgVarRefExp(*min_ij)); - st = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *new SgValueExp(2) * *new SgVarRefExp(*min_ij) - *new SgVarRefExp(se) - + st = AssignStatement(*new SgVarRefExp((*itAcr).symb), *new SgValueExp(2) * *new SgVarRefExp(*min_ij) - *new SgVarRefExp(se) - *new SgVarRefExp(tid1) + *new SgVarRefExp(emax) - *new SgVarRefExp(emin) - *new SgValueExp(1)); if_st1 = new SgIfStmt(*new SgVarRefExp(*tid1) + *new SgVarRefExp(se) < *new SgVarRefExp(*emax), *st1, *st); - st1 = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *new SgVarRefExp(*tid1) + *new SgVarRefExp(se)); + st1 = AssignStatement(*new SgVarRefExp((*itAcr).symb), *new SgVarRefExp(*tid1) + *new SgVarRefExp(se)); if_st1 = new SgIfStmt(*new SgVarRefExp(*tid1) + *new SgVarRefExp(se) < *new SgVarRefExp(*emin), *st1, *if_st1); if_st3->insertStmtAfter(*if_st1); - i = (*itAcr)->symb; - st1 = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *new SgVarRefExp(*itAcrS) + ((*new SgVarRefExp(tid1) * + i = (*itAcr).symb; + st1 = AssignStatement(*new SgVarRefExp((*itAcr).symb), *new SgVarRefExp(*itAcrS) + ((*new SgVarRefExp(tid1) * (*new SgVarRefExp(v1) + *new SgVarRefExp(v3)) - *new SgVarRefExp(tid))) * *new SgVarRefExp(*itIdxAcr)); - leftExprs[idx_exprs] = new SgVarRefExp((*itAcr)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*itAcr).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS) + ((*new SgVarRefExp(tid1) * (*new SgVarRefExp(v1) + *new SgVarRefExp(v3)) - *new SgVarRefExp(tid))) * *new SgVarRefExp(*itIdxAcr)); idx_exprs++; @@ -4999,11 +4928,11 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itIdxAcr++; itAcr++; - j = (*itAcr)->symb; - st1 = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *new SgVarRefExp(*itAcrS) + (*new SgVarRefExp(tid1) * + j = (*itAcr).symb; + st1 = AssignStatement(*new SgVarRefExp((*itAcr).symb), *new SgVarRefExp(*itAcrS) + (*new SgVarRefExp(tid1) * *new SgVarRefExp(v2) + *new SgVarRefExp(tid)) * *new SgVarRefExp(*itIdxAcr)); - leftExprs[idx_exprs] = new SgVarRefExp((*itAcr)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*itAcr).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS) + (*new SgVarRefExp(tid1) * *new SgVarRefExp(v2) + *new SgVarRefExp(tid)) * *new SgVarRefExp(*itIdxAcr)); idx_exprs++; @@ -5012,10 +4941,10 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itIdxAcr++; itAcr++; - st1 = AssignStatement(*new SgVarRefExp((*itAcr)->symb), *new SgVarRefExp(*itAcrS) - *new SgVarRefExp(tid1) * + st1 = AssignStatement(*new SgVarRefExp((*itAcr).symb), *new SgVarRefExp(*itAcrS) - *new SgVarRefExp(tid1) * *new SgVarRefExp(*itIdxAcr)); - leftExprs[idx_exprs] = new SgVarRefExp((*itAcr)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*itAcr).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS) - *new SgVarRefExp(tid1) * *new SgVarRefExp(*itIdxAcr)); idx_exprs++; @@ -5027,7 +4956,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i itIdxAcr++; itAcr++; - leftExprs[idx_exprs] = new SgVarRefExp((*itAcr)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*itAcr).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itAcrS)); idx_exprs++; } @@ -5035,10 +4964,10 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i if (argsKer->nSymb.size() == 1) { - st1 = AssignStatement(*new SgVarRefExp((*it)->symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(tid2) * + st1 = AssignStatement(*new SgVarRefExp((*it).symb), *new SgVarRefExp(*itS) + *new SgVarRefExp(tid2) * *new SgVarRefExp(*itIdx)); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *new SgVarRefExp(tid2) * *new SgVarRefExp(*itIdx)); idx_exprs++; } @@ -5051,9 +4980,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i funCall->addArg(*e_z1); tmp_exp = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); - st = AssignStatement(*new SgVarRefExp((*it)->symb), *tmp_exp); + st = AssignStatement(*new SgVarRefExp((*it).symb), *tmp_exp); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); rightExprs[idx_exprs] = &(*new SgVarRefExp(*itS) + *funCall * *new SgVarRefExp(*itIdx)); idx_exprs++; @@ -5079,9 +5008,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i } it--; - st = AssignStatement(*new SgVarRefExp((*it)->symb), *tmp_exp); + st = AssignStatement(*new SgVarRefExp((*it).symb), *tmp_exp); - leftExprs[idx_exprs] = new SgVarRefExp((*it)->symb); + leftExprs[idx_exprs] = new SgVarRefExp((*it).symb); idx_exprs++; e_z1 = &(*e_z1 * *e_z2); @@ -5123,7 +5052,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i st1 = AssignStatement(new SgArrayRefExp(*coords, *new SgVarRefExp(argsKer->baseIdxsInKer[0]) + *new SgValueExp(1)), new SgVarRefExp(v3)); if_st2 = new SgIfStmt(*new SgVarRefExp(swap_ij) * *new SgVarRefExp(v3), *st1); - st1 = AssignStatement(new SgArrayRefExp(*coords, *new SgVarRefExp(argsKer->baseIdxsInKer[1]) + *new SgValueExp(1)), + st1 = AssignStatement(new SgArrayRefExp(*coords, *new SgVarRefExp(argsKer->baseIdxsInKer[1]) + *new SgValueExp(1)), new SgArrayRefExp(*coords, *new SgVarRefExp(argsKer->baseIdxsInKer[0]) + *new SgValueExp(1))); if_st2->insertStmtAfter(*st1); @@ -5160,12 +5089,12 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i SgStatement *currStForInsetGetXY = cur_in_kernel; vector forDeclarationInKernel; set uniqueNames; - + // create, insert, optimize and convert loop_body into kernel { SgStatement *stk, *last; vector allNewInfo; - + if (argsKer->symb.size() == 1) { if (options.isOn(GPU_O0)) @@ -5177,12 +5106,11 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i block = CreateIfForRedBlack(loop_body, nloop); - last = cur_in_kernel->lexNext(); + last = cur_in_kernel->lexNext(); if (argsKer->symb.size() == 1 && allNewInfo.size() != 0 && options.isOn(GPU_O0)) //insert needed assigns { - list::iterator itIdxAcr = argsKer->idxAcross.begin(); - SgIfStmt *ifSt = new SgIfStmt(*new SgVarRefExp(*itIdxAcr) > *new SgValueExp(0), *&allNewInfo[0].loadsBeforePlus[0]->copy(), *&allNewInfo[0].loadsBeforeMinus[0]->copy()); + SgIfStmt *ifSt = new SgIfStmt(*new SgVarRefExp(argsKer->idxAcross[0]) > *new SgValueExp(0), *&allNewInfo[0].loadsBeforePlus[0]->copy(), *&allNewInfo[0].loadsBeforeMinus[0]->copy()); for (size_t i = 0; i < allNewInfo.size(); ++i) { if (i == 0) @@ -5200,18 +5128,18 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i ifSt->insertStmtAfter(*&allNewInfo[i].loadsBeforePlus[k]->copy(), *ifSt); ifSt->falseBody()->insertStmtBefore(*&allNewInfo[i].loadsBeforeMinus[k]->copy(), *ifSt); } - } + } } mainFor->insertStmtBefore(*ifSt); } - + if (argsKer->symb.size() == 1 && options.isOn(GPU_O0)) cur_in_kernel->insertStmtAfter(*block, *mainFor); //cur_in_kernel is innermost FOR stmt else cur_in_kernel->insertStmtAfter(*block, *if_st); //cur_in_kernel is innermost IF statement if (options.isOn(C_CUDA)) - { + { if (block->comments() == NULL) block->addComment("// Loop body"); } @@ -5226,7 +5154,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i if (stk->variant() == CONTROL_END) { - if (stk->hasLabel() || stk == loop_body) // when body of DO_ENDDO loop is empty, stk == loop_body + if (stk->hasLabel() || stk == loop_body) // when body of DO_ENDDO loop is empty, stk == loop_body stk->setVariant(CONT_STAT); else { @@ -5243,8 +5171,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i if (argsKer->symb.size() == 1 && allNewInfo.size() != 0 && options.isOn(GPU_O0)) //insert needed assigns { - list::iterator itIdxAcr = argsKer->idxAcross.begin(); - SgIfStmt *ifSt = new SgIfStmt(*new SgVarRefExp(*itIdxAcr) > *new SgValueExp(0), *&allNewInfo[0].loadsInForPlus[0]->copy(), *&allNewInfo[0].loadsInForMinus[0]->copy()); + SgIfStmt *ifSt = new SgIfStmt(*new SgVarRefExp(argsKer->idxAcross[0]) > *new SgValueExp(0), *&allNewInfo[0].loadsInForPlus[0]->copy(), *&allNewInfo[0].loadsInForMinus[0]->copy()); for (size_t i = 0; i < allNewInfo.size(); ++i) { @@ -5272,12 +5199,12 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i else { for (size_t k = 0; k < allNewInfo[i].stores.size(); ++k) - mainFor->lastExecutable()->lexPrev()->lexPrev()->insertStmtBefore(*&allNewInfo[i].stores[k]->copy()); + mainFor->lastExecutable()->lexPrev()->lexPrev()->insertStmtBefore(*&allNewInfo[i].stores[k]->copy()); } - } + } size_t k = allNewInfo[0].swapsUp.size() - 1; - ifSt = new SgIfStmt(*new SgVarRefExp(*itIdxAcr) > *new SgValueExp(0), *&allNewInfo[0].swapsDown[k]->copy(), *&allNewInfo[0].swapsUp[k]->copy()); + ifSt = new SgIfStmt(*new SgVarRefExp(argsKer->idxAcross[0]) > *new SgValueExp(0), *&allNewInfo[0].swapsDown[k]->copy(), *&allNewInfo[0].swapsUp[k]->copy()); for (size_t i = 0; i < allNewInfo.size(); ++i) { size_t last; @@ -5412,7 +5339,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i else Translate_Fortran_To_C(if_st, if_st->lastNodeOfStmt(), copyOfBody, 0); // countOfCopies } - + cur_st = last; if (createBodyKernel == false) createBodyKernel = true; @@ -5529,7 +5456,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i int loc_el_num = 0; if (isSgExprListExp(red_expr_ref)) { - red_expr_ref = red_expr_ref->lhs(); // reduction variable reference + red_expr_ref = red_expr_ref->lhs(); // reduction variable reference loc_var_ref = er->lhs()->rhs()->rhs()->lhs(); //location array reference en = er->lhs()->rhs()->rhs()->rhs()->lhs(); // number of elements in location array loc_el_num = LocElemNumber(en); @@ -5591,14 +5518,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i SgSymbol *redGrid = new SgSymbol(VARIABLE_NAME, tmp_list->red_grid->identifier()); redGrid->setType(*new SgArrayType(*tmp_list->red_grid->type())); - list::iterator it_sizeV = argsKer->sizeVars.begin(); - it_sizeV++; - it_sizeV++; - it_sizeV++; - it_sizeV++; - it_sizeV++; - it_sizeV++; - SgSymbol *emin = *it_sizeV; + SgSymbol *emin = argsKer->sizeVars[6]; funcCall->addArg(*new SgArrayRefExp(*redGrid, *new SgVarRefExp(*tid) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*emin))); funcCall->addArg(*red_expr_ref); st = AssignStatement(*new SgArrayRefExp(*redGrid, *new SgVarRefExp(*tid) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*emin)), *funcCall); @@ -5620,14 +5540,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i } else { - list::iterator it_sizeV = argsKer->sizeVars.begin(); - it_sizeV++; - it_sizeV++; - it_sizeV++; - it_sizeV++; - it_sizeV++; - it_sizeV++; - SgSymbol *emin = *it_sizeV; + SgSymbol *emin = argsKer->sizeVars[6]; e1 = &(*new SgVarRefExp(*tid) + *new SgVarRefExp(*tid1) * *new SgVarRefExp(*emin)); } e = NULL; @@ -5636,11 +5549,11 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i redGrid->setType(*new SgArrayType(*tmp_list->red_grid->type())); SgExpression* red_ref = NULL; - + if (tmp_list->redvar_size == 0) red_ref = red_expr_ref; else // TODO - red_ref = new SgArrayRefExp(*red_expr_ref->symbol(), *new SgVarRefExp(freeS)); + red_ref = new SgArrayRefExp(*red_expr_ref->symbol(), *new SgVarRefExp(freeS)); if (num == 1) e = &(*new SgArrayRefExp(*redGrid, *e1) + *red_ref); @@ -5704,7 +5617,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i needComment = 0; } } - + DeclarationCreateReductionBlocksAcross(nloop, red_list); } else if (red_list && argsKer->nSymb.size() > 0) // generating reduction calculation blocks @@ -5765,88 +5678,41 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i if (!options.isOn(C_CUDA)) { - for (list::iterator it1 = argsKer->sizeVars.begin(); it1 != argsKer->sizeVars.end(); ++it1) - { - st = (*it1)->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } - - for (list::iterator it = argsKer->acrossS.begin(); it != argsKer->acrossS.end(); ++it) - { - st = (*it)->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } - - for (list::iterator it = argsKer->notAcrS.begin(); it != argsKer->notAcrS.end(); ++it) - { - st = (*it)->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } - - for (list::iterator it = argsKer->idxAcross.begin(); it != argsKer->idxAcross.end(); ++it) - { - st = (*it)->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } - - for (list::iterator it = argsKer->idxNotAcross.begin(); it != argsKer->idxNotAcross.end(); ++it) - { - st = (*it)->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } + createDeclaration(argsKer->sizeVars); + createDeclaration(argsKer->acrossS); + createDeclaration(argsKer->notAcrossS); + createDeclaration(argsKer->idxAcross); + createDeclaration(argsKer->idxNotAcross); for (size_t i = 0; i < argsKer->otherVars.size() / 8 * 8; i += 8) { - st = argsKer->otherVars[i]->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); + createDeclaration(argsKer->otherVars[i]); addDeclExpList(argsKer->otherVars[i + 3], st->expr(0)); - st = argsKer->otherVars[i + 1]->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); + createDeclaration(argsKer->otherVars[i + 1]); addDeclExpList(argsKer->otherVars[i + 4], st->expr(0)); - st = argsKer->otherVars[i + 2]->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); + createDeclaration(argsKer->otherVars[i + 2]); addDeclExpList(argsKer->otherVars[i + 5], st->expr(0)); - st = argsKer->otherVars[i + 6]->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); + createDeclaration(argsKer->otherVars[i + 6]); addDeclExpList(argsKer->otherVars[i + 7], st->expr(0)); } if (argsKer->otherVars.size() != 0 && argsKer->otherVars.size() % 8 != 0) - { - st = argsKer->otherVars[argsKer->otherVars.size() - 1]->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } + createDeclaration(argsKer->otherVars[argsKer->otherVars.size() - 1]); for (size_t i = 0; i < argsKer->baseIdxsInKer.size(); ++i) { if (i == 0) - { - st = argsKer->baseIdxsInKer[i]->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); - } + createDeclaration(argsKer->baseIdxsInKer[i]); else addDeclExpList(argsKer->baseIdxsInKer[i], st->expr(0)); } if (argsKer->cond_ != NULL) { - st = argsKer->cond_->makeVarDeclStmt(); - st->setExpression(2, *new SgExprListExp(*new SgExpression(ACC_VALUE_OP))); - kernel_st->insertStmtAfter(*st); + createDeclaration(argsKer->cond_); for (size_t i = 0; i < argsKer->steps.size(); ++i) addDeclExpList(argsKer->steps[i], st->expr(0)); } @@ -5860,7 +5726,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel *argsKer, i kernel_st->insertStmtAfter(*new SgStatement(IMPL_DECL), *kernel_st); ACROSS_MOD_IN_KERNEL = 0; - return(kernel_st); + return kernel_st; } @@ -5918,13 +5784,13 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope i = ind = loc_el_num = 0; //end of init block - // analys of reduction operation + // analys of reduction operation // ered - reduction operation (variant==ARRAY_OP) - ev = ered->rhs(); // reduction variable reference for reduction operations except MINLOC,MAXLOC - if (isSgExprListExp(ev)) // for MAXLOC,MINLOC + ev = ered->rhs(); // reduction variable reference for reduction operations except MINLOC,MAXLOC + if (isSgExprListExp(ev)) // for MAXLOC,MINLOC { loc_var = ev->rhs()->lhs()->symbol(); //location array reference - ev = ev->lhs(); // reduction variable reference + ev = ev->lhs(); // reduction variable reference } else loc_var = NULL; @@ -5935,7 +5801,7 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope // _block(i)%(1) = (1) // [_block(i)%(2) = (2) ] // . . . - // create and declare array '_block' + // create and declare array '_block' red_var = ev->symbol(); if (rsl->locvar) @@ -6022,7 +5888,7 @@ void CreateReductionBlocksAcross(SgStatement *stat, int nloop, SgExpression *red i_var = dost->symbol(); if (nloop > 1) j_var = dost->controlParent()->symbol(); - else + else { j_var = IndVarInKernel(i_var); newst = j_var->makeVarDeclStmt(); @@ -6032,26 +5898,16 @@ void CreateReductionBlocksAcross(SgStatement *stat, int nloop, SgExpression *red // declare '_block' array for each reduction var // = threadIdx%x -1 + [ (threadIdx%y - 1) * blockDim%x [ + (threadIdx%z - 1) * blockDim%x * blockDim%y ] ] // or C_Cuda - // = threadIdx%x + [ threadIdx%y * blockDim%x [ + threadIdx%z * blockDim%x * blockDim%y ] ] + // = threadIdx%x + [ threadIdx%y * blockDim%x [ + threadIdx%z * blockDim%x * blockDim%y ] ] re = ThreadIdxRefExpr("x"); if (nloop > 1) re = &(*re + (*ThreadIdxRefExpr("y")) * (*new SgRecordRefExp(*s_blockdim, "x"))); if (nloop > 2) re = &(*re + (*ThreadIdxRefExpr("z")) * (*new SgRecordRefExp(*s_blockdim, "x") * (*new SgRecordRefExp(*s_blockdim, "y")))); - + if (options.isOn(C_CUDA)) // global cuda index - { - /*SgExpression& globalX = (*new SgRecordRefExp(*s_blockdim, "x") * *new SgRecordRefExp(*s_blockidx, "x") + *new SgRecordRefExp(*s_threadidx, "x")); - SgExpression& globalY = (*new SgRecordRefExp(*s_blockdim, "y") * *new SgRecordRefExp(*s_blockidx, "y") + *new SgRecordRefExp(*s_threadidx, "y")); - SgExpression& globalZ = (*new SgRecordRefExp(*s_blockdim, "z") * *new SgRecordRefExp(*s_blockidx, "z") + *new SgRecordRefExp(*s_threadidx, "z")); - - SgExpression& globalDimX = (*new SgRecordRefExp(*s_griddim, "x") * *new SgRecordRefExp(*s_blockdim, "x")); - SgExpression& globalDimY = (*new SgRecordRefExp(*s_griddim, "y") * *new SgRecordRefExp(*s_blockdim, "y") * globalDimX); - - ass = new SgAssignStmt(*new SgVarRefExp(i_var), globalX + globalY * globalDimX + globalZ * globalDimY);*/ - - + { // gIDX = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.x * blockDim.y + (blockIdx.x + blockIdx.y * gridDim.x + blockIdx.z * gridDim.x * gridDim.y) * blockDim.x * blockDim.y * blockDim.z; SgExpression& thrX = *new SgRecordRefExp(*s_threadidx, "x"); SgExpression& thrY = *new SgRecordRefExp(*s_threadidx, "y"); diff --git a/dvm/fdvm/trunk/fdvm/aks_structs.cpp b/dvm/fdvm/trunk/fdvm/aks_structs.cpp index 935858b..ab4da20 100644 --- a/dvm/fdvm/trunk/fdvm/aks_structs.cpp +++ b/dvm/fdvm/trunk/fdvm/aks_structs.cpp @@ -1,5 +1,12 @@ #include "dvm.h" #include "aks_structs.h" +#include +#include +#include + +using std::vector; +using std::string; +using std::map; #define DEBUG_LV1 true #if 1 @@ -16,22 +23,20 @@ SgExpression* findDirect(SgExpression *inExpr, int DIR) if (inExpr) { if (inExpr->variant() == DIR) - { return inExpr; - } else { - if (inExpr->lhs()) - temp = findDirect(inExpr->lhs(), DIR); + if (inExpr->lhs()) + temp = findDirect(inExpr->lhs(), DIR); - if(temp == NULL && inExpr->rhs()) + if(temp == NULL && inExpr->rhs()) temp = findDirect(inExpr->rhs(), DIR); } } return temp; } -static SgSymbol** fillDataOfArray(SgExpression* on, int& dimInPar) +static vector fillDataOfArray(SgExpression* on, int& dimInPar) { dimInPar = 0; SgExpression* temp = on; @@ -40,7 +45,8 @@ static SgSymbol** fillDataOfArray(SgExpression* on, int& dimInPar) dimInPar++; temp = temp->rhs(); } - SgSymbol** symbInPar = new SgSymbol * [dimInPar]; + + vector symbInPar(dimInPar); temp = on; for (int i = 0; i < dimInPar; ++i) { @@ -50,20 +56,20 @@ static SgSymbol** fillDataOfArray(SgExpression* on, int& dimInPar) return symbInPar; } -SageArrayIdxs* GetIdxInParDir(const std::map& on, SgExpression *across, bool tie = false) +static void printError() { - SageArrayIdxs *ret = new SageArrayIdxs(); - SageArrayIdxs *act = ret; - int allDim = 0; - int dimInPar = 0; - SgSymbol** symbInPar = NULL; - ret->next = NULL; - ret->array_expr = NULL; - ret->read_write = -1; - ret->dim = 0; - ret->symb = NULL; + err("internal error in across", 424, first_do_par); + exit(-1); +} + +static vector GetIdxInParDir(const map& on, SgExpression *across, bool tie = false) +{ + vector ret; + + int dimInPar = 0; + vector symbInPar; + vector toAnalyze; - std::vector toAnalyze; if (across->lhs()->variant() == EXPR_LIST) toAnalyze.push_back(across->lhs()); else @@ -80,15 +86,12 @@ SageArrayIdxs* GetIdxInParDir(const std::map& on, Sg across = toAnalyze[i]; while (across) { - if (symbInPar == NULL) + if (symbInPar.size() == 0) { if (on.size() == 0) - { - fprintf(stderr, "internal error in across convertion for GPU\n"); - exit(-1); - } + printError(); else if (on.size() == 1) - symbInPar = fillDataOfArray(on.begin()->second, dimInPar); + symbInPar = fillDataOfArray(on.begin()->second, dimInPar); } SgExpression *t = across->lhs(); @@ -99,20 +102,14 @@ SageArrayIdxs* GetIdxInParDir(const std::map& on, Sg if (t->variant() == ARRAY_REF) { if (on.find(t->symbol()->identifier()) == on.end()) - { - fprintf(stderr, "internal error in across convertion for GPU\n"); - exit(-1); - } + printError(); else symbInPar = fillDataOfArray(on.find(t->symbol()->identifier())->second, dimInPar); } else if (t->variant() == ARRAY_OP) { if (on.find(t->lhs()->symbol()->identifier()) == on.end()) - { - fprintf(stderr, "internal error in across convertion for GPU\n"); - exit(-1); - } + printError(); else symbInPar = fillDataOfArray(on.find(t->lhs()->symbol()->identifier())->second, dimInPar); } @@ -134,53 +131,50 @@ SageArrayIdxs* GetIdxInParDir(const std::map& on, Sg dim++; tmp = tmp->rhs(); } - act->next = new SageArrayIdxs(); - act = act->next; - act->next = NULL; - act->symb = new SageSymbols*[dim]; - act->dim = dim; + + SageArrayIdxs act; + + act.symb.resize(dim); + act.dim = dim; for (int i = 0; i < dim; ++i) { - act->symb[i] = new SageSymbols(); - act->symb[i]->across_left = t->lhs()->lhs()->valueInteger(); - act->symb[i]->across_right = t->lhs()->rhs()->valueInteger(); - if (act->symb[i]->across_left != 0 || act->symb[i]->across_right != 0) - act->symb[i]->symb = symbInPar[i]; + act.symb[i].across_left = t->lhs()->lhs()->valueInteger(); + act.symb[i].across_right = t->lhs()->rhs()->valueInteger(); + if (act.symb[i].across_left != 0 || act.symb[i].across_right != 0) + act.symb[i].symb = symbInPar[i]; else if (i < dimInPar) - act->symb[i]->symb = symbInPar[i]; + act.symb[i].symb = symbInPar[i]; else - act->symb[i]->symb = NULL; - act->symb[i]->next = NULL; + act.symb[i].symb = NULL; t = t->rhs(); } - allDim++; + ret.push_back(act); across = across->rhs(); } } - ret->dim = allDim; + return ret; } -SageAcrossInfo* GetLoopsWithParAndAcrDir() +SageAcrossInfo GetLoopsWithParAndAcrDir() { - SageAcrossInfo *q = NULL; + SageAcrossInfo retVal; SgStatement *temp = dvm_parallel_dir; if (temp->variant() == DVM_PARALLEL_ON_DIR) { SgExpression *t = findDirect(temp->expr(1), ACROSS_OP); SgExpression *tie = findDirect(temp->expr(1), ACC_TIE_OP); - - std::map arrays; + + map arrays; if (t != NULL) { - q = new SageAcrossInfo(); if (temp->expr(0) && temp->expr(0)->lhs()) { arrays[temp->expr(0)->symbol()->identifier()] = temp->expr(0)->lhs(); - q->idx = GetIdxInParDir(arrays, t); - } + retVal.idxs = GetIdxInParDir(arrays, t); + } else if (tie) { SgExpression* list = tie->lhs(); @@ -189,34 +183,24 @@ SageAcrossInfo* GetLoopsWithParAndAcrDir() arrays[list->lhs()->symbol()->identifier()] = list->lhs()->lhs(); list = list->rhs(); } - q->idx = GetIdxInParDir(arrays, t, true); + retVal.idxs = GetIdxInParDir(arrays, t, true); } else - { - fprintf(stderr, "internal error in across convertion for GPU\n"); - exit(-1); - } - q->next = NULL; + printError(); } - } - return q; + } + return retVal; } -SageSymbols *GetSymbInParalell(int *n, SgExpression *first) +vector GetSymbInParalell(SgExpression *first) { - SageSymbols *retval; - SageSymbols *p_t = new SageSymbols(); - retval = p_t; + vector retval; while(first) { - SageSymbols *q = new SageSymbols(); - q->len = -1; - q->next = NULL; - q->symb = first->lhs()->symbol(); - p_t->next = q; - p_t = q; - n[0]++; + SageSymbols q(first->lhs()->symbol(), -1, 0, 0); + retval.push_back(q); + first = first->rhs(); } - return retval->next; + return retval; } diff --git a/dvm/fdvm/trunk/fdvm/dvm.cpp b/dvm/fdvm/trunk/fdvm/dvm.cpp index 2ed78be..c63cd24 100644 --- a/dvm/fdvm/trunk/fdvm/dvm.cpp +++ b/dvm/fdvm/trunk/fdvm/dvm.cpp @@ -4757,7 +4757,7 @@ void RedistributeArray(SgSymbol *das, int idisars, SgExpression *distr_rule_list if_st = doIfThenConstrForRedis(headref_flag,stdis,iamv); /*08.05.17*/ where = end_if = if_st->lexNext()->lexNext(); // reffer to ENDIF statement i1 = ndvm; - if(ACC_program) /*ACC*/ + if(ACC_program || parloop_by_handler) /*ACC*/ where->insertStmtBefore(*Redistribute_H(headref,sign),*where->controlParent()); amvref = (ia & TEMPLATE_BIT) ? headref : GetAMView( headref); //inserting after ELSE @@ -4838,7 +4838,7 @@ void RedistributeArray(SgSymbol *das, int idisars, SgExpression *distr_rule_list else { SgExpression *amvref; - if(ACC_program) /*ACC*/ + if(ACC_program || parloop_by_handler) /*ACC*/ where->insertStmtBefore(*Redistribute_H(headref,sign),*where->controlParent()); amvref = (ia & TEMPLATE_BIT) ? headref : GetAMView( headref); @@ -5108,7 +5108,7 @@ void RealignArray(SgSymbol *als, SgSymbol *tgs, int iaxis, int nr, SgExpression return; } iamv = ndvm; - if(ACC_program ) /*ACC*/ + if(ACC_program || parloop_by_handler) /*ACC*/ { if( !(ia & POSTPONE_BIT) ) doCallAfter(Realign_H(HeaderRef(als),new_sign)); else { diff --git a/dvm/fdvm/trunk/fdvm/funcall.cpp b/dvm/fdvm/trunk/fdvm/funcall.cpp index db93dc8..1c387d7 100644 --- a/dvm/fdvm/trunk/fdvm/funcall.cpp +++ b/dvm/fdvm/trunk/fdvm/funcall.cpp @@ -2668,7 +2668,7 @@ SgStatement *RTL_GPU_Init() SgCallStmt *call = new SgCallStmt(*fdvm[DVMH_INIT]); fmask[DVMH_INIT] = 2; call -> addArg(*DVM000(ndvm)); - if(!only_debug && ACC_program) + if(!only_debug && (ACC_program || parloop_by_handler)) call -> addComment(OpenMpComment_InitFlags(ndvm)); int flag = 1; diff --git a/dvm/fdvm/trunk/fdvm/parloop.cpp b/dvm/fdvm/trunk/fdvm/parloop.cpp index ec5ae39..6dc5acc 100644 --- a/dvm/fdvm/trunk/fdvm/parloop.cpp +++ b/dvm/fdvm/trunk/fdvm/parloop.cpp @@ -282,7 +282,7 @@ void EndOfParallelLoopNest(SgStatement *stmt, SgStatement *end_stmt, SgStatement ConsistentArraysStart(cons_list); if(iconsg) {//there is synchronous CONSISTENT clause in PARALLEL - if(IN_COMPUTE_REGION || parloop_by_handler) /*ACC*/ + if(IN_COMPUTE_REGION) /*ACC*/ // generating call statement: // call dvmh_handle_consistent(ConsistGroupRef) doCallAfter(HandleConsistent(consgref)); @@ -2227,6 +2227,8 @@ SgExpression *MappingList(SgStatement *stmt, SgExpression *aref) (el = new SgExprListExp(*e))->setRhs(arglist); arglist = el; } + (el = new SgExprListExp(*ConstRef(nt)))->setRhs(arglist); // add rank to axis list + arglist = el; return arglist; } diff --git a/dvm/fdvm/trunk/include/aks_structs.h b/dvm/fdvm/trunk/include/aks_structs.h index 83ea4c3..65f96c2 100644 --- a/dvm/fdvm/trunk/include/aks_structs.h +++ b/dvm/fdvm/trunk/include/aks_structs.h @@ -1,80 +1,65 @@ #pragma once #include "acc_data.h" - -struct SageStOp -{ - SgForStmt *loop_op; - SgStatement *comment_op; - SageStOp *inner_loops; - SageStOp *next; - int count_inner_loops; - int line_code; - int numChList1; - int numChList2; - int depth; - int LoopNest; -}; struct SageSymbols { - SgSymbol *symb; - int len; - SageSymbols *next; - int across_left; - int across_right; + SageSymbols() + { + across_left = across_right = 0; + len = -1; + symb = NULL; + } + + SageSymbols(SgSymbol* symb, int len, int across_left, int across_right) : + symb(symb), len(len), across_left(across_left), across_right(across_right) + { } + + SgSymbol *symb; + int len; + int across_left; + int across_right; }; struct SageArrayIdxs { - SageSymbols **symb; - int dim; - int read_write; - SgExpression *array_expr; - SageArrayIdxs *next; -}; - -struct Templates -{ - SageSymbols *first; - int count; - int read_write; - int count_write_read; - Templates *next; + std::vector symb; + int dim; + int read_write; + SgExpression *array_expr; }; struct SageAcrossInfo { - SageStOp *Op; - SageArrayIdxs *idx; - SageAcrossInfo *next; + std::vector idxs; }; struct ArgsForKernel { - SgStatement *st_header; - std::list symb; - std::list nSymb; - std::list sizeVars; - std::list acrossS; - std::list notAcrS; - std::list idxAcross; - std::list idxNotAcross; - std::vector otherVars; - std::vector arrayNames; - std::vector otherVarsForOneTh; - std::vector baseIdxsInKer; - SgSymbol *cond_; - std::vector steps; + SgStatement *st_header; + std::vector symb; + std::vector nSymb; + std::vector sizeVars; + std::vector acrossS; + std::vector notAcrossS; + std::vector idxAcross; + std::vector idxNotAcross; + + std::vector otherVars; + std::vector arrayNames; + std::vector otherVarsForOneTh; + std::vector baseIdxsInKer; + SgSymbol *cond_; + std::vector steps; }; /*struct GetXYInfo { - std::vector AllExp; - SgSymbol *varName; - char *arrayName; - long type; - int placeF; - int placeS; + std::vector AllExp; + SgSymbol *varName; + char *arrayName; + long type; + int placeF; + int placeS; };*/ @@ -91,133 +76,132 @@ void getIdxs(char*, int&, int&); struct ParamsForAllVariants { - SgSymbol *s_adapter; - SgSymbol *s_kernel_symb; - int loopV; - int acrossV; - int allDims; - SageSymbols **loopSymb; - SageSymbols **loopAcrossSymb; - char *nameOfNewSAdapter; - char *nameOfNewKernelSymb; - int type; + SgSymbol *s_adapter; + SgSymbol *s_kernel_symb; + int loopV; + int acrossV; + int allDims; + std::vector loopSymb; + std::vector loopAcrossSymb; + char *nameOfNewSAdapter; + char *nameOfNewKernelSymb; + int type; }; struct Bound { - int L; - int R; - bool exL; - bool exR; - bool ifDdot; - SgExpression *additionalExpr; + int L; + int R; + bool exL; + bool exR; + bool ifDdot; + SgExpression *additionalExpr; }; struct BestPattern { - std::vector what; - std::vector bounds; - SgExpression *bestPatt; - int count_of_pattern; + std::vector what; + std::vector bounds; + SgExpression *bestPatt; + int count_of_pattern; }; struct Pattern { - int count_read_op; - int count_write_op; - SgExpression *symbs; + int count_read_op; + int count_write_op; + SgExpression *symbs; }; struct AnalyzeStat { - SgSymbol *replaceSymbol; - int ifHasDim; - SgSymbol *name_of_array; - SgExpression *ex_name_of_array; - std::vector patterns; + SgSymbol *replaceSymbol; + int ifHasDim; + SgSymbol *name_of_array; + SgExpression *ex_name_of_array; + std::vector patterns; }; // struct acrossInfo { - char *nameOfArray; - SgSymbol *symbol; - int allDim; - int acrossPos; - int widthL; - int widthR; - int acrossNum; - std::vector dims; - std::vector symbs; + char *nameOfArray; + SgSymbol *symbol; + int allDim; + int acrossPos; + int widthL; + int widthR; + int acrossNum; + std::vector dims; + std::vector symbs; }; struct newInfo { - SgSymbol *newArray; - std::vector dimSize; - std::vector loadsBeforePlus; - std::vector loadsInForPlus; - std::vector loadsBeforeMinus; - std::vector loadsInForMinus; - std::vector stores; - std::vector swapsDown; - std::vector swapsUp; + SgSymbol *newArray; + std::vector dimSize; + std::vector loadsBeforePlus; + std::vector loadsInForPlus; + std::vector loadsBeforeMinus; + std::vector loadsInForMinus; + std::vector stores; + std::vector swapsDown; + std::vector swapsUp; }; // end // block struct Group { - char *strOfmain; // - SgExpression *mainPattern; - std::vector inGroup; - std::vector len; - std::vector sortLen; - newInfo replaceInfo; // replace info with all needed loads and swaps for optimization + char *strOfmain; // + SgExpression *mainPattern; + std::vector inGroup; + std::vector len; + std::vector sortLen; + newInfo replaceInfo; // replace info with all needed loads and swaps for optimization }; struct PositionGroup { - std::map tableReplace; // table of mapping new private variables to distributed arrays for replacing in loop body - std::map tableNewVars; // table of new private variables that is needed to add in cuda kernel - int position; // position of fixed variable in distributed loop, index 0 corresponds to the first variable. - SgExpression *idxInPos; // - std::vector allPosGr; // all groups of array access patterns with fixed loop variables, which is distributed + std::map tableReplace; // table of mapping new private variables to distributed arrays for replacing in loop body + std::map tableNewVars; // table of new private variables that is needed to add in cuda kernel + int position; // position of fixed variable in distributed loop, index 0 corresponds to the first variable. + SgExpression *idxInPos; // + std::vector allPosGr; // all groups of array access patterns with fixed loop variables, which is distributed }; struct ArrayGroup { - SgSymbol *arrayName; // name of distribute array - std::vector allGroups; // all groups, where one loop variable is fixed + SgSymbol *arrayName; // name of distribute array + std::vector allGroups; // all groups, where one loop variable is fixed }; // end of block struct LoopInfo { - std::vector loopSymbols; - std::vector lowBounds; - std::vector highBounds; - std::vector steps; - int lineNumber; + std::vector loopSymbols; + std::vector lowBounds; + std::vector highBounds; + std::vector steps; + int lineNumber; }; struct ArrayIntents { - std::vector arrayList; - std::vector intent; + std::vector arrayList; + std::vector intent; }; struct AnalyzeReturnGpuO1 { - std::vector allStat; - std::vector bestPatterns; - std::vector allArrayGroup; + std::vector allStat; + std::vector bestPatterns; + std::vector allArrayGroup; }; // functions SgExpression* findDirect(SgExpression*, int); -//SageArrayIdxs* GetIdxInParDir(SgExpression*, SgExpression*); -SageAcrossInfo* GetLoopsWithParAndAcrDir(); -SageSymbols *GetSymbInParalell(int*, SgExpression*); +SageAcrossInfo GetLoopsWithParAndAcrDir(); +std::vector GetSymbInParalell(SgExpression*); int GetIdxPlaceInParDir(SageSymbols*, SgSymbol*); diff --git a/sapfor/experts/Sapfor_2017/_src/Sapfor.cpp b/sapfor/experts/Sapfor_2017/_src/Sapfor.cpp index e80bf3f..125f73b 100644 --- a/sapfor/experts/Sapfor_2017/_src/Sapfor.cpp +++ b/sapfor/experts/Sapfor_2017/_src/Sapfor.cpp @@ -321,7 +321,8 @@ static string unparseProjectIfNeed(SgFile* file, const int curr_regime, const bo //TODO: add freeForm for each file if (curr_regime == INSERT_INCLUDES && filesToInclude.find(file_name) != filesToInclude.end()) { - unparseToBuf = removeIncludeStatsAndUnparse(file, file_name, fout_name.c_str(), allIncludeFiles, out_free_form == 1, moduleUsesByFile, moduleDecls, getObjectForFileFromMap(file_name, exctactedModuleStats), toString, true); //, + unparseToBuf = removeIncludeStatsAndUnparse(file, file_name, fout_name.c_str(), allIncludeFiles, out_free_form == 1, moduleUsesByFile, + moduleDecls, getObjectForFileFromMap(file_name, exctactedModuleStats), toString, true); auto itI = filesToInclude.find(file_name); for (auto& incl : itI->second) if (allIncludeFiles.find(incl) != allIncludeFiles.end()) @@ -329,7 +330,8 @@ static string unparseProjectIfNeed(SgFile* file, const int curr_regime, const bo } else { - unparseToBuf = removeIncludeStatsAndUnparse(file, file_name, fout_name.c_str(), allIncludeFiles, out_free_form == 1, moduleUsesByFile, moduleDecls, getObjectForFileFromMap(file_name, exctactedModuleStats), toString); + unparseToBuf = removeIncludeStatsAndUnparse(file, file_name, fout_name.c_str(), allIncludeFiles, out_free_form == 1, moduleUsesByFile, + moduleDecls, getObjectForFileFromMap(file_name, exctactedModuleStats), toString); // copy includes that have not changed if (folderName != NULL) @@ -995,6 +997,10 @@ static bool runAnalysis(SgProject &project, const int curr_regime, const bool ne fileIt->second.insert(first->fileName()); } } + + if (inlcudeAllFiles) + if (fileIt->second.size()) + filesToInclude[file_name] = fileIt->second; } else if (curr_regime == REMOVE_AND_CALC_SHADOW) { @@ -2825,6 +2831,8 @@ int main(int argc, char **argv) debSh = 1; else if (string(curr_arg) == "-noLogo") noLogo = true; + else if (string(curr_arg) == "-includeAll") + inlcudeAllFiles = true; break; default: break; diff --git a/sapfor/experts/Sapfor_2017/_src/SapforData.h b/sapfor/experts/Sapfor_2017/_src/SapforData.h index 89b59a3..a94134e 100644 --- a/sapfor/experts/Sapfor_2017/_src/SapforData.h +++ b/sapfor/experts/Sapfor_2017/_src/SapforData.h @@ -46,6 +46,7 @@ bool ignoreArrayDistributeState = false; bool fullDepGraph = false; bool noLogo = false; bool withTemplateInfo = false; +bool inlcudeAllFiles = false; // for pass INSERT_INLCUDES uint64_t currentAvailMemory = 0; int QUALITY; // quality of conflicts search in graph @@ -82,7 +83,7 @@ std::map>> commentsToInclude; // //for INSERT_INCLUDES -std::map> filesToInclude; +std::map> filesToInclude; // file -> includes // //for PASSES DEPENDENSIES diff --git a/sapfor/experts/Sapfor_2017/_src/Utils/version.h b/sapfor/experts/Sapfor_2017/_src/Utils/version.h index d049651..4e2e3d5 100644 --- a/sapfor/experts/Sapfor_2017/_src/Utils/version.h +++ b/sapfor/experts/Sapfor_2017/_src/Utils/version.h @@ -1,3 +1,3 @@ #pragma once -#define VERSION_SPF "2256" +#define VERSION_SPF "2257"