7 Commits

24 changed files with 1748 additions and 223 deletions

View File

@@ -236,6 +236,7 @@
#define DVM_EXIT_INTERVAL_DIR 639 /* DVM-F */
#define DVM_TEMPLATE_CREATE_DIR 640 /* DVM-F */
#define DVM_TEMPLATE_DELETE_DIR 641 /* DVM-F */
#define PRIVATE_AR_DECL 642 /* DVM-F */
/***************** variant tags for low level nodes ********************/

View File

@@ -238,6 +238,7 @@ script using "tag". Run make tag.h to regenerate this file */
tag [ DVM_EXIT_INTERVAL_DIR ] = "DVM_EXIT_INTERVAL_DIR";
tag [ DVM_TEMPLATE_CREATE_DIR ] = "DVM_TEMPLATE_CREATE_DIR";
tag [ DVM_TEMPLATE_DELETE_DIR ] = "DVM_TEMPLATE_DELETE_DIR";
tag [ PRIVATE_AR_DECL ] = "PRIVATE_AR_DECL";
/***************** variant tags for low level nodes ********************/

View File

@@ -139,6 +139,8 @@ DEFNODECODE(CONT_STAT, "%CMNT%PUTTABcontinue;%NL",
's',0,BIFNODE)
DEFNODECODE(VAR_DECL, "%CMNT%SETFLAG(VARDECL)%IF (%CHECKFLAG(ENUM) == %NULL)%IF (%CHECKFLAG(CLASSDECL) != %NULL)%PROTECTION%ENDIF%PUTTAB%DECLSPEC%TYPE %ENDIF%LL1%IF (%CHECKFLAG(ENUM) == %NULL);%ENDIF%UNSETFLAG(VARDECL)%NL",
's',0,BIFNODE)
/* Unparse template for PRIVATE_AR_DECL nodes: the format below yields
   "PrivateArray<LL1,LL2> LL3;" followed by a newline.
   NOTE(review): %CMNT / %PUTTAB presumably emit the attached comment and the
   current indentation, and %LL1..%LL3 the node's operands - confirm against
   the unparser's directive table. */
DEFNODECODE(PRIVATE_AR_DECL, "%CMNT%PUTTABPrivateArray<%LL1,%LL2> %LL3;%NL",
's',0,BIFNODE)
DEFNODECODE(PARAM_DECL, "%ERROR",
's',0,BIFNODE)
DEFNODECODE(COMM_STAT, "%ERROR",

File diff suppressed because it is too large Load Diff

View File

@@ -633,7 +633,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
// clear information
allRegNames.clear();
SgStatement *st_hedr, *st_end, *first_exec, *stmt;
SgStatement *st_hedr=NULL, *st_end, *first_exec, *stmt;
vector<SgStatement*> cuda_kernel;
SgExpression *fe, *ae, *el, *arg_list;
SgType *typ;
@@ -700,6 +700,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
kernel_symbNew += "_llong";
cuda_kernel[t] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symbNew.c_str(), *C_VoidType(), *block_C), &retValueForKernel[t], indexTypeInKernel(rtTypes[t]));
if (options.isOn(RTC))
{
acc_call_list = ACC_RTC_ExpandCallList(acc_call_list);
@@ -839,7 +840,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
first_exec = st_end;
mywarn("start: create dummy argument list ");
// create dummy argument list: loop_ref, <dvm-array-headers>, <uses>
// create dummy argument list: loop_ref, <dvm-array-headers>, <uses> ,<private-array-shapes>
typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef));
s_loop_ref = new SgSymbol(VARIABLE_NAME, "loop_ref", *typ, *st_hedr);
argsForVariantFunction.push_back(s_loop_ref);
@@ -879,6 +880,46 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
arg_list->setRhs(*new SgExprListExp(*ae));
arg_list = arg_list->rhs();
}
if (options.isOn(C_CUDA)) // <private-array-shapes>
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = 1; idim<=Rank(s); idim++)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
argsForVariantFunction.push_back(sarg);
ae = new SgVarRefExp(sarg);
ae->setType(t);
ae = new SgPointerDerefExp(*ae);
arg_list->setRhs(*new SgExprListExp(*ae));
arg_list = arg_list->rhs();
}
el = NULL;
for (idim = 1; idim<=Rank(s); idim++)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
argsForVariantFunction.push_back(sarg);
ae = new SgVarRefExp(sarg);
ae->setType(t);
ae = new SgPointerDerefExp(*ae);
arg_list->setRhs(*new SgExprListExp(*ae));
arg_list = arg_list->rhs();
}
}
}
}
mywarn(" end: create dummy argument list ");
mywarn("start: create IF BLOCK ");
@@ -1112,6 +1153,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
mywarn(" end: create IF BLOCK ");
}
if (options.isOn(C_CUDA))
RenamingCudaFunctionVariables(st_hedr, s_loop_ref, 0); //(st_hedr, current_symbol->next(), 0);
@@ -1127,14 +1169,14 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
SgSymbol **reduction_ptr;
SgSymbol *lowI, *highI, *idxI;
symb_list *sl;
SgStatement *st_hedr, *st_end, *stmt, *first_exec;
SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *er;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first;
SgStatement *st_hedr, *st_end, *stmt, *first_exec, *stmt_save;
SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *er, *e_all_private_size = NULL;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first, *private_first=NULL;
SgSymbol *s_blocks, *s_threads, *s_dev_num, *s_tmp_var, *idxTypeInKernel;
SgType *typ;
SgFunctionCallExp *funcCall;
vector<char*> dvm_array_headers;
int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays = 0, nbuf = 0;
int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays = 0, nbuf = 0, lnp = 0;
// init block
reduction_ptr = NULL;
@@ -1206,8 +1248,58 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
}
uses_num = ln;
mywarn(" end: create dummy argument list ");
if (options.isOn(C_CUDA)) // <private-array-shapes>
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, DIM_SIZES))
{
SgExpression **edim = new (SgExpression *);
*edim = el;
elp->lhs()->addAttribute(DIM_SIZES, (void *)edim, sizeof(SgExpression *) );
}
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, L_BOUNDS))
{
SgExpression **elb = new (SgExpression *);
*elb = el;
elp->lhs()->addAttribute(L_BOUNDS, (void *)elb, sizeof(SgExpression *) );
}
while (arg_list->rhs() != 0)
arg_list = arg_list->rhs();
}
}
}
mywarn(" end: create dummy argument list ");
// create variable's declarations: <dvm_array_headers>,<dvm_array_bases>,<scalar_device_addr>,<reduction_variables>,<private-arrays>,blocks_info [ or blocksS,idxL,idxH ],stream,blocks,threads
if (red_list) // reduction section
{
mywarn("start: in reduction section ");
@@ -1269,7 +1361,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
btype = loc_type->baseType();
else
btype = loc_type;
//!printf("__112\n");
SgArrayType *typearray = new SgArrayType(*C_Type(btype));
typearray->addRange(*new SgValueExp(loc_el_num));
s_loc_var->setType(*typearray);
@@ -1282,7 +1374,6 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
st_hedr->insertStmtAfter(*stmt, *st_hedr);
}
//!printf("__113\n");
/*--- executable statements: register reductions in RTS ---*/
e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1));
stmt = new SgCExpStmt(*e);
@@ -1438,10 +1529,12 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
st_end->insertStmtBefore(*stmt, *st_hedr);
stmt->addComment("// Get bounds");
mywarn(" end: create assigns");
stmt_save = stmt;
stmt = new SgCExpStmt(SgAssignOp(*new SgRecordRefExp(*s_blocks, "x"), *new SgValueExp(1)));
st_end->insertStmtBefore(*stmt, *st_hedr);
stmt->addComment("// Start counting");
SgStatement *st_where = stmt;
stmt = new SgCExpStmt(SgAssignOp(*new SgRecordRefExp(*s_threads, "x"), *new SgValueExp(1)));
st_end->insertStmtBefore(*stmt, *st_hedr);
@@ -1495,6 +1588,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
for (int i = NumberOfCoeffs(sg); i>0; i--)
funcCall->addArg(*new SgArrayRefExp(*sg, *new SgValueExp(i)));
}
if (red_list)
{
reduction_operation_list *rsl;
@@ -1520,6 +1614,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
funcCall->addArg(*new SgArrayRefExp(*s, *new SgValueExp(i)));
}
s = s->next();
if (options.isOn(C_CUDA))
funcCall->addArg(*new SgVarRefExp(reduction_ptr[i]));
else
@@ -1543,6 +1638,35 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
}
}
e_all_private_size = sizeOfPrivateArraysInBytes();
if (options.isOn(C_CUDA) && e_all_private_size)
{
for (el=private_list, lnp=0; el; el=el->rhs())
{
s = el->lhs()->symbol();
if (IS_ARRAY(s))
{
sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sarg));
funcCall->addArg(*ae);
if (!lnp)
private_first = sarg;
lnp++;
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCall->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCall->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
}
for (int i = 0; i < acrossV + loopV; ++i)
{
funcCall->addArg(*new SgArrayRefExp(*lowI, *new SgValueExp(i)));
@@ -1557,7 +1681,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
if (red_list)
{
ln = 0;
for (er = red_list; er; er = er->rhs(), ++ln)
for (er = red_list, s = red_first; er; er = er->rhs(), ++ln, s=s->next())
{
funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("cudaMemcpy"));
funcCall->addArg(SgAddrOp(*new SgVarRefExp(&(er->lhs()->rhs()->symbol()->copy()))));
@@ -1571,9 +1695,10 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr);
stmt = new SgCExpStmt(*RedPost(s_loop_ref, s_tmp_var, &(er->lhs()->rhs()->symbol()->copy()), NULL)); // loop_red_post_
stmt = new SgCExpStmt(*RedPost(s_loop_ref, s_tmp_var, s, NULL)); // loop_red_post_
st_end->insertStmtBefore(*stmt, *st_hedr);
}
ln = 0;
for (er = red_list; er; er = er->rhs(), ++ln)
{
@@ -1585,6 +1710,18 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
stmt->addComment("// Free temporary variables");
}
}
// insert code for big private arrays
if (options.isOn(C_CUDA) && e_all_private_size)
{
GetMemoryForPrivateArrays(private_first, s_loop_ref, lnp, st_where, st_hedr, new SgValueExp(1));
// to dispose private arrays
for (s = private_first, ln = 0; ln < lnp; s = s->next(), ln++) // private arrays
{
stmt = new SgCExpStmt(*DisposePrivateArray(s_loop_ref, s));
st_end->insertStmtBefore(*stmt, *st_hedr);
}
}
// create args for kernel and return it
vector<ArgsForKernel> argsKernel(countKernels);
for (unsigned i = 0; i < countKernels; ++i)
@@ -1594,6 +1731,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
mywarn(" end Adapter Function");
if (options.isOn(C_CUDA))
RenamingCudaFunctionVariables(st_hedr, s_loop_ref, 0);
return argsKernel;
}
@@ -1602,8 +1740,8 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu
SgFunctionCallExp *funcCallKernel, SgSymbol* numBlocks, int &has_red_array)
{
reduction_operation_list *rsl;
SgSymbol *s;
SgExpression *e;
SgSymbol *s = NULL;
SgExpression *e = NULL;
for (rsl = red_struct_list, s = red_first; rsl; rsl = rsl->next) //s!=s_blocks_info
{
@@ -1640,19 +1778,43 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu
else
funcCallKernel->addArg(*new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(reduction_ptr[i])));
//TODO!!
if (rsl->locvar) //MAXLOC,MINLOC
{
for (int k = 0; k < rsl->number; ++k)
funcCallKernel->addArg(*new SgArrayRefExp(*reduction_loc_symb[i], *new SgValueExp(k)));
s = s->next();
e = new SgCastExp(*C_PointerType(options.isOn(C_CUDA) ? C_Type(rsl->locvar->type()) : new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(s));
if (options.isOn(C_CUDA))
e = new SgCastExp(*C_PointerType(C_Type(rsl->locvar->type())), *new SgVarRefExp(reduction_loc_ptr[i]));
else
e = new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(s));// TODO it like in C_Cuda
funcCallKernel->addArg(*e);
s = s->next();
}
}
}
// Creates one 'void *' pointer symbol (named by PointerNameForPrivateArray) for
// every array found in private_list, passing st_hedr to the SgSymbol constructor.
//
// Outputs:
//   private_first      - first pointer symbol created, or NULL when none was created;
//                        callers walk 'lnp' symbols from it via next().
//   lnp                - number of pointer symbols created; always written, 0 when
//                        nothing was created.
//   e_all_private_size - receives the result of sizeOfPrivateArraysInBytes(), but only
//                        when the C_CUDA option is on; otherwise it is left untouched.
//
// Nothing is created unless C_CUDA is on and sizeOfPrivateArraysInBytes() is non-NULL.
static void createPrivatePointers(SgSymbol* &private_first, int &lnp, SgStatement* st_hedr, SgExpression* &e_all_private_size)
{
    private_first = NULL;
    // Reset the counter up front: callers pass 'lnp' on to other routines
    // unconditionally, so it must hold a defined value on the early-out path too.
    lnp = 0;

    if (options.isOn(C_CUDA) && (e_all_private_size = sizeOfPrivateArraysInBytes()))
    {
        for (SgExpression *el = private_list; el; el = el->rhs())
        {
            SgSymbol *s = el->lhs()->symbol();
            if (IS_ARRAY(s))
            {
                SgSymbol *sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
                if (!lnp)
                    private_first = sarg; // remember the head of the run of pointer symbols
                lnp++;
            }
        }
    }
}
static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, SgSymbol* kernel_symb, SgExpression* espec, SgSymbol*& sg, SgSymbol* hgpu_first,
SgSymbol*& sb, SgSymbol* base_first, symb_list*& sl, int& ln, int num, SgExpression*& e, SgSymbol** reduction_ptr,
@@ -1660,7 +1822,7 @@ static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, Sg
SgSymbol* diag, const int& loopV, SgSymbol** num_elems, const int& acrossV, SgSymbol* acrossBase[16], SgSymbol* loopBase[16],
SgSymbol* idxI, const vector<SageSymbols>& loopAcrossSymb, const vector<SageSymbols>& loopSymb, SgSymbol*& s, SgSymbol* uses_first,
SgSymbol*& sdev, SgSymbol* scalar_first, int uses_num, vector<char*>& dvm_array_headers,
SgSymbol** addressingParams, SgSymbol** outTypeOfTransformation, SgSymbol* type_of_run, SgSymbol* bIdxs)
SgSymbol** addressingParams, SgSymbol** outTypeOfTransformation, SgSymbol* type_of_run, SgSymbol* bIdxs, SgSymbol* private_first, int lnp)
{
funcCallKernel = CallKernel(kernel_symb, espec);
@@ -1728,6 +1890,32 @@ static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, Sg
}
}
if (options.isOn(C_CUDA) && private_first) // there are big private arrays
{
SgExpression *el, *ae;
SgSymbol *sarg, *sp, *s;
int ln;
for (sp = private_first, el = private_list, ln = 0; ln < lnp; sp = sp->next(), el = el->rhs(), ln++)
{
while (!IS_ARRAY(el->lhs()->symbol()))
el = el->rhs();
s = el->lhs()->symbol();
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sp));
funcCallKernel->addArg(*ae);
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
if (options.isOn(AUTO_TFM))
{
for (size_t i = 0; i < dvm_array_headers.size(); ++i)
@@ -1767,14 +1955,14 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
symb_list *sl;
SgStatement *st_hedr, *st_end, *stmt, *first_exec;
SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *ex, *er;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first;
SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *ex, *er, *e_all_private_size = NULL, *e_totalThreads;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first, *private_first;
SgSymbol *s_blocks, *s_threads, *s_dev_num, *s_tmp_var, *type_of_run, *s_i = NULL, *s_k = NULL, *s_tmp_var_1;
SgSymbol *idxTypeInKernel;
SgType *typ;
SgFunctionCallExp *funcCall, *funcCallKernel;
vector<char*> dvm_array_headers;
int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays, nbuf = 0;
int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays, nbuf = 0, lnp;
// init block
lowI = highI = idxI = elem = red_blocks = shared_mem = stream_t = bIdxs = NULL;
@@ -1850,6 +2038,56 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
}
uses_num = ln;
if (options.isOn(C_CUDA)) // <private-array-shapes>
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, DIM_SIZES))
{
SgExpression **edim = new (SgExpression *);
*edim = el;
elp->lhs()->addAttribute(DIM_SIZES, (void *)edim, sizeof(SgExpression *) );
}
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, L_BOUNDS))
{
SgExpression **elb = new (SgExpression *);
*elb = el;
elp->lhs()->addAttribute(L_BOUNDS, (void *)elb, sizeof(SgExpression *) );
}
while (arg_list->rhs() != 0)
arg_list = arg_list->rhs();
}
}
}
type_of_run = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("type_of_run"), *LongT, *st_hedr);
ae = new SgVarRefExp(type_of_run);
ae->setType(LongT);
@@ -1941,7 +2179,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
btype = loc_type->baseType();
else
btype = loc_type;
//!printf("__112\n");
SgArrayType *typearray = new SgArrayType(*C_Type(btype));
typearray->addRange(*new SgValueExp(loc_el_num));
s_loc_var->setType(*typearray);
@@ -1955,7 +2193,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
st_hedr->insertStmtAfter(*stmt, *st_hedr);
}
//!printf("__113\n");
/*--- executable statements: register reductions in RTS ---*/
e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1));
stmt = new SgCExpStmt(*e);
@@ -2209,6 +2447,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
stmt = makeSymbolDeclarationWithInit(s, new SgValueExp(0));
st_hedr->insertStmtAfter(*stmt, *st_hedr);
}
// create indxs
for (int i = 0; i < acrossV; ++i)
{
@@ -2556,7 +2795,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
e = &SgAssignOp(*new SgVarRefExp(s_blocks), *f);
stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr);
stmt->addComment("//Start method");
stmt->addComment("// Start method");
e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)));
stmt = new SgCExpStmt(*e);
@@ -2722,7 +2961,6 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
st_end->insertStmtBefore(*stmt, *st_hedr);
}
}
mywarn("start: in adding args section");
/* args for kernel */
@@ -2781,6 +3019,35 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
sdev = sdev->next();
}
}
e_all_private_size = sizeOfPrivateArraysInBytes();
if (options.isOn(C_CUDA) && e_all_private_size)
{
for (el=private_list, lnp=0; el; el=el->rhs())
{
s = el->lhs()->symbol();
if (IS_ARRAY(s))
{
sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sarg));
funcCallKernel->addArg(*ae);
if (!lnp)
private_first = sarg;
lnp++;
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
}
funcCallKernel->addArg(*new SgVarRefExp(type_of_run));
for (int i = 0; i < acrossV + loopV; ++i)
funcCallKernel->addArg(*new SgArrayRefExp(*bIdxs, *new SgValueExp(i)));
@@ -2816,7 +3083,15 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
SgForStmt *simple;
simple = new SgForStmt(&SgAssignOp(*new SgVarRefExp(tmpV), *new SgValueExp(0)), &(*new SgVarRefExp(tmpV1) < *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len))), expr, stmt);
st_end->insertStmtBefore(*simple);
stmt = simple;
}
stmt->addComment("// GPU execution");
if (options.isOn(C_CUDA) && e_all_private_size)
{
e_totalThreads = &(*new SgRecordRefExp(*s_blocks, "x") * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z"));
GetMemoryForPrivateArrays(private_first, s_loop_ref, lnp, stmt, st_hedr, e_totalThreads);
}
}
else if (acrossV == 2) // ACROSS with two dependence: generate method
{
@@ -2972,7 +3247,8 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
mywarn(" end: out red section");
}
createPrivatePointers(private_first, lnp, st_hedr, e_all_private_size);
GetMemoryForPrivateArrays (private_first, s_loop_ref, lnp, st_end, st_hedr, new SgVarRefExp(q));
mywarn("strat: init bases");
// init bases
for (int i = 0; i < acrossV; ++i)
@@ -3014,7 +3290,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks,
has_red_array, diag, loopV, num_elems, acrossV, acrossBase, loopBase, idxI,
loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers,
addressingParams, outTypeOfTransformation, type_of_run, bIdxs);
addressingParams, outTypeOfTransformation, type_of_run, bIdxs, private_first, lnp);
stmt = createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks);
while_st->insertStmtAfter(*stmt);
@@ -3093,7 +3369,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks,
has_red_array, q, loopV, num_elems, acrossV, acrossBase, loopBase, idxI,
loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers,
addressingParams, outTypeOfTransformation, type_of_run, bIdxs);
addressingParams, outTypeOfTransformation, type_of_run, bIdxs, private_first, lnp);
while_st1->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
while_st2->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
@@ -3105,7 +3381,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks,
has_red_array, elem, loopV, num_elems, acrossV, acrossBase, loopBase, idxI,
loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers,
addressingParams, outTypeOfTransformation, type_of_run, bIdxs);
addressingParams, outTypeOfTransformation, type_of_run, bIdxs, private_first, lnp);
while_st3->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
while_st4->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
@@ -3190,6 +3466,30 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
sdev = sdev->next();
}
}
createPrivatePointers(private_first, lnp, st_hedr, e_all_private_size);
if (options.isOn(C_CUDA) && private_first) // there are big private arrays
{
SgSymbol *sp;
for (sp = private_first, el = private_list, ln = 0; ln < lnp; sp = sp->next(), el = el->rhs(), ln++)
{
while (!IS_ARRAY(el->lhs()->symbol()))
el = el->rhs();
s = el->lhs()->symbol();
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sp));
funcCallKernel->addArg(*ae);
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("MIN"));
funcCall->addArg(*new SgVarRefExp(M1));
funcCall->addArg(*new SgVarRefExp(M2));
@@ -3393,6 +3693,18 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
mywarn(" end: out red section");
}
if (options.isOn(C_CUDA) && private_first)
{
SgFunctionCallExp *f1 = new SgFunctionCallExp(*createNewFunctionSymbol("MAX"));
SgFunctionCallExp *f2 = new SgFunctionCallExp(*createNewFunctionSymbol("MAX"));
f1->addArg(*new SgVarRefExp(M1));
f1->addArg(*new SgVarRefExp(M2));
f2->addArg(*f1);
f2->addArg(*new SgVarRefExp(M3));
e_totalThreads = &(*new SgVarRefExp(Emin) * *f2);
GetMemoryForPrivateArrays (private_first, s_loop_ref, lnp, st_end, st_hedr, e_totalThreads);
}
int flag_comment = 0;
for (int i = 3; i < acrossV; ++i)
{
@@ -3755,6 +4067,13 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
st_end->insertStmtBefore(*stmt, *st_hedr);
}
}
// to dispose private arrays
if (options.isOn(C_CUDA) && e_all_private_size)
for (s = private_first, ln = 0; ln < lnp; s = s->next(), ln++) // private arrays
{
stmt = new SgCExpStmt(*DisposePrivateArray(s_loop_ref, s));
st_end->insertStmtBefore(*stmt, *st_hedr);
}
// create args for kernel and return it
vector<ArgsForKernel> argsKernel(countKernels);
@@ -3903,7 +4222,7 @@ void MakeDeclarationsForKernel_On_C_Across(SgType *indexType)
DeclareDoVars(indexType);
// declare private(local in kernel) variables
DeclarePrivateVars();
DeclarePrivateVars(indexType);
// declare variables, used in loop and passed by reference:
// <type> &<name> = *p_<name>;
@@ -3920,7 +4239,7 @@ void MakeDeclarationsForKernelAcross(SgType *indexType)
DeclareDoVars();
// declare private(local in kernel) variables
DeclarePrivateVars();
DeclarePrivateVars(indexType);
// declare dummy arguments:
@@ -3976,6 +4295,9 @@ SgExpression *CreateKernelDummyListAcross(ArgsForKernel *argsKer, SgType *idxTyp
if (uses_list)
arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ <uses> ]
if (private_list)
arg_list = AddListToList(arg_list, CreatePrivateDummyList()); //[+ dummys for private arrays ]
if (argsKer->symb.size() >= 3)
for (int it = 0; it < argsKer->sizeVars.size(); ++it)
arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->sizeVars[it])));
@@ -5513,6 +5835,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, i
flag_func_call = 0; // maxloc
else if (num == 10)
flag_func_call = 0; // minloc
if (flag_func_call == 1)
{
SgFunctionCallExp *funcCall = new SgFunctionCallExp(*createNewFunctionSymbol(str_operation));
@@ -5607,9 +5930,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, i
locGrid->setType(*new SgArrayType(*tmp_list->loc_grid->type()));
if (options.isOn(C_CUDA))
st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i)));
st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(loc_el_num) * *e1 + *new SgValueExp(i)), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i)));
else
st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i + 1), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i + 1)));
st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i + 1), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i + 1)));//TODO it like in C_Cuda
ifSt->insertStmtAfter(*st);
}
}
@@ -5802,18 +6125,17 @@ SgSymbol *RedBlockSymbolInKernelAcross(SgSymbol *s, SgType *type)
void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_operation_list *rsl)
{
SgStatement *ass, *newst, *current, *if_st, *while_st, *typedecl, *st, *do_st;
SgExpression *le, *re, *eatr, *cond, *ev;
SgStatement *newst, *current, *if_st, *while_st, *typedecl, *st, *do_st;
SgExpression *eatr, *cond, *ev;
SgSymbol *red_var, *red_var_k, *s_block, *loc_var, *sf;
SgType *rtype;
int i, ind;
//init block
ass = newst = current = if_st = while_st = typedecl = st = do_st = NULL;
le = re = eatr = cond = ev = NULL;
newst = current = if_st = while_st = typedecl = st = do_st = NULL;
eatr = cond = ev = NULL;
red_var = red_var_k = s_block = loc_var = sf = NULL;
rtype = NULL;
i = ind = loc_el_num = 0;
loc_el_num = 0;
//end of init block
// analysis of reduction operation
@@ -5838,13 +6160,13 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope
if (rsl->locvar)
{
newst = Declaration_Statement(rsl->locvar); //declare location variable
newst = Declaration_Statement(LocRedVariableSymbolInKernel(rsl)); //declare location variable
kernel_st->insertStmtAfter(*newst, *kernel_st);
}
if (rsl->redvar_size > 0)
{
newst = Declaration_Statement(rsl->redvar); //declare reduction variable
newst = Declaration_Statement(RedVariableSymbolInKernel(rsl->redvar,NULL,NULL)); //declare reduction variable
kernel_st->insertStmtAfter(*newst, *kernel_st);
}
else if (rsl->redvar_size < 0)
@@ -5853,26 +6175,22 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope
newst = Declaration_Statement(red_var_k); //declare reduction variable
kernel_st->insertStmtAfter(*newst, *kernel_st);
}
rtype = (rsl->redvar_size >= 0) ? TypeOfRedBlockSymbol(ered) : red_var_k->type();
s_block = RedBlockSymbolInKernelAcross(red_var, rtype);
newst = Declaration_Statement(s_block);
if (options.isOn(C_CUDA))
newst->addDeclSpec(BIT_CUDA_SHARED | BIT_EXTERN);
else
//XXX: shared memory is not used in ACROSS by C_Cuda
if (!options.isOn(C_CUDA))
{
rtype = (rsl->redvar_size >= 0) ? TypeOfRedBlockSymbol(ered) : red_var_k->type();
s_block = RedBlockSymbolInKernelAcross(red_var, rtype);
newst = Declaration_Statement(s_block);
eatr = new SgExprListExp(*new SgExpression(ACC_SHARED_OP));
newst->setExpression(2, *eatr);
}
kernel_st->insertStmtAfter(*newst, *kernel_st);
kernel_st->insertStmtAfter(*newst, *kernel_st);
if (isSgExprListExp(ered->rhs())) //MAXLOC,MINLOC
{
typedecl = MakeStructDecl(rtype->symbol());
kernel_st->insertStmtAfter(*typedecl, *kernel_st);
if (isSgExprListExp(ered->rhs())) //MAXLOC,MINLOC
{
typedecl = MakeStructDecl(rtype->symbol());
kernel_st->insertStmtAfter(*typedecl, *kernel_st);
}
}
}

View File

@@ -5,10 +5,12 @@
using namespace std;
// special storages to avoid recomputing
map<string, SgExpression*> lhs;
map<string, SgExpression*> rhs;
map<SgExpression*, string> unparsedLhs;
map<SgExpression*, string> unparsedRhs;
static map<string, SgExpression*> lhs;
static map<string, SgExpression*> rhs;
static map<SgExpression*, string> unparsedLhs;
static map<SgExpression*, string> unparsedRhs;
extern reduction_operation_list* red_struct_list;
template<typename InIt1, typename InIt2, typename OutIt>
static inline OutIt difference(InIt1 first1, InIt1 last1, InIt2 first2, InIt2 last2, OutIt dest)
@@ -1221,6 +1223,13 @@ Loop::Loop(SgStatement* loop_body, bool enable_opt, bool irreg_access) :
irregular_acc_opt(irreg_access), enable_opt(enable_opt), loop_body(loop_body),
dimension(0), acrossType(0), acrossDims(NULL), do_irreg_opt(false)
{
reduction_operation_list* rsl;
for (rsl = red_struct_list; rsl; rsl = rsl->next)
{
if (rsl->locvar) //MAXLOC,MINLOC
redArrays.insert(rsl->locvar);
}
lhs.clear();
rhs.clear();
unparsedLhs.clear();
@@ -1420,7 +1429,7 @@ void Loop::analyzeAssignments(SgExpression* ex, const int blockIndex)
else
{
SgSymbol* symbol = ex->symbol();
if (isSgArrayType(symbol->type()) != NULL)
if (isSgArrayType(symbol->type()) != NULL && redArrays.find(symbol) == redArrays.end())
{
SgExpression* subscripts = ((SgArrayRefExp*)(ex))->subscripts();
if (!subscripts)
@@ -1950,6 +1959,13 @@ void Loop::buildCFG()
// Constructs a Loop for CFG building only: registers the MAXLOC/MINLOC
// location variables of the current reduction list, resets the file-level
// expression caches and builds the control-flow graph.
Loop::Loop(SgStatement* stmt) : do_irreg_opt(false)
{
    // remember every location variable found in the reduction list
    reduction_operation_list* red_item = red_struct_list;
    while (red_item)
    {
        if (rsl_has_locvar:
            false) {}
        red_item = red_item->next;
    }
}

View File

@@ -4942,3 +4942,48 @@ SgExpression *RtcSetLang(SgSymbol *s_loop_ref, const int lang)
fe->addArg(*new SgKeywordValExp("UNKNOWN_CUDA"));
return(fe);
}
// Generates the function call expression:
//   DvmType loop_cuda_get_device_prop(DvmType *InDvmhLoop, DvmType prop);
// s_loop_ref is passed as a variable reference, ep is passed as given.
SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep)
{
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[GET_DEVICE_PROP]);

    SgVarRefExp *loop_arg = new SgVarRefExp(s_loop_ref);
    call->addArg(*loop_arg);
    call->addArg(*ep);

    return call;
}
// Generates the function call expression:
//   DvmType loop_cuda_get_max_blocks(DvmType *InDvmhLoop, DvmType maxBlocks, DvmType neededBytesForBlock)
// All three symbols are passed as variable references, in declaration order.
SgExpression *GetMaxBlocks(SgSymbol *s_loop_ref, SgSymbol *s_max_blocks, SgSymbol *s_needed_bytes)
{
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[GET_MAX_BLOCKS]);

    SgVarRefExp *loop_arg = new SgVarRefExp(s_loop_ref);
    call->addArg(*loop_arg);

    SgVarRefExp *max_blocks_arg = new SgVarRefExp(s_max_blocks);
    call->addArg(*max_blocks_arg);

    SgVarRefExp *needed_bytes_arg = new SgVarRefExp(s_needed_bytes);
    call->addArg(*needed_bytes_arg);

    return call;
}
// Generates the function call expression:
//   DvmType *loop_cuda_get_private_array(DvmType *InDvmhLoop, UDvmType neededBytes)
// s_loop_ref is passed as a variable reference, e_bytes is passed as given.
SgExpression *GetPrivateArray(SgSymbol *s_loop_ref, SgExpression *e_bytes)
{
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[GET_PRIVATE_ARR]);

    SgVarRefExp *loop_arg = new SgVarRefExp(s_loop_ref);
    call->addArg(*loop_arg);
    call->addArg(*e_bytes);

    return call;
}
// Generates the function call expression:
//   void loop_cuda_dispose_private_array(DvmType *InDvmhLoop, void *array)
// Both symbols are passed as variable references.
SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array)
{
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[DISPOSE_PRIVATE_AR]);

    SgVarRefExp *loop_arg = new SgVarRefExp(s_loop_ref);
    call->addArg(*loop_arg);

    SgVarRefExp *array_arg = new SgVarRefExp(s_array);
    call->addArg(*array_arg);

    return call;
}

View File

@@ -104,6 +104,7 @@ private:
SgStatement* loop_body;
int dimension;
std::map<SgSymbol*, Array*> arrays;
std::set<SgSymbol*> redArrays;
int* acrossDims;
int acrossType;
std::vector<SgSymbol*> symbols;

View File

@@ -261,6 +261,10 @@ const int END_OF_USE_LIST = 1050; /*ACC*/
const int END_OF_USE_LIST = 1050; /*ACC*/
const int ROUTINE_ATTR = 1051; /*ACC*/
const int DATA_REGION_SYMB = 1052; /*ACC*/
const int REMOTE_ACCESS_BUF = 1053; /*ACC*/
const int L_BOUNDS = 1054; /*ACC*/
const int DIM_SIZES = 1055; /*ACC*/
const int PRIVATE_ARRAY = 1056; /*ACC*/
const int PRIVATE_POINTER = 1057; /*ACC*/
const int MAX_LOOP_LEVEL = 20; // 7 - maximal number of loops in parallel loop nest
@@ -1275,6 +1279,7 @@ SgSymbol *isSameRedVar(char *name);
SgSymbol *isSameRedVar(char *name);
SgSymbol *isSameArray(char *name);
SgSymbol *isSameIndexVar(char *name);
SgType * C_LongLongType();
SgType * C_UnsignedLongLongType();
SgType * C_DvmType();
SgType * C_CudaIndexType();
@@ -1372,6 +1377,7 @@ int TestOneGroupStatement(SgStatement *stmt);
int TestOneGroupStatement(SgStatement *stmt);
void DeclareUsedVars();
void DeclareInternalPrivateVars();
void DeclarePrivateVars();
void DeclarePrivateVars(SgType *idxTypeInKernel);
void DeclareArrayBases();
void DeclareArrayCoeffsInKernel(SgType*);
@@ -1434,6 +1440,19 @@ SgSymbol *HeaderSymbolForHandler(SgSymbol *ar);
SgSymbol *HeaderSymbolForHandler(SgSymbol *ar);
void TestRoutineAttribute(SgSymbol *s, SgStatement *routine_interface);
int LookForRoutineDir(SgStatement *interfaceFunc);
SgStatement *Interface(SgSymbol *s);
SgExpression *sizeOfElementInBytes(SgSymbol *symb);
SgExpression *sizeOfPrivateArraysInBytes();
SgExpression *ProductOfDimSizeArgs(SgExpression *esizes);
//void doPrivateArrayList(SgExpression *private_arrays, SgStatement *st_hedr);
void addPrivateArrayList(SgFunctionCallExp *fcall, SgExpression *private_arrays, SgStatement *st_hedr);
int TestArrayShape(SgSymbol *ar);
SgExpression *DimSizeListOfPrivateArrays();
SgExpression *BoundListOfPrivateArrays();
SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr);
SgExpression *CreatePrivateDummyList();
char *PointerNameForPrivateArray(SgSymbol *symb);
void GetMemoryForPrivateArrays(SgSymbol *private_first, SgSymbol *s_loop_ref, int nump, SgStatement *st_end, SgStatement *st_hedr, SgExpression *e_totalThreads);
SgSymbol *LocRedVariableSymbolInKernel(reduction_operation_list *rsl);
/* acc_analyzer.cpp */
@@ -1900,6 +1919,10 @@ SgStatement *Consistent_H (int il, SgExpression *hedr, SgExpression *axis_list);
SgStatement *Consistent_H (int il, SgExpression *hedr, SgExpression *axis_list);
SgStatement *LoopRemoteAccess_H (int il, SgExpression *hedr, SgSymbol *ar, SgExpression *axis_list);
SgStatement *RemoteAccess_H2 (SgExpression *buf_hedr, SgSymbol *ar, SgExpression *ar_hedr, SgExpression *axis_list);
SgStatement *GetRemoteBuf (SgSymbol *loop_s, int n, SgSymbol *s_buf_head);
SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep);
SgExpression *GetMaxBlocks(SgSymbol *s_loop_ref, SgSymbol *s_max_blocks, SgSymbol *s_needed_bytes);
SgExpression *GetPrivateArray(SgSymbol *s_loop_ref, SgExpression *e_bytes);
SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array);
/* io.cpp */
@@ -2089,7 +2112,6 @@ char *Check_Correct_Name(const char *name);
/* acc_f2c.cpp */
void Translate_Fortran_To_C(SgStatement *stat, SgStatement *last, std::vector <std::stack <SgStatement*> > &, int);
SgStatement* Translate_Fortran_To_C(SgStatement* Stmt, bool isSapforConv = false);
SgStatement* Translate_Fortran_To_C(SgStatement* Stmt, bool isSapforConv = false);
SgSymbol* createNewFunctionSymbol(const char *name);
void swapDimentionsInprivateList(void);
@@ -2103,6 +2125,9 @@ void RenamingNewProcedureVariables(SgSymbol *proc_name);
void RenamingNewProcedureVariables(SgSymbol *proc_name);
SgSymbol *hasSameNameAsSource(SgSymbol *symb);
void RenamingCudaFunctionVariables(SgStatement *first, SgSymbol *k_symb, int replace_flag);
void replaceVariableSymbSameNameInStatements(SgStatement *first, SgStatement *last, SgSymbol *symb, SgSymbol *s_new, int replace_flag);
void RenamingCalledProcedureSymbols(SgStatement *header, SgStatement *copy_header);
void RenamingCalledProcedureSymbolsInKernel(SgSymbol *first_symb);
/* acc_across.cpp */
ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter);
@@ -2238,7 +2263,7 @@ void ConvertLoopWithLabelToEnddoLoop (SgStatement *stat); /*OMP*/
// options on FDVM converter
enum OPTIONS {
AUTO_TFM = 0, ONE_THREAD, SPEED_TEST_L0, SPEED_TEST_L1, GPU_O0, GPU_O1, RTC, C_CUDA, OPT_EXP_COMP,
O_HOST, NO_CUDA, NO_BL_INFO, LOOP_ANALYSIS, PRIVATE_ANALYSIS, IO_RTS, READ_ALL, NO_REMOTE, NO_PURE_FUNC,
O_HOST, NO_CUDA, NO_BL_INFO, LOOP_ANALYSIS, PRIVATE_ANALYSIS, IO_RTS, READ_ALL, NO_REMOTE, NO_PURE_FUNC,
GPU_IRR_ACC, O_PL, O_PL2, BIG_P, NUM_OPT};
// ONE_THREAD - compile one thread CUDA-kernels only for across (TODO for all CUDA-kernels)
// SPEED_TEST_L0, SPEED_TEST_L1 - debug options for speed test of CUDA-kernels for across

View File

@@ -63,6 +63,7 @@
#define DVM_EXIT_INTERVAL_DIR 639
#define DVM_TEMPLATE_CREATE_DIR 640
#define DVM_TEMPLATE_DELETE_DIR 641
#define PRIVATE_AR_DECL 642
#define BLOCK_OP 705
#define NEW_SPEC_OP 706
#define REDUCTION_OP 707

View File

@@ -334,3 +334,7 @@ name_dvm[GUESS_INDEX_TYPE] = "loop_guess_index_type_";
name_dvm[GUESS_INDEX_TYPE_2]="dvmh_loop_guess_index_type_C";
name_dvm[RTC_SET_LANG] = "loop_cuda_rtc_set_lang";
name_dvm[GET_REMOTE_BUF_C] = "dvmh_loop_get_remote_buf_C";
name_dvm[GET_DEVICE_PROP] = "loop_cuda_get_device_prop";
name_dvm[GET_MAX_BLOCKS] = "loop_cuda_get_max_blocks";
name_dvm[GET_PRIVATE_ARR] = "loop_cuda_get_private_array";
name_dvm[DISPOSE_PRIVATE_AR]="loop_cuda_dispose_private_array";

View File

@@ -332,5 +332,9 @@ enum {
GUESS_INDEX_TYPE_2,
RTC_SET_LANG,
GET_REMOTE_BUF_C,
GET_DEVICE_PROP,
GET_MAX_BLOCKS,
GET_PRIVATE_ARR,
DISPOSE_PRIVATE_AR,
MAX_LIBFUN_NUM
};

View File

@@ -236,6 +236,7 @@
#define DVM_EXIT_INTERVAL_DIR 639 /* DVM-F */
#define DVM_TEMPLATE_CREATE_DIR 640 /* DVM-F */
#define DVM_TEMPLATE_DELETE_DIR 641 /* DVM-F */
#define PRIVATE_AR_DECL 642 /* DVM-F */
/***************** variant tags for low level nodes ********************/

View File

@@ -238,6 +238,7 @@ script using "tag". Run make tag.h to regenerate this file */
tag [ DVM_EXIT_INTERVAL_DIR ] = "DVM_EXIT_INTERVAL_DIR";
tag [ DVM_TEMPLATE_CREATE_DIR ] = "DVM_TEMPLATE_CREATE_DIR";
tag [ DVM_TEMPLATE_DELETE_DIR ] = "DVM_TEMPLATE_DELETE_DIR";
tag [ PRIVATE_AR_DECL ] = "PRIVATE_AR_DECL";
/***************** variant tags for low level nodes ********************/

View File

@@ -193,6 +193,8 @@ set(TR_CONV _src/Transformations/convert_to_c.cpp
_src/Transformations/convert_to_c.h)
set(TR_IMPLICIT_NONE _src/Transformations/set_implicit_none.cpp
_src/Transformations/set_implicit_none.h)
set(TR_REPLACE_ARRAYS_IN_IO _src/Transformations/replace_dist_arrays_in_io.cpp
_src/Transformations/replace_dist_arrays_in_io.h)
set(TRANSFORMS
${TR_DEAD_CODE}
@@ -211,7 +213,8 @@ set(TRANSFORMS
${TR_PRIV_DEL}
${TR_CONV}
${TR_PRIV_DEL}
${TR_IMPLICIT_NONE})
${TR_IMPLICIT_NONE}
${TR_REPLACE_ARRAYS_IN_IO})
set(CFG _src/CFGraph/IR.cpp
_src/CFGraph/IR.h
@@ -444,6 +447,7 @@ source_group (Transformations\\RenameSymbols FILES ${RENAME_SYMBOLS})
source_group (Transformations\\GlobalVariables FILES ${TR_GV})
source_group (Transformations\\ConvertToC FILES ${TR_CONV})
source_group (Transformations\\SetImplicitNone FILES ${TR_IMPLICIT_NONE})
source_group (Transformations\\ReplaceArraysInIO FILES ${TR_REPLACE_ARRAYS_IN_IO})
source_group (CreateIntervals FILES ${CREATE_INTER_T})

View File

@@ -958,13 +958,13 @@ static bool replaceCommonArray(const string &fileName,
return false;
}
static pair<SgSymbol*, SgSymbol*> copyArray(const pair<string, int> &place,
const DIST::Array *array,
const vector<ParallelRegionLines>& lines,
const string &suffix,
string& filename,
map<string, map<int, set<string>>>& newDeclsToInclude,
map<string, map<int, set<string>>>& copied)
pair<SgSymbol*, SgSymbol*> copyArray(const pair<string, int> &place,
const DIST::Array *array,
const vector<ParallelRegionLines>& lines,
const string &suffix,
string& filename,
map<string, map<int, set<string>>>& newDeclsToInclude,
map<string, map<int, set<string>>>& copied)
{
string fileName = place.first;
int switchRes = SgFile::switchToFile(fileName);

View File

@@ -14,3 +14,11 @@ int printCheckRegions(const char *fileName, const std::vector<ParallelRegion*> &
bool checkRegionsResolving(const std::vector<ParallelRegion*> &regions, const std::map<std::string, std::vector<FuncInfo*>> &allFuncInfo, const std::map<std::string, CommonBlock*> &commonBlocks, std::map<std::string, std::vector<Messages>> &SPF_messages, bool sharedMemoryParallelization);
int resolveParRegions(std::vector<ParallelRegion*>& regions, const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo, std::map<std::string, std::vector<Messages>>& SPF_messages, bool sharedMemoryParallelization, std::map<std::string, std::map<int, std::set<std::string>>>& copyDecls);
void insertRealignsBeforeFragments(ParallelRegion* reg, SgFile* file, const std::set<DIST::Array*>& distrArrays, const std::map<DIST::Array*, std::set<DIST::Array*>>& arrayLinksByFuncCalls);
std::pair<SgSymbol*, SgSymbol*> copyArray(const std::pair<std::string, int>& place,
const DIST::Array* array,
const std::vector<ParallelRegionLines>& lines,
const std::string& suffix,
std::string& filename,
std::map<std::string, std::map<int, std::set<std::string>>>& newDeclsToInclude,
std::map<std::string, std::map<int, std::set<std::string>>>& copied);

View File

@@ -16,6 +16,8 @@
#include "../DynamicAnalysis/gcov_info.h"
#include "PredictScheme.h"
#include "../Utils/SgUtils.h"
#include "../DirectiveProcessing/directive_parser.h"
#include "../Distribution/DvmhDirective.h"
using std::map;
using std::string;
@@ -125,7 +127,7 @@ void processFileToPredict(SgFile *file, PredictorStats &predictorCounts)
predictorCounts.TotalScorePar += predictorCounts.ParallelCount;
}
static void calculate_for_parallel_loop(SgStatement* loop, const map<int, Gcov_info>& gcov,
static void calculateForParallelLoop(SgStatement* loop, const map<int, Gcov_info>& gcov,
uint64_t& paralle_exec_count, uint64_t& count_of_parallel_lines) {
for (auto st = loop; st != loop->lastNodeOfStmt(); st = st->lexNext()) {
int line = st->lineNumber();
@@ -150,39 +152,42 @@ static void calculate_for_parallel_loop(SgStatement* loop, const map<int, Gcov_i
}
}
void calculate_stats_for_predictor(const map<string, vector<FuncInfo*>>& allFuncInfo,
const map<string, map<int, Gcov_info>>& gCovInfo) {
void calculateStatsForPredictor(const map<string, vector<FuncInfo*>>& allFuncInfo,
const map<string, map<int, Gcov_info>>& gCovInfo) {
uint64_t total_exec_count = 0;
uint64_t parallel_exec_count = 0;
uint64_t count_of_parallel_lines = 0;
for (auto& byFile : allFuncInfo) {
for (auto& byFile : allFuncInfo)
{
int ok = SgFile::switchToFile(byFile.first);
if (ok == -1)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
auto it = gCovInfo.find(byFile.first);
if (it == gCovInfo.end()) {
if (it == gCovInfo.end())
{
__spf_print(1, "bad gcov info\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
auto& gcov = it->second;
for (auto& func : byFile.second) {
for (auto& func : byFile.second)
{
SgStatement* stat = func->funcPointer->GetOriginal();
for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
{
uint64_t paralle_exec = 0;
uint64_t lines_count = 0;
if (st->variant() == DVM_PARALLEL_ON_DIR) {
if (st->variant() == DVM_PARALLEL_ON_DIR)
{
auto loop = st->lexNext();
checkNull(loop, convertFileName(__FILE__).c_str(), __LINE__);
if (loop->variant() != FOR_NODE)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
calculate_for_parallel_loop(loop, gcov, paralle_exec, lines_count);
calculateForParallelLoop(loop, gcov, paralle_exec, lines_count);
st = loop->lastNodeOfStmt();
parallel_exec_count += paralle_exec;
@@ -207,8 +212,8 @@ void calculate_stats_for_predictor(const map<string, vector<FuncInfo*>>& allFunc
continue;
auto& info = it->second;
if (info.getNumLine() != line) {
if (info.getNumLine() != line)
{
__spf_print(1, "bad gcov info\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
@@ -226,3 +231,106 @@ void calculate_stats_for_predictor(const map<string, vector<FuncInfo*>>& allFunc
stats << "parallel_rate;" << parallel_exec_count / (double)total_exec_count << std::endl;
stats.close();
}
// Writes one semicolon-separated record per recognised DVM directive to the
// file "dirs.csv" in the current working directory.
//
// For every file in allFuncInfo the statements of each function are scanned
// and the following directive kinds are dumped:
//   DVM_PARALLEL_ON_DIR, DVM_REMOTE_ACCESS_DIR - prefixed with the execution
//       count taken from the gcov record of the associated source line;
//   DVM_VAR_DECL, DVM_DISTRIBUTE_DIR, DVM_ALIGN_DIR, DVM_SHADOW_DIR -
//       prefixed with the constant count "1".
//
// allFuncInfo - functions grouped by file name.
// gCovInfo    - gcov records grouped by file name, then by line number.
//
// A file without gcov data, or a directive whose line has no (or a
// mismatching) gcov record, is reported as "bad gcov info" followed by
// printInternalError().
void parseDvmDirForPredictor(const map<string, vector<FuncInfo*>>& allFuncInfo,
                             const map<string, map<int, Gcov_info>>& gCovInfo)
{
    ofstream dirs("dirs.csv");

    for (auto& byFile : allFuncInfo)
    {
        int ok = SgFile::switchToFile(byFile.first);
        if (ok == -1)
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

        auto it = gCovInfo.find(byFile.first);
        if (it == gCovInfo.end())
        {
            __spf_print(1, "bad gcov info\n");
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }

        auto& gcov = it->second;
        for (auto& func : byFile.second)
        {
            SgStatement* stat = func->funcPointer->GetOriginal();
            for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
            {
                SgExpression* list;
                SgExpression* dup;
                auto line = 0;

                switch (st->variant())
                {
                case DVM_PARALLEL_ON_DIR:
                {
                    // walk back to the nearest statement that is not a DVM one
                    // and use its line to look up the execution count
                    auto stat = st;
                    while (isDVM_stat(stat))
                        stat = stat->lexPrev();
                    line = stat->lineNumber(); // XXX

                    list = st->expr(1);
                    auto it = gcov.find(line);
                    if (it == gcov.end())
                    {
                        // no gcov record for this line: never dereference end()
                        __spf_print(1, "bad gcov info\n");
                        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                        break;
                    }

                    auto& info = it->second;
                    if (info.getNumLine() != line)
                    {
                        __spf_print(1, "bad gcov info\n");
                        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                    }

                    dirs << info.getExecutedCount() << ";" << "PARALLEL;" << st->expr(2)->unparse() << ";" << st->expr(0)->unparse() << ";";
                    while (list)
                    {
                        dirs << list->lhs()->unparse() << ";";
                        list = list->rhs();
                    }
                    dirs << std::endl;
                }
                break;
                case DVM_VAR_DECL: // TODO
                    // print the first attribute without its argument, then the argument itself
                    dup = st->expr(2)->lhs()->copyPtr();
                    dup->setLhs(NULL);
                    dirs << "1;" << dup->unparse() << ";" << st->expr(2)->lhs()->lhs()->unparse() << ";" << st->expr(0)->unparse() << ";\n";
                    break;
                case DVM_DISTRIBUTE_DIR:
                    dirs << "1;" << "DISTRIBUTE;" << st->expr(1)->unparse() << ";" << st->expr(0)->unparse() << ";\n";
                    break;
                case DVM_ALIGN_DIR:
                    dirs << "1;" << "ALIGN;" << st->expr(0)->unparse() << "(" << st->expr(1)->unparse() << ");" << st->expr(2)->unparse() << ";\n";
                    break;
                case DVM_SHADOW_DIR:
                    dirs << "1;" << "SHADOW;" << st->expr(0)->unparse() << "(" << st->expr(1)->unparse() << ");\n";
                    break;
                case DVM_REMOTE_ACCESS_DIR:
                {
                    // the execution count is taken from the statement that follows the directive
                    line = st->lexNext()->lineNumber();
                    auto it = gcov.find(line);
                    if (it == gcov.end())
                    {
                        // no gcov record for this line: never dereference end()
                        __spf_print(1, "bad gcov info\n");
                        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                        break;
                    }

                    auto& info = it->second;
                    if (info.getNumLine() != line)
                    {
                        __spf_print(1, "bad gcov info\n");
                        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                    }

                    dirs << info.getExecutedCount() << ";" << "REMOTE_ACCESS;";
                    list = st->expr(0);
                    while (list)
                    {
                        dirs << list->lhs()->unparse() << ";";
                        list = list->rhs();
                    }
                    dirs << "\n";
                    break;
                }
                default:
                    //printf("var = %d line %d\n", st->variant(), st->lineNumber());
                    break;
                }
            }
        }
    }

    dirs.close();
}

View File

@@ -54,4 +54,5 @@ public:
void processFileToPredict(SgFile *file, PredictorStats &predictorCounts);
void calculate_stats_for_predictor(const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo, const std::map<std::string, std::map<int, Gcov_info>>& gCovInfo);
void calculateStatsForPredictor(const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo, const std::map<std::string, std::map<int, Gcov_info>>& gCovInfo);
void parseDvmDirForPredictor(const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo, const std::map<std::string, std::map<int, Gcov_info>>& gCovInfo);

View File

@@ -24,6 +24,8 @@
#include "ParallelizationRegions/resolve_par_reg_conflicts.h"
#include "ParallelizationRegions/expand_extract_reg.h"
#include "Transformations/replace_dist_arrays_in_io.h"
#include "Distribution/Distribution.h"
#include "Distribution/GraphCSR.h"
#include "Distribution/Arrays.h"
@@ -1567,6 +1569,10 @@ static bool runAnalysis(SgProject &project, const int curr_regime, const bool ne
if (error)
internalExit = 1;
}
else if (curr_regime == REMOVE_DIST_ARRAYS_FROM_IO)
{
replaceDistributedArraysInIO(parallelRegions, allFuncInfo, SPF_messages, newCopyDeclToIncl);
}
else if (curr_regime == LOOP_GRAPH)
{
if (keepFiles)
@@ -1894,7 +1900,10 @@ static bool runAnalysis(SgProject &project, const int curr_regime, const bool ne
else if (curr_regime == GET_MIN_MAX_BLOCK_DIST)
__spf_print(1, "GET_MIN_MAX_BLOCK_DIST: %d %d\n", min_max_block.first, min_max_block.second);
else if (curr_regime == GET_STATS_FOR_PREDICTOR)
calculate_stats_for_predictor(allFuncInfo, gCovInfo);
{
calculateStatsForPredictor(allFuncInfo, gCovInfo);
parseDvmDirForPredictor(allFuncInfo, gCovInfo);
}
const float elapsed = duration_cast<milliseconds>(high_resolution_clock::now() - timeForPass).count() / 1000.;
const float elapsedGlobal = duration_cast<milliseconds>(high_resolution_clock::now() - globalTime).count() / 1000.;
@@ -2330,6 +2339,7 @@ void runPass(const int curr_regime, const char *proj_name, const char *folderNam
findFunctionsToInclude(true);
break;
// all these cases run UNPARSE_FILE after
case REMOVE_DIST_ARRAYS_FROM_IO:
case RENAME_SYMBOLS:
case RESOLVE_PAR_REGIONS:
case CREATE_PARALLEL_REGIONS:

View File

@@ -87,6 +87,8 @@ enum passes {
REMOVE_DVM_INTERVALS,
VERIFY_DVM_DIRS,
REMOVE_DIST_ARRAYS_FROM_IO,
SUBST_EXPR,
SUBST_EXPR_RD,
REVERT_SUBST_EXPR,
@@ -260,6 +262,7 @@ static void setPassValues()
passNames[INSERT_INCLUDES] = "INSERT_INCLUDES";
passNames[REMOVE_DVM_DIRS] = "REMOVE_DVM_DIRS";
passNames[VERIFY_DVM_DIRS] = "VERIFY_DVM_DIRS";
passNames[REMOVE_DIST_ARRAYS_FROM_IO] = "REMOVE_DIST_ARRAYS_FROM_IO";
passNames[SUBST_EXPR] = "SUBST_EXPR";
passNames[SUBST_EXPR_RD] = "SUBST_EXPR_RD";
passNames[CALL_GRAPH2] = "CALL_GRAPH2";

View File

@@ -0,0 +1,478 @@
#include "replace_dist_arrays_in_io.h"
#include "../ParallelizationRegions/resolve_par_reg_conflicts.h"
#include <string>
#include <map>
#include <set>
using std::map;
using std::set;
using std::string;
using std::vector;
using std::to_string;
using std::make_pair;
#define DEBUG_TRACE 0
// Walks the expression tree rooted at `exp` and adds to `arrays` the symbol
// of every node that isSgArrayRefExp() recognises.
static void findArrays(SgExpression* exp, set<SgSymbol*>& arrays)
{
    if (exp == NULL)
        return;

    const bool is_array_ref = (isSgArrayRefExp(exp) != NULL);
    if (is_array_ref)
        arrays.insert(exp->symbol());

    // descend into both operands
    findArrays(exp->lhs(), arrays);
    findArrays(exp->rhs(), arrays);
}
// Registers `stat` in `arrays` under every distributed array it references,
// when `stat` is a READ / PRINT / WRITE statement whose form requires the
// array to be replaced.
//
// A statement is left alone (nothing is registered) when its I/O list is a
// single whole-array reference and its unit/format specifiers are "*".
// Any other I/O list, or any other specifier, marks the statement for
// replacement.
//
// arrays - output map: distributed array -> I/O statements that use it.
// stat   - statement to inspect; statements of other kinds are ignored.
static void populateDistributedIoArrays(map<DIST::Array*, set<SgStatement*>>& arrays, SgStatement* stat)
{
    auto var = stat->variant();
    if (var != READ_STAT && var != PRINT_STAT && var != WRITE_STAT)
        return;

    // check if such IO allowed in dvm:
    // list should consist only of single array and format string should be *
    bool need_replace = false;

    SgExpression* ioList = stat->expr(0);
    if (!ioList)
        return;
    if (ioList->variant() != EXPR_LIST)
        return;

    if (ioList->rhs() == NULL)
    {
        SgExpression* arg = ioList->lhs();
        if (!arg)
            return;
        if (arg->variant() != ARRAY_REF)
            return;
        // a reference with a child expression (arg->lhs()) is not a whole-array one
        if (arg->lhs())
            need_replace = true;
    }
    else
    {
        need_replace = true;
    }

    if (!need_replace)
    {
        switch (var)
        {
        case PRINT_STAT:
        {
            SgExpression* fmt = stat->expr(1);
            // validate the whole shape before touching any child node
            if (!fmt || fmt->variant() != SPEC_PAIR ||
                !fmt->lhs() || fmt->lhs()->variant() != KEYWORD_VAL ||
                !fmt->rhs())
            {
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                return;
            }
            if (fmt->rhs()->variant() != KEYWORD_VAL || fmt->rhs()->sunparse() != "*")
                need_replace = true;
            break;
        }
        case READ_STAT:
        case WRITE_STAT:
        {
            SgExpression* spec = stat->expr(1);
            // validate the whole shape before touching any child node
            if (!spec || spec->variant() != EXPR_LIST ||
                !spec->lhs() || spec->lhs()->variant() != SPEC_PAIR ||
                !spec->rhs() || !spec->rhs()->lhs() || spec->rhs()->lhs()->variant() != SPEC_PAIR)
            {
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                return;
            }

            // the trace reads spec->rhs(), so it may only run after the checks above
            __spf_print(DEBUG_TRACE, "[%d: %s (%d)]\n", 2000, spec->rhs()->unparse(), spec->rhs()->variant());

            SgExpression* unit_val = spec->lhs()->rhs(), * fmt_val = spec->rhs()->lhs()->rhs();
            if (!unit_val || !fmt_val)
            {
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                return;
            }

            if (unit_val->variant() != KEYWORD_VAL || unit_val->sunparse() != "*" ||
                fmt_val->variant() != KEYWORD_VAL || fmt_val->sunparse() != "*")
                need_replace = true;
            break;
        }
        default:
            break;
        }
    }

    if (!need_replace)
        return;

    // gather every array referenced by any of the statement's three expressions
    set<SgSymbol*> found_arrays;
    for (int i = 0; i < 3; i++)
        findArrays(stat->expr(i), found_arrays);

    for (auto* by_symb : found_arrays)
    {
        string array_name = string(by_symb->identifier());
        DIST::Array* array_p = getArrayFromDeclarated(declaratedInStmt(by_symb), array_name);
        // only distributed arrays are recorded; the insert is skipped for all others
        if (array_p && array_p->GetDistributeFlagVal() == Distribution::distFlag::DISTR && arrays[array_p].insert(stat).second)
            __spf_print(DEBUG_TRACE, "[%d]: add array %s\n", stat->lineNumber(), array_p->GetName().c_str());
    }

    __spf_print(DEBUG_TRACE, "[replace]\n");
}
// Recursively substitutes `replace_by` for every symbol in the expression
// tree `exp` whose identifier equals that of `arr`.
//
// has_read / has_write are set (never cleared) when a substituted node was
// reached in a reading / writing position; from_read / from_write describe
// the position of `exp` itself. Operands of an EXPR_LIST inherit the
// position of the list, the lhs() of a FUNC_CALL is treated as read+write,
// everything else is treated as read-only.
static void replaceArrayRec(SgSymbol* arr, SgSymbol* replace_by, SgExpression* exp, bool& has_read, bool& has_write, bool from_read, bool from_write)
{
    if (exp == NULL)
        return;

    SgSymbol* current = exp->symbol();
    if (current && strcmp(current->identifier(), arr->identifier()) == 0)
    {
        has_read = has_read || from_read;
        has_write = has_write || from_write;
        exp->setSymbol(replace_by);
    }

    const int kind = exp->variant();
    if (kind == FUNC_CALL)
    {
        replaceArrayRec(arr, replace_by, exp->rhs(), has_read, has_write, true, false);
        replaceArrayRec(arr, replace_by, exp->lhs(), has_read, has_write, true, true);
    }
    else if (kind == EXPR_LIST)
    {
        replaceArrayRec(arr, replace_by, exp->lhs(), has_read, has_write, from_read, from_write);
        replaceArrayRec(arr, replace_by, exp->rhs(), has_read, has_write, from_read, from_write);
    }
    else
    {
        replaceArrayRec(arr, replace_by, exp->lhs(), has_read, has_write, true, false);
        replaceArrayRec(arr, replace_by, exp->rhs(), has_read, has_write, true, false);
    }
}
// Statement-level entry point: substitutes `replace_by` for `arr` in the
// expressions of `st` and accumulates in has_read / has_write how the array
// was used.
//
// For ASSIGN_STAT and READ_STAT expr(0) is a write target and expr(1) is
// read; for PROC_STAT and FUNC_STAT expr(0) is read and expr(1) is treated
// as read+write; for any other statement all three expressions are read.
static void replaceArrayRec(SgSymbol* arr, SgSymbol* replace_by, SgStatement* st, bool& has_read, bool& has_write)
{
    if (st == NULL)
        return;

    const int kind = st->variant();

    if (kind == ASSIGN_STAT || kind == READ_STAT)
    {
        replaceArrayRec(arr, replace_by, st->expr(0), has_read, has_write, false, true);
        replaceArrayRec(arr, replace_by, st->expr(1), has_read, has_write, true, false);
        return;
    }

    if (kind == PROC_STAT || kind == FUNC_STAT)
    {
        replaceArrayRec(arr, replace_by, st->expr(0), has_read, has_write, true, false);
        replaceArrayRec(arr, replace_by, st->expr(1), has_read, has_write, true, true);
        return;
    }

    for (int idx = 0; idx < 3; ++idx)
        replaceArrayRec(arr, replace_by, st->expr(idx), has_read, has_write, true, false);
}
// Replaces every use of `replace_symb` by `replace_by` in the statements from
// `start` (after skipping forward, see below) through `last` inclusive, and
// inserts whole-array copy assignments around that range:
//   - "replace_by = replace_symb" after `start` when a replaced use was a read;
//   - "replace_symb = replace_by" before `last` when a replaced use was a write.
static void copyArrayBetweenStatements(SgSymbol* replace_symb, SgSymbol* replace_by, SgStatement* start, SgStatement* last)
{
// move `start` forward while the statement that follows it is not executable
while (start->lexNext() && !isSgExecutableStatement(start->lexNext()))
start = start->lexNext();
// `stop` is the first statement past the range, so `last` itself is processed
auto* stop = last->lexNext();
bool has_read = false, has_write = false;
for (auto* st = start; st != stop; st = st->lexNext())
replaceArrayRec(replace_symb, replace_by, st, has_read, has_write);
if (has_read)
{
// A_copy = A
SgAssignStmt* assign = new SgAssignStmt(*new SgArrayRefExp(*replace_by), *new SgArrayRefExp(*replace_symb));
assign->setlineNumber(getNextNegativeLineNumber()); // before region
// when `start` is the closing node of its control parent, attach the new
// statement to the enclosing parent instead
// NOTE(review): `parent` is dereferenced below without a null check - confirm
// that controlParent() cannot return NULL here
auto* parent = start->controlParent();
if (parent && parent->lastNodeOfStmt() == start)
parent = parent->controlParent();
start->insertStmtAfter(*assign, *parent);
}
if (has_write)
{
// A = A_copy
SgAssignStmt* assign = new SgAssignStmt(*new SgArrayRefExp(*replace_symb), *new SgArrayRefExp(*replace_by));
//TODO: bug with insertion
//assign->setlineNumber(getNextNegativeLineNumber()); // after region
last->insertStmtBefore(*assign, *(last->controlParent()));
}
}
// Replaces the distributed array `arr` by its copy `replace_by` around every
// I/O statement listed in `usages`, inside the fragment [start, last].
//
// For each I/O statement the nearest enclosing statement that is either
// already handled, or is an ancestor (or the node itself) of `start` or of
// `last`, is looked up. The replacement range is that scope, narrowed to the
// part lying inside the fragment, and is handed to
// copyArrayBetweenStatements(). Each scope is processed once.
//
// arr        - array whose declaration symbol is replaced.
// usages     - I/O statements that reference `arr`.
// replace_by - symbol of the copy to use instead.
// start/last - fragment boundaries.
// filename   - not used by this function.
static void replaceArrayInFragment(DIST::Array* arr, const set<SgStatement*>& usages, SgSymbol* replace_by, SgStatement* start, SgStatement* last, const string& filename)
{
    while (start->lexNext() && !isSgExecutableStatement(start->lexNext()))
        start = start->lexNext();

    auto* replace_symb = arr->GetDeclSymbol();

    // not_opened: `start` and all its control ancestors (scopes opened before the fragment)
    // not_closed: `last` and all its control ancestors (scopes closed after the fragment)
    // copied:     scopes that have already been processed
    set<SgStatement*> not_opened, not_closed, copied;
    for (auto* it = start; it; it = it->controlParent())
        not_opened.insert(it);
    for (auto* it = last; it; it = it->controlParent())
        not_closed.insert(it);

    for (auto* io_stmt : usages)
    {
        bool already_copied = false;
        SgStatement* copy_scope = NULL;

        for (auto* par = io_stmt; par; par = par->controlParent())
        {
            if (copied.find(par) != copied.end())
            {
                already_copied = true;
                break;
            }
            else if (not_opened.find(par) != not_opened.end() || not_closed.find(par) != not_closed.end())
            {
                copy_scope = par;
                break;
            }
        }

        if (already_copied)
            continue;

        // the ancestor chain shares nothing with the fragment boundaries:
        // there is no scope to work with, so report it instead of dereferencing NULL
        if (!copy_scope)
        {
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
            return;
        }

        auto* scope_start = copy_scope, * scope_end = copy_scope->lastNodeOfStmt();
        __spf_print(DEBUG_TRACE, "[scope to copy] %d\n", copy_scope->lineNumber());

        // the scope was opened before the fragment: begin after the child of
        // copy_scope that contains `start`
        if (not_opened.find(copy_scope) != not_opened.end())
        {
            auto* from = start->lastNodeOfStmt() ? start->lastNodeOfStmt() : start;
            for (auto* st = from; st; st = st->controlParent())
            {
                __spf_print(DEBUG_TRACE, "[find start of parent %d] %d\n", copy_scope->lineNumber(), st->lineNumber());
                if (st->controlParent() == copy_scope)
                {
                    scope_start = st->lastNodeOfStmt() ? st->lastNodeOfStmt() : st;
                    break;
                }
            }
        }

        // the scope is closed after the fragment: end at the child of
        // copy_scope that contains `last`
        if (not_closed.find(copy_scope) != not_closed.end())
        {
            for (auto* st = last; st; st = st->controlParent())
            {
                __spf_print(DEBUG_TRACE, "[find end of parent %d] %d\n", copy_scope->lineNumber(), st->lineNumber());
                if (st->controlParent() == copy_scope)
                {
                    scope_end = st;
                    break;
                }
            }
        }

        copyArrayBetweenStatements(replace_symb, replace_by, scope_start, scope_end);
        __spf_print(DEBUG_TRACE, "[copy %s] [%d, %d]\n", arr->GetName().c_str(), scope_start->lineNumber(), scope_end->lineNumber());
        copied.insert(copy_scope);
    }
}
// Tells whether `stat` delimits an I/O region.
//
// Returns true when `stat`
//   - is a procedure/function call statement, a program/function/procedure
//     header, or a loop statement (FOR_NODE / LOOP_NODE);
//   - is the closing node (lastNodeOfStmt) of the previous bound
//     `last_io_bound`;
//   - is a CONTROL_END whose control parent is a program, procedure or
//     function header.
// Returns false otherwise. `last_io_bound` may be NULL.
static bool ioReginBound(SgStatement* stat, SgStatement* last_io_bound)
{
    const auto var = stat->variant();

    if (var == PROC_STAT || var == FUNC_STAT || var == PROG_HEDR || var == FUNC_HEDR || var == PROC_HEDR || var == FOR_NODE || var == LOOP_NODE)
        return true;

    if (last_io_bound && last_io_bound->lastNodeOfStmt() && last_io_bound->lastNodeOfStmt() == stat)
        return true;

    if (var == CONTROL_END)
    {
        // a CONTROL_END without a control parent cannot close a program unit
        SgStatement* parent = stat->controlParent();
        if (parent)
        {
            const int parent_var = parent->variant();
            if (parent_var == PROG_HEDR || parent_var == PROC_HEDR || parent_var == FUNC_HEDR)
                return true;
        }
    }

    return false;
}
// Pass entry point: for every parallel region, replaces distributed arrays
// that are used in I/O statements by copies.
//
// Each line fragment of each file of a region is walked statement by
// statement. Statements for which ioReginBound() is true split the fragment
// into "io regions"; inside one io region populateDistributedIoArrays()
// collects, per distributed array, the I/O statements that need the
// replacement. When the next bound is reached, replaceArrayInFragment() is
// called for every collected array. The copy of an array is created once
// (copyArray) and reused afterwards.
//
// regions           - parallel regions to process.
// allFuncInfo       - not referenced in this function.
// SPF_messages      - not referenced in this function.
// newDeclsToInclude - forwarded to copyArray().
void replaceDistributedArraysInIO(vector<ParallelRegion*>& regions,
const map<string, vector<FuncInfo*>>& allFuncInfo,
map<string, vector<Messages>>& SPF_messages,
map<string, map<int, set<string>>>& newDeclsToInclude)
{
// array -> symbol of its copy; shared by all regions and files
map<DIST::Array*, SgSymbol*> created_copies;
// bookkeeping handed to copyArray()
map<string, map<int, set<string>>> copied;
for (auto& region : regions)
{
__spf_print(DEBUG_TRACE, "[%s]: enter region\n", region->GetName().c_str());
for (auto& linesByFile : region->GetAllLinesToModify())
{
const auto& filename = linesByFile.first;
if (SgFile::switchToFile(filename) < 0) {
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
return;
}
for (auto& lines : linesByFile.second) {
__spf_print(DEBUG_TRACE, "[fragment] %s: %d:%d %d\n", filename.c_str(), lines.lines.first,
lines.lines.second, lines.isImplicit());
// [curr_stmt, end) is the statement range of this fragment
SgStatement* curr_stmt, * end;
if (lines.isImplicit())
{
// implicit fragment: boundaries are looked up by line number
curr_stmt = current_file->SgStatementAtLine(lines.lines.first);
end = current_file->SgStatementAtLine(lines.lines.second);
if (end)
end = end->lexNext();
}
else
{
// explicit fragment: boundaries are taken from the stored statements
curr_stmt = lines.stats.first->GetOriginal();
end = lines.stats.second->GetOriginal()->lexNext();
}
// arrays (with their I/O statements) collected since the last bound
map<DIST::Array*, set<SgStatement*>> need_replace;
SgStatement* last_io_bound = NULL;
while (curr_stmt != end)
{
if (!curr_stmt)
break;
auto var = curr_stmt->variant();
if (var == PROC_HEDR || var == PROG_HEDR || var == FUNC_HEDR)
{
// skip the non-executable statements that follow a program unit header;
// the last skipped one becomes the current bound
curr_stmt = curr_stmt->lexNext();
while (curr_stmt && !isSgExecutableStatement(curr_stmt))
{
last_io_bound = curr_stmt;
curr_stmt = curr_stmt->lexNext();
}
if (!curr_stmt)
break;
}
if (ioReginBound(curr_stmt, last_io_bound))
{
// a bound closes the current io region: flush what was collected
if (last_io_bound)
{
__spf_print(DEBUG_TRACE, "[io region] [%d, %d]\n", last_io_bound->lineNumber(), curr_stmt->lineNumber());
for (const auto& p : need_replace)
{
auto it = created_copies.find(p.first);
if (it != created_copies.end())
replaceArrayInFragment(p.first, p.second, it->second, last_io_bound, curr_stmt, filename);
else
{
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
return;
}
}
}
need_replace.clear();
last_io_bound = curr_stmt;
}
// NOTE(review): need_replace is flushed only here, at a bound; entries collected
// after the last bound of a fragment are not processed - confirm this is intended
__spf_print(DEBUG_TRACE, "[line] %d (%d)\n", curr_stmt->lineNumber(), curr_stmt->variant());
for (int i = 0; i < 3; i++)
{
if (curr_stmt->expr(i))
{
__spf_print(DEBUG_TRACE, "[%d: %s (%d)]\n", i, curr_stmt->expr(i)->unparse(), curr_stmt->expr(i)->variant());
}
}
populateDistributedIoArrays(need_replace, curr_stmt);
// make sure a copy exists for every array collected so far
for (const auto& by_array_to_copy : need_replace)
{
auto* array_to_copy = by_array_to_copy.first;
auto it = created_copies.find(array_to_copy);
if (it == created_copies.end())
{
bool fromModule = (array_to_copy->GetLocation().first == DIST::l_MODULE);
const string locationName = array_to_copy->GetLocation().second;
// the first declaration record of the array is used as the place of the copy
auto place = *array_to_copy->GetDeclInfo().begin();
string fileName = place.first;
// copy name suffix: "_io_m" for module arrays, "_io_l" otherwise, plus the region id
string suffix = "_io_l";
if (fromModule)
suffix = "_io_m";
auto origCopy = copyArray(place, array_to_copy, linesByFile.second, suffix + to_string(region->GetId()), fileName, newDeclsToInclude, copied);
// attach a PROCESS_PRIVATE directive for the copy to the statement that follows the declaration
SgStatement* decl = SgStatement::getStatementByFileAndLine(place.first, place.second);
if(decl)
decl = decl->lexNext();
if(decl)
{
string dir_str;
if (decl->comments())
{
// keep the directive on its own line when the existing comment lacks a trailing newline
string str_comment = string(decl->comments());
if(str_comment.size() && str_comment.back() != '\n')
dir_str += "\n";
}
dir_str += "!$SPF ANALYSIS(PROCESS_PRIVATE(" + string(origCopy.second->identifier()) + "))\n";
decl->addComment(dir_str.c_str());
}
// NOTE(review): origCopy.second is used and stored without a null check - confirm copyArray() cannot fail
created_copies.insert({ array_to_copy, origCopy.second });
}
}
curr_stmt = curr_stmt->lexNext();
}
}
}
}
}

View File

@@ -0,0 +1,11 @@
#pragma once
#include "../ParallelizationRegions/ParRegions.h"
#include "../Utils/SgUtils.h"
#include "../Utils/errors.h"
#include "../GraphCall/graph_calls.h"
void replaceDistributedArraysInIO(std::vector<ParallelRegion*>& regions,
const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo,
std::map<std::string, std::vector<Messages>>& SPF_messages,
std::map<std::string, std::map<int, std::set<std::string>>>& newDeclsToInclude);

View File

@@ -257,6 +257,8 @@ void InitPassesDependencies(map<passes, vector<passes>> &passDepsIn, set<passes>
list({ REVERT_SUBST_EXPR_RD, CONVERT_LOOP_TO_ASSIGN }) <= Pass(RESOLVE_PAR_REGIONS);
list({ REVERT_SUBST_EXPR_RD, CONVERT_LOOP_TO_ASSIGN, FILL_PAR_REGIONS}) <= Pass(REMOVE_DIST_ARRAYS_FROM_IO);
Pass(REVERT_SUBST_EXPR_RD) <= Pass(EXPAND_EXTRACT_PAR_REGION);
Pass(FILL_PAR_REGIONS) <= Pass(PRINT_PAR_REGIONS_ERRORS);