Refactor shared memory parallelization #49

Merged
Alexander_KS merged 10 commits from refactor_shared_memory_parallelization into master 2024-07-18 06:50:40 +00:00
4 changed files with 207 additions and 209 deletions
Showing only changes of commit 444f7e36bc - Show all commits

View File

@@ -1843,10 +1843,10 @@ void selectParallelDirectiveForVariant(File* file, ParallelRegion* currParReg,
const bool isMyRegion = loop->region == currParReg;
const bool noUserDir = loop->userDvmDirective == NULL;
DIST::Array* sameAlignTemplate = NULL;
const bool sameAligns = loop->isArrayTemplatesTheSame(sameAlignTemplate, regionId, arrayLinksByFuncCalls);
const bool sameAligns = sharedMemoryParallelization ? true : loop->isArrayTemplatesTheSame(sameAlignTemplate, regionId, arrayLinksByFuncCalls);
bool freeLoopDistr = true;
if (hasDirective && loop->directive->arrayRef2->IsLoopArray())
if (!sharedMemoryParallelization && hasDirective && loop->directive->arrayRef2->IsLoopArray())
{
bool ok = false;
for (auto& elem : distribution)
@@ -1879,33 +1879,38 @@ void selectParallelDirectiveForVariant(File* file, ParallelRegion* currParReg,
//try to unite loops
if (createNestedLoops(loop, depInfoForLoopGraph, mapFuncInfo, messages))
parDirective = loop->recalculateParallelDirective();
bool topCheck = isOnlyTopPerfect(loop, distribution);
bool needToContinue = false;
if (topCheck)
if(!sharedMemoryParallelization)
{
//<Array, linksWithTempl> -> dims not matched
map<DIST::Array*, vector<bool>> dimsNotMatch;
if (!checkCorrectness(*parDirective, distribution, reducedG, allArrays, arrayLinksByFuncCalls, loop->getAllArraysInLoop(), messages, loop->lineNum, dimsNotMatch, regionId))
{
if (!tryToResolveUnmatchedDims(dimsNotMatch, loop, regionId, parDirective, reducedG, allArrays, arrayLinksByFuncCalls, distribution, mapFuncInfo))
needToContinue = addRedistributionDirs(file, distribution, toInsert, loop, mapLoopsInFile, parDirective, regionId, messages, arrayLinksByFuncCalls, sameAlignTemplate);
}
}
else
needToContinue = addRedistributionDirs(file, distribution, toInsert, loop, mapLoopsInFile, parDirective, regionId, messages, arrayLinksByFuncCalls, sameAlignTemplate);
bool topCheck = isOnlyTopPerfect(loop, distribution);
if (needToContinue)
continue;
bool needToContinue = false;
if (topCheck)
{
//<Array, linksWithTempl> -> dims not matched
map<DIST::Array*, vector<bool>> dimsNotMatch;
if (!checkCorrectness(*parDirective, distribution, reducedG, allArrays, arrayLinksByFuncCalls, loop->getAllArraysInLoop(), messages, loop->lineNum, dimsNotMatch, regionId))
{
if (!tryToResolveUnmatchedDims(dimsNotMatch, loop, regionId, parDirective, reducedG, allArrays, arrayLinksByFuncCalls, distribution, mapFuncInfo))
needToContinue = addRedistributionDirs(file, distribution, toInsert, loop, mapLoopsInFile, parDirective, regionId, messages, arrayLinksByFuncCalls, sameAlignTemplate);
}
}
else
needToContinue = addRedistributionDirs(file, distribution, toInsert, loop, mapLoopsInFile, parDirective, regionId, messages, arrayLinksByFuncCalls, sameAlignTemplate);
if (needToContinue)
continue;
}
vector<pair<DIST::Array*, const DistrVariant*>> newRules;
constructRules(newRules, distribution, loop);
if(!sharedMemoryParallelization)
constructRules(newRules, distribution, loop);
Directive* dirImpl = parDirective->genDirective(file, newRules, loop, reducedG, allArrays, regionId, arrayLinksByFuncCalls);
#if __SPF
//move label before loop
if (loop->hasRedistribute())
if (!sharedMemoryParallelization && loop->hasRedistribute())
{
auto prev = loop->loop->lexPrev();
if (!prev)

View File

@@ -281,75 +281,102 @@ static vector<SgExpression*>
for (int z = 0; z < loops.size(); ++z)
{
currLoop = loops[z];
const uint64_t regId = sharedMemoryParallelization ? (uint64_t)currLoop : currLoop->region->GetId();
auto dirForLoop = currLoop->directiveForLoop;
auto tmplP = pairs.first->GetTemplateArray(regId, sharedMemoryParallelization != 0);
auto links = pairs.first->GetLinksWithTemplate(regId);
// no mapping for this loop, skip this
if (tmplP == dirForLoop->arrayRef)
if(!sharedMemoryParallelization)
{
for (int z = 0; z < links.size(); ++z)
const uint64_t regId = sharedMemoryParallelization ? (uint64_t)currLoop : currLoop->region->GetId();
auto dirForLoop = currLoop->directiveForLoop;
auto tmplP = pairs.first->GetTemplateArray(regId, sharedMemoryParallelization != 0);
auto links = pairs.first->GetLinksWithTemplate(regId);
// no mapping for this loop, skip this
if (tmplP == dirForLoop->arrayRef)
{
int dim = links[z];
if (dim >= 0)
for (int z = 0; z < links.size(); ++z)
{
if (dirForLoop->on[dim].first != "*")
int dim = links[z];
if (dim >= 0)
{
needToAdd = true;
subs[z] = new SgVarRefExp(findSymbolOrCreate(file, dirForLoop->on[dim].first));
break;
if (dirForLoop->on[dim].first != "*")
{
needToAdd = true;
subs[z] = new SgVarRefExp(findSymbolOrCreate(file, dirForLoop->on[dim].first));
break;
}
}
}
}
}
else if (pairs.second == dirForLoop->arrayRef)
{
for (int z = 0; z < dirForLoop->on.size(); ++z)
{
if (dirForLoop->on[z].first != "*")
{
needToAdd = true;
subs[z] = new SgVarRefExp(findSymbolOrCreate(file, dirForLoop->on[z].first));
break;
}
}
}
else if (!dirForLoop->arrayRef->IsTemplate())
{
set<DIST::Array*> realRefsLocal;
getRealArrayRefs(dirForLoop->arrayRef, dirForLoop->arrayRef, realRefsLocal, arrayLinksByFuncCalls);
if (realRefsLocal.size() == 0)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
auto tmplP = (*realRefsLocal.begin())->GetTemplateArray(regId, sharedMemoryParallelization != 0);
auto links = (*realRefsLocal.begin())->GetLinksWithTemplate(regId);
auto tmplP_et = pairs.first->GetTemplateArray(regId, sharedMemoryParallelization != 0);
auto links_et = pairs.first->GetLinksWithTemplate(regId);
if (tmplP == tmplP_et)
else if (pairs.second == dirForLoop->arrayRef)
{
for (int z = 0; z < dirForLoop->on.size(); ++z)
{
if (dirForLoop->on[z].first != "*")
{
const int idx = links[z];
for (int p = 0; p < links_et.size(); ++p)
{
if (idx >= 0 && links_et[p] == idx)
{
subs[p] = new SgVarRefExp(findSymbolOrCreate(file, dirForLoop->on[z].first));
needToAdd = true;
break;
}
}
{
needToAdd = true;
subs[z] = new SgVarRefExp(findSymbolOrCreate(file, dirForLoop->on[z].first));
break;
}
}
}
else if (!dirForLoop->arrayRef->IsTemplate())
{
set<DIST::Array*> realRefsLocal;
getRealArrayRefs(dirForLoop->arrayRef, dirForLoop->arrayRef, realRefsLocal, arrayLinksByFuncCalls);
if (realRefsLocal.size() == 0)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
auto tmplP = (*realRefsLocal.begin())->GetTemplateArray(regId, sharedMemoryParallelization != 0);
auto links = (*realRefsLocal.begin())->GetLinksWithTemplate(regId);
auto tmplP_et = pairs.first->GetTemplateArray(regId, sharedMemoryParallelization != 0);
auto links_et = pairs.first->GetLinksWithTemplate(regId);
if (tmplP == tmplP_et)
{
for (int z = 0; z < dirForLoop->on.size(); ++z)
{
if (dirForLoop->on[z].first != "*")
{
const int idx = links[z];
for (int p = 0; p < links_et.size(); ++p)
{
if (idx >= 0 && links_et[p] == idx)
{
subs[p] = new SgVarRefExp(findSymbolOrCreate(file, dirForLoop->on[z].first));
needToAdd = true;
break;
}
}
break;
}
}
}
}
}
else
{
for (const auto& source : { currLoop->readOpsForLoop, currLoop->writeOpsForLoop }) {
auto array_it = source.find(pairs.second);
if (array_it != source.end()) {
bool dim_found = false;
for (int i = 0; i < array_it->second.size(); i++) {
if (array_it->second[i].coefficients.size() != 0)
{
needToAdd = true;
dim_found = true;
subs[i] = new SgVarRefExp(findSymbolOrCreate(file, currLoop->loopSymbol));
break;
}
}
if (dim_found)
break;
}
}
}
}
@@ -477,7 +504,7 @@ ParallelDirective::genDirective(File* file, const vector<pair<DIST::Array*, cons
{
const set<DIST::Array*>& acrossOutAttribute = currLoop->acrossOutAttribute;
const map<DIST::Array*, pair<vector<ArrayOp>, vector<bool>>>& readOps = currLoop->readOps;
map< DIST::Array*, vector<ArrayOp>>& remoteReads = currLoop->remoteRegularReads;
map<DIST::Array*, vector<ArrayOp>>& remoteReads = currLoop->remoteRegularReads;
Statement* loop = currLoop->loop;
string directive = "";
@@ -501,6 +528,10 @@ ParallelDirective::genDirective(File* file, const vector<pair<DIST::Array*, cons
LoopGraph* pLoop = currLoop;
const set<string> allFiles = getAllFilesInProject();
map<string, DIST::Array*> arrayByName;
for (DIST::Array* arr : currLoop->getAllArraysInLoop())
arrayByName[arr->GetName()] = arr;
for (int z = 0; z < nested; ++z)
{
loopSymbs.push_back(loopG->symbol());
@@ -552,21 +583,25 @@ ParallelDirective::genDirective(File* file, const vector<pair<DIST::Array*, cons
p->setRhs(NULL);
}
DIST::Array* mapTo = arrayRef2->IsLoopArray() ? arrayRef : arrayRef2;
auto onTo = arrayRef2->IsLoopArray() ? on : on2;
DIST::Array* mapTo;
dirStatement[2] = new Expression(expr);
if (sharedMemoryParallelization)
{
directive += ")";
}
else
{
mapTo = arrayRef2->IsLoopArray() ? arrayRef : arrayRef2;
directive += ") ON " + mapTo->GetShortName() + "(";
}
SgArrayRefExp* arrayExpr = NULL;
string arrayExprS = "";
if (!sharedMemoryParallelization)
{
auto onTo = arrayRef2->IsLoopArray() ? on : on2;
SgSymbol* symbForPar = NULL;
if (arrayRef->IsTemplate())
{
@@ -729,10 +764,23 @@ ParallelDirective::genDirective(File* file, const vector<pair<DIST::Array*, cons
{
const int i1 = ordered[k];
vector<map<pair<int, int>, int>> shiftsByAccess;
DIST::Array* currArray = NULL;
DIST::Array* currArray = allArrays.GetArrayByName(across[i1].first.second);
if (currArray == NULL)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
if (!sharedMemoryParallelization)
{
currArray = allArrays.GetArrayByName(across[i1].first.second);
if (currArray == NULL)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
else
{
auto currArray_it = arrayByName.find(across[i1].first.second);
if (currArray_it == arrayByName.end())
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
currArray = currArray_it->second;
}
bool isOut = acrossOutAttribute.find(currArray) != acrossOutAttribute.end();
string bounds = genBounds(across[i1], acrossShifts[i1], reducedG, allArrays, remoteReads, readOps, true, regionId, distribution, arraysInAcross, shiftsByAccess, arrayLinksByFuncCalls);

View File

@@ -268,71 +268,30 @@ static inline string calculateShifts(DIST::GraphCSR<int, double, attrType> &redu
const uint64_t regionId,
const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls)
{
vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForOn =
getAlignRuleWithTemplate(arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForOn, ruleForShadow;
vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForShadow =
getAlignRuleWithTemplate(calcForArray, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
if (!sharedMemoryParallelization)
{
ruleForOn = getAlignRuleWithTemplate(arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
ruleForShadow = getAlignRuleWithTemplate(calcForArray, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
}
string out = "";
// check for distributed and not mapped dims -> zero them out ('coeffs.second')
set<DIST::Array*> refs;
getRealArrayRefs(calcForArray, calcForArray, refs, arrayLinksByFuncCalls);
if (sharedMemoryParallelization == 0)
{//TODO: need to correct errors
/*for (auto& array : refs)
{
DIST::Array* tmpl = array->GetTemplateArray(regionId);
checkNull(tmpl, convertFileName(__FILE__).c_str(), __LINE__);
auto align = array->GetLinksWithTemplate(regionId);
bool found = false;
for (auto& t : distribution)
{
if (t.first == tmpl)
{
found = true;
for (int aDim = 0; aDim < align.size(); ++aDim)
{
int link = align[aDim];
if (link != -1)
{
int tLink = link;
if (!arrayRef->IsTemplate())
{
auto alignMain = arrayRef->GetLinksWithTemplate(regionId);
for (int z = 0; z < alignMain.size(); ++z)
if (alignMain[z] == tLink)
tLink = z;
}
if (t.second->distRule[link] == dist::BLOCK && baseOnRule[aDim].first == "*")
{
for (int z = 0; z < coeffs.second.size(); ++z)
coeffs.second[z].first = coeffs.second[z].second = 0;
return out;
}
}
}
}
}
if (!found)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
} */
}
const pair<vector<ArrayOp>, vector<bool>> *currReadOp = NULL;
auto readIt = readOps.find(calcForArray);
if (readIt != readOps.end())
currReadOp = &(readIt->second);
findAndReplaceDimentions(ruleForOn, allArrays);
findAndReplaceDimentions(ruleForShadow, allArrays);
if(!sharedMemoryParallelization)
{
findAndReplaceDimentions(ruleForOn, allArrays);
findAndReplaceDimentions(ruleForShadow, allArrays);
}
const int len = (int)coeffs.second.size();
vector<pair<int, int>> shift(len);
@@ -352,7 +311,20 @@ static inline string calculateShifts(DIST::GraphCSR<int, double, attrType> &redu
// no unrecognized read operations
if (currReadOp->second[k] == false)
{
if (get<0>(ruleForShadow[k]) != NULL)
if (sharedMemoryParallelization)
{
for (auto& coefs : currReadOp->first[k].coefficients)
{
auto currAccess = coefs.first;
const int currShift = coefs.first.second;
auto itFound = shiftsByAccess[k].find(currAccess);
if (itFound == shiftsByAccess[k].end())
itFound = shiftsByAccess[k].insert(itFound, make_pair(currAccess, currShift));
}
}
else if (get<0>(ruleForShadow[k]) != NULL)
{
const pair<int, int> currRuleShadow = get<2>(ruleForShadow[k]);
@@ -451,22 +423,25 @@ static inline string calculateShifts(DIST::GraphCSR<int, double, attrType> &redu
}
}
if (coeffs.second[k].first + shift[k].first < 0)
shift[k].first = -coeffs.second[k].first;
if (coeffs.second[k].second + shift[k].second < 0)
shift[k].second = -coeffs.second[k].second;
if (isAcross)
if(!sharedMemoryParallelization)
{
if (coeffs.second[k] == make_pair(0, 0))
shift[k] = make_pair(0, 0);
}
else if (isNonDistributedDim(ruleForOn, ruleForShadow, k, distribution, parallelOnRule))
{
shift[k].first = -coeffs.second[k].first;
shift[k].second = -coeffs.second[k].second;
shiftsByAccess[k].clear();
if (coeffs.second[k].first + shift[k].first < 0)
shift[k].first = -coeffs.second[k].first;
if (coeffs.second[k].second + shift[k].second < 0)
shift[k].second = -coeffs.second[k].second;
if (isAcross)
{
if (coeffs.second[k] == make_pair(0, 0))
shift[k] = make_pair(0, 0);
}
else if (isNonDistributedDim(ruleForOn, ruleForShadow, k, distribution, parallelOnRule))
{
shift[k].first = -coeffs.second[k].first;
shift[k].second = -coeffs.second[k].second;
shiftsByAccess[k].clear();
}
}
sprintf(buf, "%d:%d", coeffs.second[k].first + shift[k].first, coeffs.second[k].second + shift[k].second);
@@ -500,34 +475,37 @@ string ParallelDirective::genBounds(pair<pair<string, string>, vector<pair<int,
checkNull(shadowArray, convertFileName(__FILE__).c_str(), __LINE__);
auto on_ext = on;
//replace to template align ::on
if (arrayRef->IsTemplate() == false && sharedMemoryParallelization == 0)
if(!sharedMemoryParallelization)
{
vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForRef =
getAlignRuleWithTemplate(arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
findAndReplaceDimentions(ruleForRef, allArrays);
on_ext.clear();
for (int i = 0; i < ruleForRef.size(); ++i)
//replace to template align ::on
if (arrayRef->IsTemplate() == false)
{
if (get<0>(ruleForRef[i]))
vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForRef =
getAlignRuleWithTemplate(arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
findAndReplaceDimentions(ruleForRef, allArrays);
on_ext.clear();
for (int i = 0; i < ruleForRef.size(); ++i)
{
on_ext.resize(get<0>(ruleForRef[i])->GetDimSize());
break;
if (get<0>(ruleForRef[i]))
{
on_ext.resize(get<0>(ruleForRef[i])->GetDimSize());
break;
}
}
if (on_ext.size() == 0)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
std::fill(on_ext.begin(), on_ext.end(), make_pair("*", make_pair(0, 0)));
for (int i = 0; i < ruleForRef.size(); ++i)
if (get<0>(ruleForRef[i]))
on_ext[get<1>(ruleForRef[i])] = on[i];
}
if (on_ext.size() == 0)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
std::fill(on_ext.begin(), on_ext.end(), make_pair("*", make_pair(0, 0)));
for (int i = 0; i < ruleForRef.size(); ++i)
if (get<0>(ruleForRef[i]))
on_ext[get<1>(ruleForRef[i])] = on[i];
}
//replace single dim with the keyword 'SINGLE'
for (int i = 0; i < on_ext.size(); ++i)
//replace single dim with the keyword 'SINGLE'
for (int i = 0; i < on_ext.size(); ++i)
{
if (on_ext[i].first != "*")
{
@@ -535,8 +513,9 @@ string ParallelDirective::genBounds(pair<pair<string, string>, vector<pair<int,
on_ext[i].first = "SINGLE";
}
}
}
string ret = "";
string ret = "";
if (isAcross)
{
arraysInAcross.insert(shadowArray);

View File

@@ -541,7 +541,7 @@ static bool runAnalysis(SgProject &project, const int curr_regime, const bool ne
}
else if (curr_regime == CORRECT_FORMAT_PLACE)
checkAndMoveFormatOperators(file, getObjectForFileFromMap(file_name, SPF_messages), false);
else if (curr_regime == CREATE_PARALLEL_DIRS)
else if (curr_regime == CREATE_PARALLEL_DIRS || curr_regime == INSERT_PARALLEL_DIRS_NODIST)
{
auto &loopsInFile = getObjectForFileFromMap(file_name, loopGraph);
@@ -568,40 +568,6 @@ static bool runAnalysis(SgProject &project, const int curr_regime, const bool ne
UniteNestedDirectives(loopsInFile);
}
else if (curr_regime == INSERT_PARALLEL_DIRS_NODIST)
{
auto& loopsInFile = getObjectForFileFromMap(file_name, loopGraph);
map<int, LoopGraph*> mapLoopsInFile;
createMapLoopGraph(loopsInFile, mapLoopsInFile);
map<string, FuncInfo*> mapFuncInfo;
createMapOfFunc(allFuncInfo, mapFuncInfo);
for (int z = 0; z < parallelRegions.size(); ++z)
{
vector<Directive*> toInsert;
DIST::Arrays<int>& allArrays = parallelRegions[z]->GetAllArraysToModify();
map<LoopGraph*, void*> depInfoForLoopGraphV;
for (auto& elem : depInfoForLoopGraph)
depInfoForLoopGraphV[elem.first] = elem.second;
selectParallelDirectiveForVariantNoDist(new File(file), parallelRegions[z], allArrays, loopsInFile, mapLoopsInFile, mapFuncInfo,
toInsert, arrayLinksByFuncCalls, depInfoForLoopGraphV, getObjectForFileFromMap(file_name, SPF_messages));
if (toInsert.size() > 0)
{
auto it = createdDirectives.find(file_name);
if (it == createdDirectives.end())
createdDirectives.insert(it, make_pair(file_name, toInsert));
else
for (int m = 0; m < toInsert.size(); ++m)
it->second.push_back(toInsert[m]);
}
}
}
else if (curr_regime == INSERT_SHADOW_DIRS || curr_regime == EXTRACT_SHADOW_DIRS)
{
const bool extract = (curr_regime == EXTRACT_SHADOW_DIRS);
@@ -2139,7 +2105,7 @@ void runPass(const int curr_regime, const char *proj_name, const char *folderNam
string additionalName = (consoleMode && folderName == NULL) ? "__shared" : "";
runAnalysis(*project, INSERT_PARALLEL_DIRS_NODIST, false);
runAnalysis(*project, CREATE_PARALLEL_DIRS, false);
runPass(REVERT_SUBST_EXPR_RD, proj_name, folderName);