loop analyzer refactor for shared memory parallelization

This commit is contained in:
2024-06-20 13:00:01 +03:00
parent 1be169e7ff
commit 7daf1b4038
4 changed files with 324 additions and 332 deletions

View File

@@ -91,6 +91,8 @@ static LoopGraph* createDirectiveForLoop(LoopGraph *currentLoop, MapToArray &mai
}
currentLoop->directive = directive;
if(!sharedMemoryParallelization)
{
for (auto& read : currentLoop->readOpsArray)
{
const string shortName = read->GetName();
@@ -127,6 +129,7 @@ static LoopGraph* createDirectiveForLoop(LoopGraph *currentLoop, MapToArray &mai
}
}
}
}
if (currentLoop->directive)
currentLoop->acrossOutAttribute.insert(acrossOutArrays.begin(), acrossOutArrays.end());
@@ -553,6 +556,8 @@ void createParallelDirectives(const map<LoopGraph*, map<DIST::Array*, ArrayInfo*
mainArray.hasWrite = true;
mainArray.mainAccess;
if(!sharedMemoryParallelization)
{
set<string> uniqNamesWithAcross;
fillArraysWithAcrossStatus(currLoop, uniqNamesWithAcross);
@@ -738,9 +743,12 @@ void createParallelDirectives(const map<LoopGraph*, map<DIST::Array*, ArrayInfo*
// now OmegaTest is used for searching dependencies
if (!mainArray.hasWrite)
findMainArrayFromRead(currAccesses, mainArray, itersCount, arrayLinksByFuncCalls);
}
if (!hasConflict &&
mainArray.arrayRef != NULL && mainArray.dimentionPos != -1 &&
bool dimPosFound = sharedMemoryParallelization ||
(mainArray.arrayRef != NULL && mainArray.dimentionPos != -1);
if (dimPosFound &&
!currLoop->hasLimitsToParallel() &&
(currLoop->lineNum > 0 || (currLoop->lineNum < 0 && currLoop->altLineNum > 0)))
{
@@ -749,7 +757,7 @@ void createParallelDirectives(const map<LoopGraph*, map<DIST::Array*, ArrayInfo*
const int dimPos = mainArray.dimentionPos;
//change array to template if ACROSS was not found or not loop_array
if (mainArray.underAcross == false && !(sharedMemoryParallelization == 1 && mainArray.arrayRef->IsLoopArray()))
if (!sharedMemoryParallelization && mainArray.underAcross == false && !mainArray.arrayRef->IsLoopArray())
{
set<DIST::Array*> realArrayRef;
getRealArrayRefs(mainArray.arrayRef, mainArray.arrayRef, realArrayRef, arrayLinksByFuncCalls);
@@ -787,8 +795,11 @@ void createParallelDirectives(const map<LoopGraph*, map<DIST::Array*, ArrayInfo*
LoopGraph *loop = createDirectiveForLoop(currLoop, mainArray, acrossOutArrays);
parDir = loop->directive;
if (parDir != NULL)
{
if(!sharedMemoryParallelization)
{
parDir->arrayRef2 = mainArrayOfLoop;
if (mainArray.underAcross == false)
{
for (int i = 0; i < mainArrayOfLoop->GetDimSize(); ++i)
@@ -807,6 +818,8 @@ void createParallelDirectives(const map<LoopGraph*, map<DIST::Array*, ArrayInfo*
parDir->on2 = parDir->on;
addShadowFromAnalysis(parDir, currAccesses);
}
loop->directiveForLoop = new ParallelDirective(*loop->directive);
}
__spf_print(PRINT_DIR_RESULT, " directive created\n");

View File

@@ -204,7 +204,7 @@ vector<int> matchSubscriptToLoopSymbols(const vector<SgForStmt*> &parentLoops, S
if (countOfSymbols > 1)
{
__spf_print(PRINT_ARRAY_ARCS, " <%d|%d> ", 0, 0);
if (currRegime == DATA_DISTR)
if (currRegime == DATA_DISTR || currRegime == SHARED_MEMORY_PAR)
{
const pair<bool, string> &arrayRefString = constructArrayRefForPrint(arrayRef, dimNum, origSubscr);
__spf_print(1, "WARN: array ref '%s' at line %d has more than one loop's variables\n", arrayRefString.second.c_str(), currLine);
@@ -238,7 +238,7 @@ vector<int> matchSubscriptToLoopSymbols(const vector<SgForStmt*> &parentLoops, S
for (int i = 0; i < (int)parentLoops.size(); ++i)
addInfoToMap(loopInfo, parentLoops[i], currOrigArrayS, arrayRef, dimNum, REMOTE_TRUE, currLine, numOfSubscriptions);
}
else if (currRegime == DATA_DISTR)
else if (currRegime == DATA_DISTR || currRegime == SHARED_MEMORY_PAR)
{
const pair<bool, string> &arrayRefString = constructArrayRefForPrint(arrayRef, dimNum, origSubscr);
@@ -294,7 +294,7 @@ vector<int> matchSubscriptToLoopSymbols(const vector<SgForStmt*> &parentLoops, S
if (side == RIGHT)
addInfoToMap(loopInfo, parentLoops[position], currOrigArrayS, arrayRef, dimNum, REMOTE_TRUE, currLine, numOfSubscriptions);
}
else if (currRegime == DATA_DISTR)
else if (currRegime == DATA_DISTR || currRegime == SHARED_MEMORY_PAR)
{
const pair<bool, string> &arrayRefString = constructArrayRefForPrint(arrayRef, dimNum, origSubscr);
__spf_print(1, "WARN: can not calculate index expression for array ref '%s' at line %d\n", arrayRefString.second.c_str(), currLine);
@@ -387,7 +387,8 @@ vector<int> matchSubscriptToLoopSymbols(const vector<SgForStmt*> &parentLoops, S
return allPositions;
}
static vector<int> matchArrayToLoopSymbols(const vector<SgForStmt*> &parentLoops, SgExpression *currExp, const int side,
static vector<int> matchArrayToLoopSymbols(const vector<SgForStmt*> &parentLoops, vector<set<string>>& privatesVarsForLoop,
SgExpression *currExp, const int side,
map<SgForStmt*, map<SgSymbol*, ArrayInfo>> &loopInfo, const int currLine,
map<int, LoopGraph*> &sortedLoopGraph, const ParallelRegion *reg, const double currentW,
const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls)
@@ -440,7 +441,9 @@ static vector<int> matchArrayToLoopSymbols(const vector<SgForStmt*> &parentLoops
vector<int> canNotMapToLoop;
for (int i = 0; i < wasFoundForLoop.size(); ++i)
{
if (wasFoundForLoop[i] != 1)
if (wasFoundForLoop[i] != 1 &&
// the private-list check below is always true for distributed data case
privatesVarsForLoop[i].find(string(arrayRef->symbol()->identifier())) == privatesVarsForLoop[i].end())
{
auto itLoop = sortedLoopGraph.find(parentLoops[i]->lineNumber());
if (itLoop == sortedLoopGraph.end())
@@ -456,7 +459,7 @@ static vector<int> matchArrayToLoopSymbols(const vector<SgForStmt*> &parentLoops
if (side == LEFT)
{
if (ifUnknownArrayAssignFound && (currRegime == DATA_DISTR))
if (ifUnknownArrayAssignFound && (currRegime == DATA_DISTR || currRegime == SHARED_MEMORY_PAR))
{
const string arrayRefS = arrayRef->unparse();
for (auto &line : canNotMapToLoop)
@@ -517,7 +520,8 @@ static vector<int> matchArrayToLoopSymbols(const vector<SgForStmt*> &parentLoops
}
static void mapArrayRef(SgStatement* currentSt, SgExpression* currExp,
const vector<SgForStmt*>& parentLoops, const int side, const int lineNum,
const vector<SgForStmt*>& parentLoops, vector<set<string>>& privatesVarsForLoop,
const int side, const int lineNum,
map<SgForStmt*, map<SgSymbol*, ArrayInfo>>& loopInfo,
map<int, LoopGraph*> &sortedLoopGraph, map<string, pair<SgSymbol*, SgStatement*>>& notMappedDistributedArrays,
set<string>& mappedDistrbutedArrays,
@@ -533,7 +537,7 @@ static void mapArrayRef(SgStatement* currentSt, SgExpression* currExp,
__spf_print(PRINT_ARRAY_ARCS, "%s to array <%s> on line %d: ", printSide, OriginalSymbol(currExp->symbol())->identifier(), lineNum);
bool wasMapped = false;
vector<int> matched = matchArrayToLoopSymbols(parentLoops, currExp, side, loopInfo, lineNum, sortedLoopGraph, reg, currentW, arrayLinksByFuncCalls);
vector<int> matched = matchArrayToLoopSymbols(parentLoops, privatesVarsForLoop, currExp, side, loopInfo, lineNum, sortedLoopGraph, reg, currentW, arrayLinksByFuncCalls);
for (int z = 0; z < matched.size(); ++z)
wasMapped |= (matched[z] != 0);
@@ -570,7 +574,8 @@ static void findArrayRef(const vector<SgForStmt*> &parentLoops, SgExpression *cu
if (isArrayRef(currExp))
{
//... and current array is not in private list (this filter is bypassed when sharedMemoryParallelization is set)
if (privatesVars.find(string(OriginalSymbol(currExp->symbol())->identifier())) == privatesVars.end())
if (sharedMemoryParallelization ||
privatesVars.find(string(OriginalSymbol(currExp->symbol())->identifier())) == privatesVars.end())
{
if (wasDistributedArrayRef)
{
@@ -582,23 +587,21 @@ static void findArrayRef(const vector<SgForStmt*> &parentLoops, SgExpression *cu
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
if (itLoop->second->perfectLoop != depth)
break;
if (!(sharedMemoryParallelization && side == RIGHT))
itLoop->second->hasIndirectAccess = true;
if (sharedMemoryParallelization && side == RIGHT)
itLoop->second->hasIndirectAccess = false;
}
mapArrayRef(currentSt, currExp, parentLoops, side, lineNum, loopInfo, sortedLoopGraph,
mapArrayRef(currentSt, currExp, parentLoops, privatesVarsForLoop, side, lineNum, loopInfo, sortedLoopGraph,
notMappedDistributedArrays, mappedDistrbutedArrays, reg, currentW, arrayLinksByFuncCalls);
}
else
{
wasDistributedArrayRef = true;
mapArrayRef(currentSt, currExp, parentLoops, side, lineNum, loopInfo, sortedLoopGraph,
mapArrayRef(currentSt, currExp, parentLoops, privatesVarsForLoop, side, lineNum, loopInfo, sortedLoopGraph,
notMappedDistributedArrays, mappedDistrbutedArrays, reg, currentW, arrayLinksByFuncCalls);
}
}
else
{
if (currRegime == DATA_DISTR && side == LEFT)
else if (currRegime == DATA_DISTR && side == LEFT)
{
auto symb = OriginalSymbol(currExp->symbol());
SgStatement *decl = declaratedInStmt(symb);
@@ -653,8 +656,6 @@ static void findArrayRef(const vector<SgForStmt*> &parentLoops, SgExpression *cu
const string key = string(OriginalSymbol(currExp->symbol())->identifier());
if (loopsPrivates.find(key) == loopsPrivates.end())
{
if (sharedMemoryParallelization == 0)
{
for (auto& loop : parentLoops)
{
@@ -670,44 +671,6 @@ static void findArrayRef(const vector<SgForStmt*> &parentLoops, SgExpression *cu
sortedLoopGraph[loop->lineNumber()]->hasWritesToNonDistribute = true;
}
}
}
//TODO: this case looks strange
/*else if (loopsRedUnited.find(key) == loopsRedUnited.end())
{
auto saveReg = currRegime;
currRegime = ARRAY_ACC_CORNER;
bool wasMapped = false;
map<SgForStmt*, map<SgSymbol*, ArrayInfo>> tmpLoopInfo = loopInfo;
vector<int> matched = matchArrayToLoopSymbols(parentLoops, currExp, side, tmpLoopInfo, currLine, sortedLoopGraph, reg, currentW, arrayLinksByFuncCalls);
for (int z = 0; z < matched.size(); ++z)
wasMapped |= (matched[z] != 0);
currRegime = saveReg;
if (wasMapped)
{
if (sharedMemoryParallelization == 0)
{
int z = 0;
for (auto& loop : parentLoops)
{
if (tmpLoopInfo.find(loop) != tmpLoopInfo.end() && matched[z])
{
wstring messageE, messageR;
__spf_printToLongBuf(messageE, L"write to non distributed array '%s' in this loop", to_wstring(symb->identifier()).c_str());
__spf_printToLongBuf(messageR, R60, to_wstring(symb->identifier()).c_str());
if (loop->lineNumber() > 0)
currMessages->push_back(Messages(WARR, loop->lineNumber(), messageR, messageE, 1026));
sortedLoopGraph[loop->lineNumber()]->hasWritesToNonDistribute = true;
}
++z;
}
}
}
} */
if (loopsPrivates.find(key) != loopsPrivates.end() || loopsRedUnited.find(key) != loopsRedUnited.end())
{
@@ -727,7 +690,6 @@ static void findArrayRef(const vector<SgForStmt*> &parentLoops, SgExpression *cu
}
}
}
}
nextSide = (side == LEFT) ? RIGHT : side;
}
@@ -1384,7 +1346,7 @@ static void convertOneLoop(LoopGraph *currLoop, map<LoopGraph*, map<DIST::Array*
SgStatement *decl = declaratedInStmt(currentArray);
const char *symbIdent = currentArray->identifier();
if (privateArrays.find(symbIdent) == privateArrays.end())
if (privateArrays.find(symbIdent) == privateArrays.end() || sharedMemoryParallelization)
{
const tuple<int, string, string> uniqKey = getUniqName(commonBlocks, decl, currentArray);
@@ -1401,7 +1363,7 @@ static void convertOneLoop(LoopGraph *currLoop, map<LoopGraph*, map<DIST::Array*
else
arrayToAdd = itFound->second;
if (arrayToAdd->IsNotDistribute() == true)
if (!sharedMemoryParallelization && arrayToAdd->IsNotDistribute() == true)
continue;
set<DIST::Array*> links;
@@ -1609,6 +1571,7 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
modulesByName[modules[i]->symbol()->identifier()] = modules[i];
map<string, set<string>> privatesByModule;
if(!sharedMemoryParallelization)
for (int i = 0; i < modules.size(); ++i)
privatesByModule[modules[i]->symbol()->identifier()] = getPrivatesFromModule(modules[i], declaredArrays, declaratedArraysSt, modulesByName);
@@ -1674,6 +1637,8 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
loopsForFunction.push_back(loop);
}
if(!sharedMemoryParallelization)
{
SgStatement* tmpModFind = st;
while (tmpModFind->variant() != GLOBAL)
{
@@ -1681,6 +1646,8 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
if (tmpModFind->variant() == MODULE_STMT)
fillFromModule(tmpModFind->symbol(), privatesByModule, privatesVars);
}
}
commonBlocks.clear();
getCommonBlocksRef(commonBlocks, st, st->lastNodeOfStmt());
__spf_print(PRINT_PROF_INFO, " number of common blocks %d\n", (int)commonBlocks.size());
@@ -1727,16 +1694,20 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
if (isSgExecutableStatement(st) == NULL)
delcsStatViewed.insert(st);
else if (!isDVM_stat(st) && !isSPF_stat(st))
else if (!sharedMemoryParallelization &&
!isDVM_stat(st) && !isSPF_stat(st))
for (int i = 0; i < 3; ++i)
fillPrivatesFromDecl(st->expr(i), delcsSymbViewed, delcsStatViewed, declaredArrays, declaratedArraysSt, privatesVars);
//printf("new st with var = %d, on line %d\n", st->variant(), st->lineNumber());
const int currV = st->variant();
if (currV == FOR_NODE)
{
if(!sharedMemoryParallelization)
{
tryToFindPrivateInAttributes(st, privatesVars);
fillNonDistrArraysAsPrivate(st, declaredArrays, declaratedArraysSt, privatesVars);
}
set<string> toAdd;
tryToFindPrivateInAttributes(st, toAdd);
@@ -1783,6 +1754,7 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
unitedPrivates.insert(privVar);
set<string> setDiff;
if(!sharedMemoryParallelization)
for (auto &privVars : privatesVars)
if (unitedPrivates.find(privVars) == unitedPrivates.end())
setDiff.insert(privVars);
@@ -2084,14 +2056,18 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
if(!sharedMemoryParallelization)
for (auto it = itF->second.begin(); it != itF->second.end(); ++it)
privatesVars.insert(*it);
}
}
else
{
if(!sharedMemoryParallelization)
{
tryToFindPrivateInAttributes(st, privatesVars);
fillNonDistrArraysAsPrivate(st, declaredArrays, declaratedArraysSt, privatesVars);
}
if (isDVM_stat(st) == false && isSgExecutableStatement(st))
{
@@ -2125,7 +2101,7 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
}
auto convertedLoopInfo = convertLoopInfo(loopInfo, sortedLoopGraph, privatesVars, commonBlocks, declaredArrays, arrayLinksByFuncCalls, createdArrays);
if (regime == DATA_DISTR)
if (regime == DATA_DISTR || regime == SHARED_MEMORY_PAR)
{
processLoopInformationForFunction(convertedLoopInfo);
@@ -2185,6 +2161,7 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
}
}
if(!sharedMemoryParallelization)
addToDistributionGraph(convertedLoopInfo, arrayLinksByFuncCalls);
for (auto &toDel : tmpLoops)
@@ -2193,7 +2170,6 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
if (!skipDeps)
{
for (auto &loopLine : loopWithOutArrays)
{
if (loopLine > 0)
@@ -2301,6 +2277,9 @@ void loopAnalyzer(SgFile *file, vector<ParallelRegion*> &regions, map<tuple<int,
selectFreeLoopsForParallelization(loopsForFunction, funcName, (regime == DATA_DISTR), regions, messagesForFile);
}
if(regime == SHARED_MEMORY_PAR)
createParallelDirectives(convertedLoopInfo, regions, arrayLinksByFuncCalls, messagesForFile);
__spf_print(PRINT_PROF_INFO, "Function ended\n");
}
}

View File

@@ -17,7 +17,7 @@
typedef std::pair<std::pair<int, int>, std::pair<int, int>> attrType;
namespace DIST = Distribution;
enum REGIME { DATA_DISTR, COMP_DISTR, REMOTE_ACC, ARRAY_ACC_CORNER, UNDEF };
enum REGIME { DATA_DISTR, COMP_DISTR, REMOTE_ACC, ARRAY_ACC_CORNER, SHARED_MEMORY_PAR, UNDEF };
// loop_analyzer.cpp
bool checkExistence(SgExpression *exp, const std::string& doName);

View File

@@ -562,9 +562,9 @@ static bool runAnalysis(SgProject &project, const int curr_regime, const bool ne
else if (curr_regime == LOOP_ANALYZER_NODIST)
{
auto& loopsInFile = getObjectForFileFromMap(file_name, loopGraph);
loopAnalyzerNoDist(file, parallelRegions, createdArrays, getObjectForFileFromMap(file_name, SPF_messages),
loopAnalyzer(file, parallelRegions, createdArrays, getObjectForFileFromMap(file_name, SPF_messages), SHARED_MEMORY_PAR,
allFuncInfo, declaredArrays, declaratedArraysSt, arrayLinksByFuncCalls, createDefUseMapByPlace(),
&(loopsInFile));
false, &(loopsInFile));
UniteNestedDirectives(loopsInFile);
}