// NOTE(review): in this view the template argument lists (e.g. `vector()`,
// `map> &`) and the names of the angle-bracket headers below appear to have
// been stripped. Code tokens are reproduced as-is; confirm against the
// original file before compiling.
#include "leak_detector.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include

using std::vector;
using std::map;
using std::unordered_map;
using std::tuple;
using std::set;
using std::string;
using std::wstring;
using std::pair;
using std::make_pair;
using std::get;

#include "graph_loops.h"
#include "graph_calls_func.h"
#include "errors.h"
#include "../Distribution/Distribution.h"
#include "../Distribution/CreateDistributionDirs.h"
#include "../ParallelizationRegions/ParRegions.h"
#include "../VisualizerCalls/get_information.h"
#include "../DirectiveProcessing/directive_creator.h"

// Copies the per-dimension access info of `symbol` from `arrayOps` into
// `currLoop`: readOps / unrecReadOps go to currLoop->readOps[symbol] (first /
// second of the stored pair), writeOps go to currLoop->writeOps[symbol].
// Entries are created on demand and resized to symbol->GetDimSize().
static void fillWriteReadOps(LoopGraph *&currLoop, DIST::Array *symbol, const ArrayInfo *arrayOps)
{
    auto it = currLoop->readOps.find(symbol);
    if (it == currLoop->readOps.end())
        it = currLoop->readOps.insert(it, make_pair(symbol, make_pair(vector(), vector())));

    const int dim = symbol->GetDimSize();
    it->second.first.resize(dim);
    it->second.second.resize(dim);
    for (int z = 0; z < dim; ++z)
    {
        it->second.first[z] = arrayOps->readOps[z];
        it->second.second[z] = arrayOps->unrecReadOps[z];
    }

    auto it1 = currLoop->writeOps.find(symbol);
    if (it1 == currLoop->writeOps.end())
        it1 = currLoop->writeOps.insert(it1, make_pair(symbol, vector()));
    it1->second.resize(dim);
    for (int z = 0; z < dim; ++z)
        it1->second[z] = arrayOps->writeOps[z];
}

// Merges the coefficient map of `from` into `to`: keys missing in `to` are
// inserted with the value from `from`, existing keys get the value added.
static void uniteVectors(const ArrayOp &from, ArrayOp &to)
{
    for (auto &elemFrom : from.coefficients)
    {
        auto it = to.coefficients.find(elemFrom.first);
        if (it == to.coefficients.end())
            it = to.coefficients.insert(it, elemFrom);
        else
            it->second += elemFrom.second;
    }
}

// Propagates read information upwards through a perfect loop nest.
// When currLoop->perfectLoop > 1, the nest is walked from the innermost level
// to the outermost: at each step `part1` is the loop `depth - 1` levels below
// currLoop (following children[0]) and `part2` is its parent; the reads of
// part1 are united into part2. Otherwise the function just recurses into all
// children. Note that in the perfect-nest branch no recursion into children
// is performed.
static void uniteChildReadInfo(LoopGraph *currLoop)
{
    if (currLoop->perfectLoop > 1)
    {
        int depth = currLoop->perfectLoop;
        while (depth != 1)
        {
            LoopGraph *part1 = currLoop, *part2 = currLoop;
            for (int i = 0; i < depth - 1; ++i)
                part1 = part1->children[0];
            for (int i = 0; i < depth - 2; ++i)
                part2 = part2->children[0];

            // arrays read in the inner loop that the outer loop does not know yet
            set newToAdd;
            for (auto it = part1->readOps.begin(); it != part1->readOps.end(); ++it)
            {
                auto it2 = part2->readOps.find(it->first);
                if (it2 == part2->readOps.end())
                    newToAdd.insert(it->first);
            }

            // arrays known to both levels: add coefficients, OR the unrecognized-read flags
            for (auto it = part2->readOps.begin(); it != part2->readOps.end(); ++it)
            {
                auto it2 = part1->readOps.find(it->first);
                if (it2 != part1->readOps.end())
                {
                    const vector &toAddReads = it2->second.first;
                    const vector &toAddUnrecReads = it2->second.second;
                    for (int i = 0; i < it->second.first.size(); ++i)
                        uniteVectors(toAddReads[i], it->second.first[i]);
                    for (int i = 0; i < it->second.second.size(); ++i)
                        it->second.second[i] = it->second.second[i] || toAddUnrecReads[i];
                }
            }

            // inserted after the merge loop above, so the new entries are copied, not merged
            for (auto &arrayMissed : newToAdd)
                part2->readOps[arrayMissed] = part1->readOps[arrayMissed];
            depth--;
        }
    }
    else
    {
        for (int i = 0; i < currLoop->children.size(); ++i)
            uniteChildReadInfo(currLoop->children[i]);
    }
}

// Post-order walk that detects write conflicts between a loop and the writes
// accumulated from its (last) child subtree.
// `foundConflicts` and `unitedWROps` are cleared before every child except the
// first, so after the child loop they hold the state produced by the last
// child only. For every array written in currLoop, a dimension in which both
// the current writes and the accumulated writes are non-empty marks the array
// as conflicting (in `foundConflicts` and in currLoop->hasConflicts).
static void fillConflictState(LoopGraph *currLoop, map &foundConflicts, map> &unitedWROps)
{
    for (int i = 0; i < currLoop->children.size(); ++i)
    {
        if (i > 0)
        {
            foundConflicts.clear();
            unitedWROps.clear();
        }
        fillConflictState(currLoop->children[i], foundConflicts, unitedWROps);
    }

    for (auto it = currLoop->writeOps.begin(); it != currLoop->writeOps.end(); ++it)
    {
        DIST::Array *arrayN = it->first;
        vector currWrites = it->second;

        //TODO: why did reads also have to be examined here before?! These conflicts are resolved with REMOTE
        /*auto itRead = currLoop->readOps.find(arrayN);
        if (itRead != currLoop->readOps.end())
        {
            const vector &currReads = itRead->second.first;
            for (int i = 0; i < currWrites.size(); ++i)
                uniteVectors(currReads[i], currWrites[i]);
        }*/

        auto it2 = unitedWROps.find(arrayN);
        if (it2 != unitedWROps.end())
        {
            vector &unitedW = it2->second;
            for (int i = 0; i < unitedW.size(); ++i)
            {
                if (currWrites[i].coefficients.size() != 0 && unitedW[i].coefficients.size() != 0)
                {
                    foundConflicts[arrayN] = true;
                    currLoop->hasConflicts[arrayN] = true;
                }
                else if (unitedW[i].coefficients.size() == 0)
                    unitedW[i] = currWrites[i];
                else
                {
                    // Reached only when currWrites[i] has no coefficients (the two
                    // conditions above were false), so this loop body never runs.
                    for (auto &oldWrites : currWrites[i].coefficients)
                    {
                        auto it = unitedW[i].coefficients.find(oldWrites.first);
                        if (it == unitedW[i].coefficients.end())
                            it = unitedW[i].coefficients.insert(it, make_pair(oldWrites.first, 0));
                        it->second += oldWrites.second;
                    }
                }
            }
        }
        else
            it2 = unitedWROps.insert(it2, make_pair(arrayN, currWrites));
    }
}

// Fills the LoopGraph nodes in `loopInfo` with array access information.
// Runs four passes over all loops in the map:
//   1. copy read/write ops per array and collect arrays with non-empty reads;
//   2. compute write-conflict state;
//   3. snapshot the per-loop ops into writeOpsForLoop / readOpsForLoop;
//   4. unite read info across perfect loop nests.
// The snapshot in pass 3 is taken before pass 4 modifies readOps.
void processLoopInformationForFunction(map> &loopInfo)
{
    // fill reads info
    for (auto& loop : loopInfo)
    {
        LoopGraph *graphLoop = loop.first;
        const map &currInfo = loop.second;
        for (auto it3 = currInfo.begin(); it3 != currInfo.end(); ++it3)
            fillWriteReadOps(graphLoop, it3->first, it3->second);

        for (auto it3 = currInfo.begin(); it3 != currInfo.end(); ++it3)
        {
            // note: shadows the outer `currInfo` map with the per-array info
            const ArrayInfo *currInfo = it3->second;
            bool nulReads = true;
            for (int z = 0; z < currInfo->getDimSize(); ++z)
            {
                if (currInfo->readOps[z].coefficients.size() != 0)
                {
                    nulReads = false;
                    break;
                }
            }

            // remember arrays that have at least one dimension with read coefficients
            if (!nulReads)
                graphLoop->readOpsArray.insert(it3->first);
        }
    }

    // fill conflict state
    for (auto& loop : loopInfo)
    {
        map foundConflicts;
        map> unitedWROps;
        fillConflictState(loop.first, foundConflicts, unitedWROps);
    }

    //save read and write info
    for (auto& loop : loopInfo)
    {
        LoopGraph* graphLoop = loop.first;
        graphLoop->writeOpsForLoop = graphLoop->writeOps;
        for (auto& elem : graphLoop->readOps)
            graphLoop->readOpsForLoop[elem.first] = elem.second.first;
    }

    //unite reads info
    for (auto& loop : loopInfo)
        uniteChildReadInfo(loop.first);
}

// When non-zero, arcs produced by addToGraph() are first accumulated in
// GroupItem objects and added to the graph in one pass by addGroup().
#define GROUP_BY_REQUEST 1
#if GROUP_BY_REQUEST
// Accumulates arc weights for one (from array, to array) pair.
// Holds maxDim1 * maxDim2 slots, one per (dim1, dim2) combination; each slot
// stores the dimension pair and a map from a normalized arc key to the summed
// weight.
class GroupItem
{
private:
    int maxDim1;
    int maxDim2;
    vector, map>> coeffs;
public:
    GroupItem(int maxD1, int maxD2) : maxDim1(maxD1), maxDim2(maxD2), coeffs(maxD1 * maxD2) { }

    // Adds `currW` to the weight stored for `key` in the slot of (dim1, dim2).
    // The key is normalized with DIST::shiftByDiffInArc and
    // DIST::inverseArcByShifts before the lookup.
    // assumes dim1 < maxDim1 and dim2 < maxDim2 (no bounds check here) - TODO confirm
    void inline AddToGroup(int dim1, int dim2, const attrType &key, const double currW)
    {
        auto shiftedAndInversedKey = DIST::inverseArcByShifts(DIST::shiftByDiffInArc(key));
        int pos = dim2 * maxDim1 + dim1;
        // NOTE(review): `¤t` looks like a mis-encoded `&current` (the next lines
        // use `current`) - confirm against the original file.
        auto ¤t = coeffs[pos];
        current.first = make_pair(dim1, dim2);
        auto it = current.second.find(shiftedAndInversedKey);
        if (it == current.second.end())
            it = current.second.insert(it, make_pair(shiftedAndInversedKey, 0.0));
        it->second += currW;
    }

    // Read-only access to all slots (including ones never touched by AddToGroup).
    const vector, map>>& GetCoeffs() const { return coeffs; }
};

// Flushes the accumulated groups into the graph: one AddArrayAccess call per
// stored (array pair, dimension pair, arc key) with its summed weight.
// Slots whose map is empty produce no calls.
static void inline addGroup(DIST::GraphCSR &G, DIST::Arrays &allArrays, const map, GroupItem> &group, const links linkType)
{
    for (auto &elem : group)
    {
        DIST::Array *from = elem.first.first;
        DIST::Array *to = elem.first.second;
        for (auto &coeffs : elem.second.GetCoeffs())
        {
            const auto &arc = coeffs.first;
            for (auto &weight : coeffs.second)
                AddArrayAccess(G, allArrays, from, to, arc, weight.second, weight.first, linkType);
        }
    }
}
#endif

// For every (first, second) pair in `in` appends an extent to `out`:
// second - first + 1, or 2 when first >= second.
// Returns the product of all appended extents (1.0 for empty input).
static double calculateSizes(const vector> &in, vector &out)
{
    double all = 1.0;
    for (auto &elem : in)
    {
        if (elem.first >= elem.second)
        {
            out.push_back(2);
            all *= 2;
        }
        else
        {
            out.push_back(elem.second - elem.first + 1);
            all *= (elem.second - elem.first + 1);
        }
    }
    return all;
}

// Adds arcs of kind `linkType` between the dimensions of `fromSymb` and
// `toSymb`, driven by the access coefficients in `from` and `to`.
//   WW_link: every (write of from, write of to) pair,
//            weight = writeTo * allTo + writeFrom * allFrom;
//   WR_link: every (write of from, read of to) pair, weight = readTo * allTo;
//   RR_link: every (read of from, read of to) pair,
//            weight = readTo * max(allTo, allFrom);
// where allFrom / allTo are the products returned by calculateSizes() for the
// two arrays. With GROUP_BY_REQUEST the arcs are accumulated per array pair
// and added to the graph at the end through addGroup().
// Returns true if, in the WW_link or WR_link branch, some examined dimension
// pair had write coefficients on either side; the RR_link branch never sets it.
// NOTE(review): sizesFrom / sizesTo are filled but not used afterwards.
static bool addToGraph(DIST::GraphCSR &G, DIST::Arrays &allArrays, const ArrayInfo *from, DIST::Array *fromSymb, const ArrayInfo *to, DIST::Array *toSymb, const links linkType)
{
    bool loopHasWrite = false;
#if GROUP_BY_REQUEST
    map, GroupItem> ww_links;
    map, GroupItem> wr_links;
    map, GroupItem> rr_links;
#endif
    auto sizesFromPair = fromSymb->GetSizes();
    auto sizesToPair = toSymb->GetSizes();
    vector sizesFrom;
    vector sizesTo;
    double allFrom = calculateSizes(sizesFromPair, sizesFrom);
    double allTo = calculateSizes(sizesToPair, sizesTo);

    if (linkType == WW_link)
    {
        // add W-R and W-W
        for (int dimFrom = 0; dimFrom < from->getDimSize(); ++dimFrom)
        {
            for (int dimTo = 0; dimTo < to->getDimSize(); ++dimTo)
            {
                if ((from->writeOps[dimFrom].coefficients.size() != 0) || (to->writeOps[dimTo].coefficients.size() != 0))
                    loopHasWrite = true;

                // both dimensions must be accessed (read or written) to be linked
                if ((from->writeOps[dimFrom].coefficients.size() != 0 || from->readOps[dimFrom].coefficients.size() != 0) && (to->writeOps[dimTo].coefficients.size() != 0 || to->readOps[dimTo].coefficients.size() != 0))
                {
                    for (auto &writeFrom : from->writeOps[dimFrom].coefficients)
                    {
                        for (auto &writeTo : to->writeOps[dimTo].coefficients)
#if GROUP_BY_REQUEST
                        {
                            const auto key = make_pair(fromSymb, toSymb);
                            auto it = ww_links.find(key);
                            if (it == ww_links.end())
                                it = ww_links.insert(it, make_pair(key, GroupItem(fromSymb->GetDimSize(), toSymb->GetDimSize())));
                            it->second.AddToGroup(dimFrom, dimTo, make_pair(writeFrom.first, writeTo.first), writeTo.second * allTo + writeFrom.second * allFrom);
                        }
#else
                            AddArrayAccess(G, allArrays, fromSymb, toSymb, make_pair(dimFrom, dimTo), writeTo.second * allTo + writeFrom.second * allFrom, make_pair(writeFrom.first, writeTo.first), WW_link);
#endif
                    }
                }
            }
        }
    }

    if (linkType == WR_link)
    {
        for (int dimFrom = 0; dimFrom < from->getDimSize(); ++dimFrom)
        {
            for (int dimTo = 0; dimTo < to->getDimSize(); ++dimTo)
            {
                if ((from->writeOps[dimFrom].coefficients.size() != 0) || (to->writeOps[dimTo].coefficients.size() != 0))
                    loopHasWrite = true;

                if ((from->writeOps[dimFrom].coefficients.size() != 0 || from->readOps[dimFrom].coefficients.size() != 0) && (to->writeOps[dimTo].coefficients.size() != 0 || to->readOps[dimTo].coefficients.size() != 0))
                {
                    for (auto &writeFrom : from->writeOps[dimFrom].coefficients)
                    {
                        for (auto &readTo : to->readOps[dimTo].coefficients)
#if GROUP_BY_REQUEST
                        {
                            const auto key = make_pair(fromSymb, toSymb);
                            auto it = wr_links.find(key);
                            if (it == wr_links.end())
                                it = wr_links.insert(it, make_pair(key, GroupItem(fromSymb->GetDimSize(), toSymb->GetDimSize())));
                            it->second.AddToGroup(dimFrom, dimTo, make_pair(writeFrom.first, readTo.first), readTo.second * allTo);
                        }
#else
                            AddArrayAccess(G, allArrays, fromSymb, toSymb, make_pair(dimFrom, dimTo), readTo.second * allTo, make_pair(writeFrom.first, readTo.first), WR_link);
#endif
                    }
                }
            }
        }
    }

    //add R-R, if no W
    if (linkType == RR_link)
    {
        for (int dimFrom = 0; dimFrom < from->getDimSize(); ++dimFrom)
            for (int dimTo = 0; dimTo < to->getDimSize(); ++dimTo)
                if (from->readOps[dimFrom].coefficients.size() != 0 && to->readOps[dimTo].coefficients.size() != 0)
                    for (auto &readFrom : from->readOps[dimFrom].coefficients)
                        for (auto &readTo : to->readOps[dimTo].coefficients)
#if GROUP_BY_REQUEST
                        {
                            const auto key = make_pair(fromSymb, toSymb);
                            auto it = rr_links.find(key);
                            if (it == rr_links.end())
                                it = rr_links.insert(it, make_pair(key, GroupItem(fromSymb->GetDimSize(), toSymb->GetDimSize())));
                            it->second.AddToGroup(dimFrom, dimTo, make_pair(readFrom.first, readTo.first), readTo.second * std::max(allTo, allFrom));
                        }
#else
                            AddArrayAccess(G, allArrays, fromSymb, toSymb, make_pair(dimFrom, dimTo), readTo.second * std::max(allTo, allFrom), make_pair(readFrom.first, readTo.first), RR_link);
#endif
    }

#if GROUP_BY_REQUEST
    // only the map matching linkType can be non-empty; the other two calls are no-ops
    addGroup(G, allArrays, ww_links, WW_link);
    addGroup(G, allArrays, wr_links, WR_link);
    addGroup(G, allArrays, rr_links, RR_link);
#endif
    return loopHasWrite;
}

//TODO: check for recursion!!
// Collects into `realArrayRefs` the arrays that `curr` finally resolves to
// through `arrayLinksByFuncCalls`: an array without an entry in the map is
// inserted itself, otherwise every linked array is resolved recursively.
// `addTo` is only passed through. No visited-set is kept here, so a cyclic
// link map would recurse without end.
void getRealArrayRefs(DIST::Array* addTo, DIST::Array* curr, set& realArrayRefs, const map>& arrayLinksByFuncCalls)
{
    auto itLink = arrayLinksByFuncCalls.find(curr);
    if (itLink == arrayLinksByFuncCalls.end())
        realArrayRefs.insert(curr);
    else
        for (auto& link : itLink->second)
            getRealArrayRefs(addTo, link, realArrayRefs, arrayLinksByFuncCalls);
}

// Collects into `allArrayRefs` every array reachable from `curr` through
// `arrayLinksByFuncCalls`, including `curr` and all intermediate arrays.
// Already collected arrays are not visited again. `addTo` is only passed
// through.
void getAllArrayRefs(DIST::Array *addTo, DIST::Array *curr, set &allArrayRefs, const map> &arrayLinksByFuncCalls)
{
    auto itLink = arrayLinksByFuncCalls.find(curr);
    allArrayRefs.insert(curr);
    if (itLink == arrayLinksByFuncCalls.end())
        return;
    else
        for (auto &link : itLink->second)
            if (allArrayRefs.find(link) == allArrayRefs.end())
                getAllArrayRefs(addTo, link, allArrayRefs, arrayLinksByFuncCalls);
}

// Debug switch for the graph-building code below (used as the first argument
// of __spf_print and as an #if condition); undefined again after
// addToDistributionGraph().
#define DEB_GRAPH 0

// Adds arcs of kind `linkType` between all arrays accessed in one loop.
// For each access z:
//   - the accessed array is registered in `allArrays`;
//   - every real reference of access z is linked with every real reference of
//     the other accesses z1. For WR_link z1 runs over all accesses except z
//     (so both directions are visited); for other link types only z1 > z;
//   - if access z resolves to more than one real array, those real arrays are
//     additionally linked pairwise with WW_link, using a copy of the access
//     info in which read coefficients are merged into the write ops.
// Returns the OR of the addToGraph() results of the first kind of calls; the
// results of the pairwise WW_link calls are ignored.
static bool processLinks(const vector> &currAccessesV, DIST::Arrays &allArrays, map> &realArrayRefs, DIST::GraphCSR &graph, const links linkType)
{
    bool has_Wr_Ww_edges = false;
    int countAdd = 0;
    for (int z = 0; z < currAccessesV.size(); ++z)
    {
        const ArrayInfo& fromUniq = *currAccessesV[z].second;
        allArrays.AddArrayToGraph(currAccessesV[z].first);
        for (auto &fromSymb : realArrayRefs[currAccessesV[z].first])
        {
            for (int z1 = (linkType == WR_link) ? 0 : z + 1; z1 < currAccessesV.size(); ++z1)
            {
                if (z1 == z)
                    continue;
                const ArrayInfo &toUniq = *(currAccessesV[z1].second);
                allArrays.AddArrayToGraph(currAccessesV[z1].first);
                for (auto &toSymb : realArrayRefs[currAccessesV[z1].first])
                {
                    bool res = addToGraph(graph, allArrays, &fromUniq, fromSymb, &toUniq, toSymb, linkType);
                    countAdd++;
                    has_Wr_Ww_edges |= res;
                }
            }
        }

        const set& realRefsSet = realArrayRefs[currAccessesV[z].first];
        if (realRefsSet.size() > 1)
        {
            const vector realRefs(realRefsSet.begin(), realRefsSet.end());
            ArrayInfo unitedCopy = fromUniq;
            //copy read operations to write if empty
            // (a read coefficient is copied only when the write ops of the same
            //  dimension have no entry with that key; this `z` shadows the outer one)
            for (int z = 0; z < unitedCopy.readOps.size(); ++z)
                for (auto& elem : unitedCopy.readOps[z].coefficients)
                    if (unitedCopy.writeOps[z].coefficients.find(elem.first) == unitedCopy.writeOps[z].coefficients.end())
                        unitedCopy.writeOps[z].coefficients[elem.first] = elem.second;

            for (int k1 = 0; k1 < realRefs.size(); ++k1)
            {
                for (int k2 = k1 + 1; k2 < realRefs.size(); ++k2)
                {
                    addToGraph(graph, allArrays, &unitedCopy, realRefs[k1], &unitedCopy, realRefs[k2], WW_link);
                    countAdd++;
                }
            }
        }
    }
#if DEB_GRAPH
    __spf_print(DEB_GRAPH, "added count = %d\n", countAdd);
#endif
    return has_Wr_Ww_edges;
}

// Ordering predicate: ascending by the line number of the loop (pair.first).
static bool sortByLine(const pair>>& l, const pair>>& r) { return l.first->lineNum < r.first->lineNum; }

// Ordering predicate: ascending by the name of the array (pair.first).
static bool sortByName(const pair& l, const pair& r) { return l.first->GetName() < r.first->GetName(); }

// Builds the array-distribution graph from the per-loop access information.
// Loops are processed in ascending line order and the arrays of each loop in
// name order. A loop is skipped if it has no parallel region or if isFor()
// is false. Arcs are added to the region graph when
// sharedMemoryParallelization == 0, otherwise to the loop's own graph.
// W-W and W-R links are always processed; R-R links only if neither of those
// passes reported write edges.
void addToDistributionGraph(const map> &loopInfo, const map> &arrayLinksByFuncCalls)
{
    vector>>> sortedInfo;
    for (auto& loopAccess : loopInfo)
    {
        vector> toAdd(loopAccess.second.begin(), loopAccess.second.end());
        sort(toAdd.begin(), toAdd.end(), sortByName);
        sortedInfo.push_back(make_pair(loopAccess.first, toAdd));
    }
    sort(sortedInfo.begin(), sortedInfo.end(), sortByLine);

#if 0
    // disabled debug dump of the sorted access information
    for (auto& loop : sortedInfo)
    {
        printf("info for loop %d %s\n", loop.first->lineNum, loop.first->fileName.c_str());
        for (auto& elem : loop.second)
        {
            printf(" for Array %s\n", elem.first->GetName().c_str());
            elem.second->printInfo();
        }
    }
#endif

    for (auto& loopAccess : sortedInfo)
    {
        createNeededException();

        ParallelRegion *currReg = loopAccess.first->region;
        if (currReg == NULL)
        {
            __spf_print(1, "Skip loop on line %d - no parallel region for this loop\n", loopAccess.first->lineNum);
            continue;
        }

        if (!loopAccess.first->isFor())
            continue;

        DIST::GraphCSR& G = currReg->GetGraphToModify();
        DIST::GraphCSR& loopGraph = loopAccess.first->getGraphToModify();

        __spf_print(DEB_GRAPH, "added to loop %d %s\n", loopAccess.first->lineNum, loopAccess.first->fileName.c_str());
        DIST::Arrays &allArrays = currReg->GetAllArraysToModify();
        //printf("for loop on line %d: \n", it->first->lineNum);

        const vector> & currAccessesV = loopAccess.second;

        // resolve every accessed array to the real arrays behind it
        map> realArrayRefs;
        for (auto &access : currAccessesV)
            getRealArrayRefs(access.first, access.first, realArrayRefs[access.first], arrayLinksByFuncCalls);

        // NOTE(review): has_Wr_edges receives the WW_link result and has_Ww_edges
        // the WR_link result - the names look swapped; harmless here because both
        // flags are only tested together.
        bool has_Wr_edges = false, has_Ww_edges = false, has_Rr_edges = false;
        has_Wr_edges = processLinks(currAccessesV, allArrays, realArrayRefs, sharedMemoryParallelization == 0 ? G :loopGraph, WW_link);
        has_Ww_edges |= processLinks(currAccessesV, allArrays, realArrayRefs, sharedMemoryParallelization == 0 ? G : loopGraph, WR_link);
        if (!has_Wr_edges && !has_Ww_edges)
            has_Rr_edges = processLinks(currAccessesV, allArrays, realArrayRefs, sharedMemoryParallelization == 0 ? G : loopGraph, RR_link);

        if (sharedMemoryParallelization)
        {
            // no pass reported edges: still register all real arrays
            if (!has_Wr_edges && !has_Ww_edges && !has_Rr_edges)
                for (auto& elem : realArrayRefs)
                    for (auto& array : elem.second)
                        allArrays.AddArrayToGraph(array);
        }

#if 0
        // disabled debug dump (refers to `it`, which is not declared in this scope)
        {
            char fName[256];
            sprintf(fName, "_graph_reg%d_%s.txt", it->first->lineNum, it->first->fileName.c_str());
            loopGraph.CreateGraphWiz(fName, vector>(), allArrays, true);
        }
#endif
    }
}
#undef DEB_GRAPH

// Registers a synthetic "loop array" for a loop nest that uses no distributed
// arrays. The array has `nesting` dimensions, one per loop of the nest
// (following children[0]), each sized by that loop's start/end values
// (swapped for negative steps).
// Returns false (after logging) if the loop has no region or has limits to
// parallelization, true otherwise. Reports an internal error if `nesting` is
// outside [1, perfectLoop] or if one of the visited loops does not have
// withoutDistributedArrays set.
// NOTE(review): `G` is obtained but not used; `loopArray` is allocated with
// `new` and only handed to allArrays.AddArrayToGraph() - presumably ownership
// is taken there, confirm.
static bool addToDistributionGraph(const LoopGraph *loopInfo, const string &inFunction, int nesting)
{
    ParallelRegion *currReg = loopInfo->region;
    if (currReg == NULL || loopInfo->hasLimitsToParallel())
    {
        __spf_print(1, "Skip loop on line %d\n", loopInfo->lineNum);
        return false;
    }

    if (loopInfo->perfectLoop < nesting || nesting < 1)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    // walk down the nest; the pointer stays on the last loop if it has no children
    bool checkFlag = true;
    const LoopGraph* check = loopInfo;
    for (int z = 0; z < nesting; ++z, check->children.size() ? check = check->children[0] : check)
        checkFlag = checkFlag && check->withoutDistributedArrays;

    if (checkFlag == false)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    DIST::GraphCSR &G = currReg->GetGraphToModify();
    DIST::Arrays &allArrays = currReg->GetAllArraysToModify();

    string fullLoopName = loopInfo->genLoopArrayName(inFunction);
    string loopName = fullLoopName;

    vector regs;
    regs.push_back(currReg->GetName());
    DIST::Array *loopArray = new DIST::Array(fullLoopName, loopName, nesting, getUniqArrayId(), loopInfo->fileName, loopInfo->lineNum, make_pair(DIST::l_LOCAL, inFunction), NULL, false, false, false, regs, 0);

    if (loopInfo->calculatedCountOfIters == 0)
    {
        // collect symbolic bounds for the levels where both expressions exist
        const LoopGraph* updateInfo = loopInfo;
        vector> toAdd;
        for (int z = 0; z < nesting; ++z, updateInfo->children.size() ? updateInfo = updateInfo->children[0] : updateInfo)
        {
            if (updateInfo->startEndExpr.first && updateInfo->startEndExpr.second)
            {
                if (updateInfo->stepVal < 0)
                    toAdd.push_back(make_pair(updateInfo->startEndExpr.second, updateInfo->startEndExpr.first));
                else
                    toAdd.push_back(updateInfo->startEndExpr);
            }
        }
        if (toAdd.size())
            loopArray->SetSizesExpr(toAdd);
    }

    const LoopGraph* updateInfo = loopInfo;
    for (int z = 0; z < nesting; ++z, updateInfo->children.size() ? updateInfo = updateInfo->children[0] : updateInfo)
        loopArray->ExtendDimSize(z, (updateInfo->stepVal < 0) ? make_pair(updateInfo->endVal, updateInfo->startVal) : make_pair(updateInfo->startVal, updateInfo->endVal));

    loopArray->SetLoopArray(true);
    allArrays.AddArrayToGraph(loopArray);
    return true;
}

// Handles loops that use no distributed arrays ("free" loops).
// A loop qualifies when withoutDistributedArrays is set, it has a region, has
// no limits to parallelization and lineNum > 0; `nesting` counts how many of
// the first perfectLoop levels of its nest (following children[0]) qualify.
//   isDistribute == true : the loop array is registered via
//                          addToDistributionGraph();
//   isDistribute == false: for each nest level a temporary ArrayInfo is built
//                          with a single write op in that level's dimension and
//                          passed to createParallelDirectives().
// Loops that do not qualify are skipped and their children are examined.
void selectFreeLoopsForParallelization(const vector& loops, const string& funcName, bool isDistribute, const vector& regions, vector& messagesForFile)
{
    for (auto& loopRef : loops)
    {
        if (loopRef->withoutDistributedArrays && loopRef->region && !loopRef->hasLimitsToParallel() && loopRef->lineNum > 0)
        {
            int nesting = 0;
            LoopGraph* it = loopRef;
            for (int z = 0; z < loopRef->perfectLoop; ++z, it->children.size() ? it = it->children[0] : it)
                if (it->withoutDistributedArrays && it->region && !it->hasLimitsToParallel() && it->lineNum > 0)
                    ++nesting;

            if (isDistribute)
                addToDistributionGraph(loopRef, funcName, nesting);
            else
            {
                auto region = loopRef->region;
                auto allArrays = region->GetAllArrays();
                string fullLoopName = loopRef->genLoopArrayName(funcName);
                auto loopArray = allArrays.GetArrayByName(fullLoopName);

                // sized up front: pointers to its elements are stored in convertedLoopInfo
                vector tmpArrayInfo(nesting);
                map> convertedLoopInfo;
                LoopGraph* it = loopRef;
                for (int z = 0; z < nesting; ++z, it->children.size() ? it = it->children[0] : it)
                {
                    ArrayInfo& curr = tmpArrayInfo[z];
                    curr.setDimSize(nesting);

                    ArrayOp tmpOp(make_pair(make_pair(1, 0), 1.0));
                    for (int k = 0; k < nesting; ++k)
                        if (k == z)
                            curr.writeOps[k] = tmpOp;

                    map tmpAdd;
                    tmpAdd.insert(make_pair(loopArray, &curr));
                    convertedLoopInfo.insert(make_pair(it, tmpAdd));
                }
                createParallelDirectives(convertedLoopInfo, regions, map>(), messagesForFile);
            }
        }
        else
            selectFreeLoopsForParallelization(loopRef->children, funcName, isDistribute, regions, messagesForFile);
    }
}

// Writes countOfBlanks * sizeOfBlank space characters to `file`.
static void printBlanks(FILE *file, const int sizeOfBlank, const int countOfBlanks)
{
    for (int k = 0; k < countOfBlanks; ++k)
        for (int m = 0; m < sizeOfBlank; ++m)
            fprintf(file, " ");
}

// Prints one level of the loop tree to `file`, indented by `lvl`, and recurses
// into the children with lvl + 1. With `withRegs` set, loops without a region
// are not printed, but their children are still visited.
static void printLoopGraphLvl(FILE *file, const vector &childs, const int lvl, bool withRegs = false)
{
    for (int k = 0; k < (int)childs.size(); ++k)
    {
        bool needToPrint = true;
        if (withRegs)
            if (childs[k]->region == NULL)
                needToPrint = false;

        if (needToPrint)
        {
            printBlanks(file, 2, lvl);
            fprintf(file, "FOR on line %d -- %d", childs[k]->lineNum, childs[k]->lineNumAfterLoop);
            if (childs[k]->perfectLoop > 1)
                fprintf(file, " [PERFECT]");
            if (childs[k]->hasGoto)
                fprintf(file, " [HAS GOTO]");
            if (childs[k]->hasPrints)
                fprintf(file, " [HAS I/O OPS]");
            if (childs[k]->region)
                fprintf(file, " [REGION %s]", childs[k]->region->GetName().c_str());
            if (childs[k]->userDvmDirective)
                fprintf(file, " [USER DVM]");

            fprintf(file, " [IT = %d / MULT = %f]", childs[k]->countOfIters, childs[k]->countOfIterNested);
            fprintf(file, "\n");

            // calls made from this loop: name and the second element of the pair
            for (int i = 0; i < (int)childs[k]->calls.size(); ++i)
            {
                printBlanks(file, 2, lvl);
                fprintf(file, "CALL %s [%d]\n", childs[k]->calls[i].first.c_str(), childs[k]->calls[i].second);
            }
        }
        printLoopGraphLvl(file, childs[k]->children, lvl + 1, withRegs);
    }
}

// Dumps the whole loop graph (grouped by file) into the text file `fileName`.
// Returns 0 on success, -1 if the file can not be opened for writing.
int printLoopGraph(const char *fileName, const map> &loopGraph, bool withRegs)
{
    FILE *file = fopen(fileName, "w");
    if (file == NULL)
    {
        __spf_print(1, "can not open file %s\n", fileName);
        return -1;
    }

    map>::const_iterator it;
    for (it = loopGraph.begin(); it != loopGraph.end(); it++)
    {
        fprintf(file, "*** FILE %s\n", it->first.c_str());
        printLoopGraphLvl(file, it->second, 1, withRegs);
        fprintf(file, "\n");
    }

    fclose(file);
    return 0;
}

// Finds FOR loops inside parallel regions whose iteration count is 0.
// The region of each such loop is added to `isNotOkey`; a NOTE message
// (code 1016) is added to `currMessages` once per distinct message text,
// tracked through `uniqMessages`. Loops without a region are skipped together
// with their children.
static void isAllOk(const vector &loops, vector &currMessages, set &isNotOkey, set &uniqMessages)
{
    for (int i = 0; i < loops.size(); ++i)
    {
        if (loops[i]->region)
        {
            if (loops[i]->countOfIters == 0 && loops[i]->region && loops[i]->isFor())
            {
                wstring bufE, bufR;
                __spf_printToLongBuf(bufE, L" Can not calculate count of iterations for this loop, information about iterations in all loops in parallel regions '%s' will be ignored", to_wstring(loops[i]->region->GetName()).c_str());

                auto itM = uniqMessages.find(bufE);
                if (itM == uniqMessages.end())
                {
                    uniqMessages.insert(itM, bufE);

                    __spf_printToLongBuf(bufR, R48, to_wstring(loops[i]->region->GetName()).c_str());

                    currMessages.push_back(Messages(NOTE, loops[i]->lineNum, bufR, bufE, 1016));
                    __spf_print(1, " Can not calculate count of iterations for loop on line %d, information about iterations in all loops in parallel regions '%s' will be ignored\n", loops[i]->lineNum, loops[i]->region->GetName().c_str());
                }
                isNotOkey.insert(loops[i]->region);
            }
            isAllOk(loops[i]->children, currMessages, isNotOkey, uniqMessages);
        }
    }
}

// For loops whose region is in `isNotOkey`, replaces a non-positive iteration
// count with the default value 2. Loops without a region are skipped together
// with their children.
static void setToDefaultCountIter(vector &loops, const set &isNotOkey)
{
    for (int i = 0; i < loops.size(); ++i)
    {
        if (loops[i]->region)
        {
            if (isNotOkey.find(loops[i]->region) != isNotOkey.end() && loops[i]->countOfIters <= 0)
                loops[i]->countOfIters = 2;
            setToDefaultCountIter(loops[i]->children, isNotOkey);
        }
    }
}

// Sets countOfIterNested of every loop to its own iteration count multiplied
// by `allCount` (the accumulated count of the enclosing loops).
static void multiplyCountIter(vector &loops, const double allCount)
{
    for (int i = 0; i < loops.size(); ++i)
    {
        loops[i]->countOfIterNested = loops[i]->countOfIters * allCount;
        multiplyCountIter(loops[i]->children, loops[i]->countOfIterNested);
    }
}

// Adds `coef` to the interprocedural coefficient of every loop in `loops` and
// of all their nested children (entries are created with 0.0 on first use).
static void recAddToChildren(vector &loops, const double coef, map &interprocCoefs)
{
    for (auto &loop : loops)
    {
        auto it = interprocCoefs.find(loop);
        if (it == interprocCoefs.end())
            it = interprocCoefs.insert(it, make_pair(loop, 0.0));
        it->second += coef;
        recAddToChildren(loop->children, coef, interprocCoefs);
    }
}

// Propagates iteration counts across function calls: for every loop the
// product countOfIters * allCount is added to all loops of the called
// functions (funcChildren) and their nested loops, and then propagated further
// along funcChildren.
// assumes the funcChildren links contain no cycles - TODO confirm
static void multiplyCountIterIP(vector &loops, const double allCount, map &interprocCoefs)
{
    for (auto &loop : loops)
    {
        const double coef = loop->countOfIters * allCount;
        recAddToChildren(loop->funcChildren, coef, interprocCoefs);
        multiplyCountIterIP(loop->funcChildren, coef, interprocCoefs);
    }
}

// Fills loop->funcChildren: for every distinct function name called from a
// loop that is present in `mapFunc`, the loops of that function are appended.
// Recurses into nested loops. `allLoops` is only passed through.
static void fillInterprocLinks(const map &mapFunc, vector &loops, const map> &allLoops)
{
    for (auto &loop : loops)
    {
        // a set, so each called function is handled once per loop
        set funNames;
        for (auto &call : loop->calls)
            funNames.insert(call.first);

        if (funNames.size())
        {
            for (auto &call : funNames)
            {
                auto it = mapFunc.find(call);
                if (it != mapFunc.end())
                {
                    FuncInfo *currF = it->second;
                    for (auto &loopInFunc : currF->loopsInFunc)
                        loop->funcChildren.push_back(loopInFunc);
                }
            }
        }
        fillInterprocLinks(mapFunc, loop->children, allLoops);
    }
}

// Fills the reverse links: every loop is appended to funcParents of each of
// its funcChildren. Recurses into nested loops.
static void fillInterprocLinks(vector& loops)
{
    for (auto& loop : loops)
    {
        for (auto& funcCh : loop->funcChildren)
            funcCh->funcParents.push_back(loop);
        fillInterprocLinks(loop->children);
    }
}

// Computes countOfIterNested for all loops.
// Steps:
//   1. build interprocedural links between loops and loops of called functions;
//   2. report region loops with an unknown iteration count and, for the
//      affected regions, substitute a default count;
//   3. multiply iteration counts down every loop nest;
//   4. collect all loops reachable as a child or funcChild of another loop
//      (`linkTo`); top-level loops outside that set (`dontLink`) are the
//      starting points from which interprocedural coefficients are propagated;
//   5. multiply countOfIterNested of every loop that received a coefficient.
void checkCountOfIter(map> &loopGraph, const map> &allFuncInfo, map> &SPF_messages)
{
    set isNotOkey;

    map mapFunc;
    createMapOfFunc(allFuncInfo, mapFunc);

    for (auto& loopsInFile : loopGraph)
        fillInterprocLinks(mapFunc, loopsInFile.second, loopGraph);

    for (auto& loopsInFile : loopGraph)
        fillInterprocLinks(loopsInFile.second);

    for (auto &loopsInFile : loopGraph)
    {
        set uniqMessages;
        auto itM = SPF_messages.find(loopsInFile.first);
        if (itM == SPF_messages.end())
            itM = SPF_messages.insert(itM, make_pair(loopsInFile.first, vector()));

        isAllOk(loopsInFile.second, itM->second, isNotOkey, uniqMessages);
    }

    if (isNotOkey.size() != 0)
    {
        for (auto &loopsInFile : loopGraph)
            setToDefaultCountIter(loopsInFile.second, isNotOkey);
    }

    for (auto &loopsInFile : loopGraph)
        multiplyCountIter(loopsInFile.second, 1.0);

    set linkTo;
    for (auto &loopsInFile : loopGraph)
    {
        for (auto &loop : loopsInFile.second)
        {
            for (auto &ch : loop->children)
                linkTo.insert(ch);
            for (auto &ch : loop->funcChildren)
                linkTo.insert(ch);
        }
    }

    // transitive closure over children / funcChildren
    // (elements are inserted into linkTo while it is being iterated)
    bool changed = true;
    while (changed)
    {
        changed = false;
        for (auto &loop : linkTo)
        {
            for (auto &ch : loop->children)
            {
                if (linkTo.find(ch) == linkTo.end())
                {
                    linkTo.insert(ch);
                    changed = true;
                }
            }

            for (auto &ch : loop->funcChildren)
            {
                if (linkTo.find(ch) == linkTo.end())
                {
                    linkTo.insert(ch);
                    changed = true;
                }
            }
        }
    }

    set dontLink;
    for (auto &loopsInFile : loopGraph)
        for (auto &loop : loopsInFile.second)
            if (linkTo.find(loop) == linkTo.end())
                dontLink.insert(loop);

    map interprocCoefs;
    auto tmpParam = vector(dontLink.begin(), dontLink.end());
    multiplyCountIterIP(tmpParam, 1.0, interprocCoefs);

    for (auto &loop : interprocCoefs)
        loop.first->countOfIterNested *= loop.second;
}

// Marks loops that contain I/O or STOP statements indirectly, through called
// functions. For every call in a loop, the set of functions reachable through
// FuncInfo::callsFrom is computed; if any of them has recorded I/O (or STOP)
// lines, the loop gets hasPrints (or hasStops) set and the second element of
// the call pair is added to linesOfIO (or linesOfStop). Recurses into nested
// loops.
// NOTE(review): `mapFunc` is taken by value, so it is copied on every
// recursive call.
static void updateLoopIoAndStopsByFuncCalls(vector &loopGraph, map mapFunc)
{
    for (auto &loop : loopGraph)
    {
        vector, set>> funNames;
        for (auto &call : loop->calls)
        {
            string currF = call.first;
            set recCalls;
            recCalls.insert(currF);

            // fixed-point iteration over the call graph; iterates a copy because
            // recCalls grows inside the loop
            bool changed = true;
            while (changed)
            {
                changed = false;
                set local = recCalls;
                for (auto &elem : local)
                {
                    auto itF = mapFunc.find(elem);
                    if (itF != mapFunc.end())
                    {
                        for (auto &toAdd : itF->second->callsFrom)
                        {
                            if (recCalls.find(toAdd) == recCalls.end())
                            {
                                recCalls.insert(toAdd);
                                changed = true;
                            }
                        }
                    }
                }
            }
            funNames.push_back(make_pair(call, recCalls));
        }

        if (funNames.size())
        {
            for (auto &calls : funNames)
            {
                const int lineInLoop = calls.first.second;
                for (auto &call : calls.second)
                {
                    auto itF = mapFunc.find(call);
                    if (itF != mapFunc.end())
                    {
                        if (itF->second->linesOfIO.size() != 0)
                        {
                            loop->hasPrints = true;
                            loop->linesOfIO.insert(lineInLoop);
                        }

                        if (itF->second->linesOfStop.size() != 0)
                        {
                            loop->hasStops = true;
                            loop->linesOfStop.insert(lineInLoop);
                        }
                    }
                }
            }
        }
        updateLoopIoAndStopsByFuncCalls(loop->children, mapFunc);
    }
}

// Entry point: builds the function map from `allFuncInfo` and updates the
// I/O / STOP flags of all loops in every file of `loopGraph`.
void updateLoopIoAndStopsByFuncCalls(map> &loopGraph, const map> &allFuncInfo)
{
    map mapFunc;
    createMapOfFunc(allFuncInfo, mapFunc);

    for (auto &byFile : loopGraph)
        updateLoopIoAndStopsByFuncCalls(byFile.second, mapFunc);
}

// Forward declaration: fillFromLoop() and checkArraysMapping() call each other.
static void checkArraysMapping(vector &loopList, map> flagUse, vector &messages, const int topLine, set &checked);

// Adds the writes of `loop` to the per-array, per-dimension counters in
// `flagUse` (a dimension is counted once per loop if any of its write
// coefficients has a non-zero first component of the key) and continues with
// the children of the loop. `flagUse` is passed by value, so every path
// through the loop tree accumulates its own counters. Arrays seen for the
// first time on a path are also recorded in `checked`.
static void fillFromLoop(LoopGraph *loop, map> flagUse, vector &messages, const int topLine, set &checked)
{
    for (auto &write_op : loop->writeOps)
    {
        DIST::Array *array = write_op.first;
        if (flagUse.find(array) == flagUse.end())
        {
            vector tmp(array->GetDimSize());
            std::fill(tmp.begin(), tmp.end(), 0);
            flagUse[array] = tmp;
            checked.insert(array);
        }

        for (int dim = 0; dim < write_op.second.size(); ++dim)
        {
            for (auto &coef : write_op.second[dim].coefficients)
            {
                if (coef.first.first != 0)
                {
                    flagUse[array][dim]++;
                    break;
                }
            }
        }
    }
    checkArraysMapping(loop->children, flagUse, messages, topLine, checked);
}

// Continues the walk started by fillFromLoop(). With a non-empty `loopList`
// every loop is processed with its own copy of `flagUse`. With an empty list
// (the end of a path through the loop tree) every array dimension whose
// counter is greater than 1 and that is not deprecated yet is deprecated and
// a NOTE message (code 1047) is added for line `topLine`.
static void checkArraysMapping(vector &loopList, map> flagUse, vector &messages, const int topLine, set &checked)
{
    if (loopList.size() > 0)
    {
        for (auto &loop : loopList)
            fillFromLoop(loop, flagUse, messages, topLine, checked);
    }
    else
    {
        for (auto &elem : flagUse)
        {
            for (int z = 0; z < elem.second.size(); ++z)
            {
                if (elem.second[z] > 1)
                {
                    if (!elem.first->IsDimDepracated(z))
                    {
                        std::wstring bufw, bufR;
                        __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed due to different writes to %d dimension, this dimension will deprecated", to_wstring(elem.first->GetShortName()).c_str(), z + 1);
                        __spf_printToLongBuf(bufR, R85, z + 1,to_wstring(elem.first->GetShortName()).c_str());
                        messages.push_back(Messages(NOTE, topLine, bufR, bufw, 1047));
                        elem.first->DeprecateDimension(z);
                    }
                }
            }
        }
    }
}

//TODO: need to improve interproc analysis
// Checks, for every top-level loop that has children, whether an array
// dimension is written with a loop-dependent index in more than one loop of a
// nest, and deprecates such dimensions. Arrays for which IsAllDeprecated()
// holds afterwards get a NOTE message at each of their declarations and the
// distribute flag DIST::SPF_PRIV. `arrayLinksByFuncCalls` is not used here.
void checkArraysMapping(const map> &loopGraph, map> &SPF_messages, const map> &arrayLinksByFuncCalls)
{
    set checked;
    for (auto &loopByFile : loopGraph)
    {
        auto &messages = getObjectForFileFromMap(loopByFile.first.c_str(), SPF_messages);
        for (auto &loop : loopByFile.second)
        {
            if (loop->children.size() > 0)
            {
                map> flagUse;
                fillFromLoop(loop, flagUse, messages, loop->lineNum, checked);
            }
        }
    }

    for (auto &elem : checked)
    {
        if (elem->IsAllDeprecated())
        {
            wstring bufw, bufR;
            __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed due to all dimensions will deprecated", to_wstring(elem->GetShortName()).c_str());
            __spf_printToLongBuf(bufR, R86, to_wstring(elem->GetShortName()).c_str());

            for (auto &decl : elem->GetDeclInfo())
                getObjectForFileFromMap(decl.first.c_str(), SPF_messages).push_back(Messages(NOTE, decl.second, bufR, bufw, 1047));
            elem->SetDistributeFlag(DIST::SPF_PRIV);
        }
    }
}

// Returns true if any operation in `allOps` has a coefficient whose key has a
// non-zero first component.
static bool isMapped(const vector &allOps)
{
    bool mapped = false;
    for (auto &ops : allOps)
    {
        for (auto &coefs : ops.coefficients)
        {
            if (coefs.first.first != 0)
            {
                mapped = true;
                break;
            }
        }
        if (mapped)
            break;
    }
    return mapped;
}

// For loops of region `reg` that contain calls and have none of the listed
// restrictions, keeps distributable only the mapped arrays that belong to the
// most frequent tree (per `trees`) among the loop's mapped arrays. Mapped
// arrays from other trees, and arrays used inside the called functions that
// belong to another tree or to no tree, get the flag DIST::SPF_PRIV and a
// NOTE message (code 1047).
// Traversal: children are visited when the loop has no calls, or when it has
// calls and used arrays but none of them is mapped. In the other cases
// (restricted loop, no used arrays, or after filtering) the children of the
// loop are not visited.
static void filterArrayInCSRGraph(vector &loops, const map &mapFuncInfo, const ParallelRegion *reg, const map> &arrayLinksByFuncCalls, const map &trees, map> &messages)
{
    for (auto &loop : loops)
    {
        if (loop->region == reg)
        {
            if (loop->calls.size())
            {
                bool bounds = loop->hasGoto || loop->hasPrints || loop->hasStops || loop->hasUnknownArrayAssigns || loop->hasNonRectangularBounds || loop->hasIndirectAccess || loop->hasWritesToNonDistribute || loop->hasDifferentAlignRules;
                if (bounds == false )
                {
                    if (loop->usedArrays.size())
                    {
                        set realRefs;
                        for (auto &array : loop->usedArrays)
                            getRealArrayRefs(array, array, realRefs, arrayLinksByFuncCalls);

                        // real arrays that are read or written with a loop-dependent index
                        set wasMapped;
                        for (auto &read : loop->readOps)
                        {
                            set readRefs;
                            getRealArrayRefs(read.first, read.first, readRefs, arrayLinksByFuncCalls);
                            if (isMapped(read.second.first))
                                wasMapped.insert(readRefs.begin(), readRefs.end());
                        }

                        for (auto &write : loop->writeOps)
                        {
                            set writeRefs;
                            getRealArrayRefs(write.first, write.first, writeRefs, arrayLinksByFuncCalls);
                            if (isMapped(write.second))
                                wasMapped.insert(writeRefs.begin(), writeRefs.end());
                        }

                        if (wasMapped.size() == 0)
                            filterArrayInCSRGraph(loop->children, mapFuncInfo, reg, arrayLinksByFuncCalls, trees, messages);
                        else
                        {
                            set deprecated;
                            int treeNum = -1;
                            map treeNumCount;
                            //filter by graph loop's arrays
                            //TODO
                            // pass 1: drop arrays without a valid tree, count arrays per tree
                            for (auto &array : realRefs)
                            {
                                if (wasMapped.find(array) == wasMapped.end())
                                    continue;

                                auto itA = trees.find(array);
                                if (itA == trees.end() || itA->second < 0)
                                {
                                    wstring bufw, bufR;
                                    __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed", to_wstring(array->GetShortName()).c_str());
                                    __spf_printToLongBuf(bufR, R87, to_wstring(array->GetShortName()).c_str());
                                    getObjectForFileFromMap(loop->fileName.c_str(), messages).push_back(Messages(NOTE, loop->lineNum, bufR, bufw, 1047));
                                    deprecated.insert(array);
                                    array->SetDistributeFlag(DIST::SPF_PRIV);
                                }
                                else
                                {
                                    if (treeNumCount.find(itA->second) == treeNumCount.end())
                                        treeNumCount[itA->second] = 1;
                                    else
                                        treeNumCount[itA->second]++;
                                }
                            }

                            if (treeNumCount.size() == 0)
                                continue;

                            // pick the tree with the most arrays (first one wins on ties)
                            auto itT = treeNumCount.begin();
                            treeNum = itT->first;
                            int countT = itT->second;
                            itT++;
                            for (; itT != treeNumCount.end(); itT++)
                            {
                                if (itT->second > countT)
                                {
                                    countT = itT->second;
                                    treeNum = itT->first;
                                }
                            }

                            // pass 2: drop mapped arrays that are not in the chosen tree
                            for (auto &array : realRefs)
                            {
                                if (wasMapped.find(array) == wasMapped.end())
                                    continue;

                                auto itA = trees.find(array);
                                if (itA == trees.end() || itA->second != treeNum)
                                {
                                    wstring bufw, bufR;
                                    __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed", to_wstring(array->GetShortName()).c_str());
                                    __spf_printToLongBuf(bufR, R88, to_wstring(array->GetShortName()).c_str());
                                    getObjectForFileFromMap(loop->fileName.c_str(), messages).push_back(Messages(NOTE, loop->lineNum, bufR, bufw, 1047));
                                    deprecated.insert(array);
                                    array->SetDistributeFlag(DIST::SPF_PRIV);
                                }
                            }

                            // arrays used inside the functions called from this loop
                            set inCalls;
                            for (auto &call : loop->calls)
                            {
                                auto itF = mapFuncInfo.find(call.first);
                                if (itF != mapFuncInfo.end())
                                    inCalls.insert(itF->second->allUsedArrays.begin(), itF->second->allUsedArrays.end());
                            }

                            for (auto &inCall : inCalls)
                            {
                                if (realRefs.find(inCall) == realRefs.end() && deprecated.find(inCall) == deprecated.end())
                                {
                                    bool needToDeprecated = false;
                                    if (trees.find(inCall) == trees.end())
                                        needToDeprecated = true;
                                    else
                                    {
                                        if (trees.find(inCall)->second != treeNum)
                                            needToDeprecated = true;
                                    }

                                    if (needToDeprecated)
                                    {
                                        wstring bufw, bufR;
                                        __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed", to_wstring(inCall->GetShortName()).c_str());
                                        __spf_printToLongBuf(bufR, R89, to_wstring(inCall->GetShortName()).c_str());
                                        getObjectForFileFromMap(loop->fileName.c_str(), messages).push_back(Messages(NOTE, loop->lineNum, bufR, bufw, 1047));
                                        deprecated.insert(inCall);
                                        inCall->SetDistributeFlag(DIST::SPF_PRIV);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            else
                filterArrayInCSRGraph(loop->children, mapFuncInfo, reg, arrayLinksByFuncCalls, trees, messages);
        }
    }
}

// Entry point of the filtering for one region. Does nothing if the region has
// at most one array that is neither a loop array, a template nor a parameter.
// Otherwise array trees are computed from the region graph, each remaining
// such array is given a fresh tree number of its own, and all loops are
// filtered by the static overload above.
void filterArrayInCSRGraph(map> &loopGraph, map> &allFuncs, ParallelRegion *reg, const map> &arrayLinksByFuncCalls, map> &messages)
{
    map mapFuncInfo;
    map trees;

    auto arrays = reg->GetAllArrays().GetArrays();
    int count = 0;
    for (auto &array : arrays)
        if (!array->IsLoopArray() && !array->IsTemplate() && array->GetLocation().first != DIST::l_PARAMETER)
            count++;

    if (count <= 1)
        return;

    reg->GetGraphToModify().FindAllArraysTrees(trees, reg->GetAllArrays());
    createMapOfFunc(allFuncs, mapFuncInfo);

    int lastTreesNum = trees.size();
    for (auto &array : arrays)
        if (!array->IsLoopArray() && !array->IsTemplate() && array->GetLocation().first != DIST::l_PARAMETER)
            if (trees.find(array) == trees.end())
                trees[array] = lastTreesNum++;

    if (trees.size())
        for (auto &byFile : loopGraph)
            filterArrayInCSRGraph(byFile.second, mapFuncInfo, reg, arrayLinksByFuncCalls, trees, messages);
}

// Builds reducedAccessGraph from accessGraph for this loop and, first, for all
// of its children. The region is checked with checkNull() after the children
// have been processed; nothing is built when the access graph has no vertices.
void LoopGraph::reduceAccessGraph()
{
    for (auto& ch : children)
        ch->reduceAccessGraph();

    checkNull(region, convertFileName(__FILE__).c_str(), __LINE__);
    if (accessGraph.GetNumberOfV() != 0)
        DIST::createOptimalDistribution(accessGraph, reducedAccessGraph, region->GetAllArrays(), region->GetId(), false);
}

// Creates distribution and align directives for this loop from its reduced
// access graph, then does the same for all children. Loops without a region
// only forward the call to their children. Sets hasUnknownArrayAssigns when an
// array reported through `canNotMapped` is among the arrays written in the
// loop (usedArraysWrite).
void LoopGraph::createVirtualTemplateLinks(const map>& arrayLinksByFuncCalls, map>& SPF_messages, bool isMpiProgram)
{
    if (region == NULL)
    {
        for (auto& ch : children)
            ch->createVirtualTemplateLinks(arrayLinksByFuncCalls, SPF_messages, isMpiProgram);
        return;
    }

    // a copy of the region's arrays (declared with `auto`, not a reference)
    auto allArrays = region->GetAllArrays();
    __spf_print(1, "*** FOR LOOP on line %d and file '%s':\n", lineNum, fileName.c_str());
#if 0
    // disabled debug dump of the access graph
    {
        char fName[256];
        sprintf(fName, "_graph_reg%d_%s.txt", lineNum, fileName.c_str());
        accessGraph.CreateGraphWiz(fName, vector>(), allArrays, true);
    }
#endif

    set canNotMapped;
    createDistributionDirs(reducedAccessGraph, allArrays, dataDirectives, SPF_messages, arrayLinksByFuncCalls, isMpiProgram, usedArrays.size() ? usedArrays : usedArraysAll);
    // the address of this loop is passed as a 64-bit integer argument
    createAlignDirs(reducedAccessGraph, allArrays, dataDirectives, (uint64_t)this, arrayLinksByFuncCalls, SPF_messages, &canNotMapped, usedArrays);

    for (auto& elem : canNotMapped)
        if (usedArraysWrite.find(elem) != usedArraysWrite.end())
            hasUnknownArrayAssigns = true;

    auto result = dataDirectives.GenAlignsRules();
    for (int i = 0; i < result.size(); ++i)
        __spf_print(1, " %s\n", result[i].c_str());
#if 0
    // disabled debug dump of the reduced graph
    //if (lineNum == 56 && fileName == "exchange_6.f")
    {
        char fName[256];
        sprintf(fName, "_graph_reduced_with_templ_reg%d_%s.txt", lineNum, fileName.c_str());
        reducedAccessGraph.CreateGraphWiz(fName, vector>(), allArrays, true);
    }
#endif

    for (auto& ch : children)
        ch->createVirtualTemplateLinks(arrayLinksByFuncCalls, SPF_messages, isMpiProgram);
}