Files
SAPFOR/src/Predictor/PredictScheme.cpp

683 lines
24 KiB
C++
Raw Normal View History

#include "leak_detector.h"
2023-09-14 19:43:13 +03:00
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <fstream>
2023-09-14 19:43:13 +03:00
#include <map>
#include <vector>
#include <set>
#include <string>
#include <queue>
#include "dvm.h"
#include "../DynamicAnalysis/gcov_info.h"
2025-04-20 21:34:31 +03:00
#include "../DynamicAnalysis/gCov_parser_func.h"
2023-09-14 19:43:13 +03:00
#include "PredictScheme.h"
2025-06-04 13:08:38 +03:00
#include "SgUtils.h"
2024-10-07 14:50:37 +03:00
#include "../DirectiveProcessing/directive_parser.h"
#include "../Distribution/DvmhDirective.h"
2025-06-04 13:08:38 +03:00
#include "graph_loops_func.h"
#include "expr_transform.h"
#include "../LoopAnalyzer/loop_analyzer.h"
2025-09-23 08:21:05 +03:00
#include "CFGraph/CFGraph.h"
#include "json.hpp"
2023-09-14 19:43:13 +03:00
using std::map;
using std::string;
using std::vector;
using std::set;
using std::ofstream;
using std::pair;
using std::tuple;
using json = nlohmann::json;
2023-09-14 19:43:13 +03:00
// Recursively walks a directive expression tree and accumulates communication
// statistics for the clauses found in it.
//
//   exp            - root of the (sub)tree to inspect; NULL is accepted and ignored.
//   parStats       - per-clause counters (shadow / reduction / remote / across) to update.
//   totalScoreComm - running communication score; each clause kind adds its list
//                    length multiplied by a fixed weight (1, 1, 100 and 10 respectively).
//
// Both counters are updated only when the clause really carries an expression
// list: previously the score update sat outside the `if (list)` guard and
// dereferenced a null list.
static void fillParallel(SgExpression *exp, ParallelStats &parStats, int &totalScoreComm)
{
    if (exp == NULL)
        return;

    SgExprListExp *list = NULL;
    switch (exp->variant())
    {
    case SHADOW_RENEW_OP:
        list = isSgExprListExp(exp->lhs());
        if (list)
        {
            parStats.ShadowCount += list->length();
            totalScoreComm += list->length();
        }
        break;
    case REDUCTION_OP:
        list = isSgExprListExp(exp->lhs());
        if (list)
        {
            parStats.ReductionCount += list->length();
            totalScoreComm += list->length();
        }
        break;
    case REMOTE_ACCESS_OP:
        list = isSgExprListExp(exp->lhs());
        if (list)
        {
            parStats.RemoteCount += list->length();
            //TODO:
            totalScoreComm += 100 * list->length();
        }
        break;
    case ACROSS_OP:
    {
        // The list is either the direct operand or, when the operand is a DDOT
        // node, that node's right child.
        SgExpression *operand = exp->lhs();
        if (operand)
        {
            if (operand->variant() == DDOT)
                list = isSgExprListExp(operand->rhs());
            else
                list = isSgExprListExp(operand);
        }
        if (list)
        {
            parStats.AcrossCount += list->length();
            totalScoreComm += 10 * list->length();
        }
        break;
    }
    default:
        break;
    }

    // Descend into both children (right first, as before).
    fillParallel(exp->rhs(), parStats, totalScoreComm);
    fillParallel(exp->lhs(), parStats, totalScoreComm);
}
// Scans every statement of `file` in lexical order and accumulates directive
// statistics into `predictorCounts`:
//   - DVM_PARALLEL_ON_DIR: counts the directive and feeds its three expressions
//     to fillParallel();
//   - DVM_REDISTRIBUTE_DIR / DVM_REALIGN_DIR: counts redistributed items
//     (weight 10000 each), unless the previous statement is DVM_NEW_VALUE_DIR;
//   - DVM_REMOTE_ACCESS_DIR: counts remote items (weight 100 each);
//   - interval directives: counted in IntervalCount.
// Finally the number of parallel directives is added to TotalScorePar.
void processFileToPredict(SgFile *file, PredictorStats &predictorCounts)
{
    SgStatement* prev = NULL;
    for (SgStatement *st = file->firstStatement(); st; st = st->lexNext())
    {
        SgExprListExp *list = NULL;
        switch (st->variant())
        {
        case DVM_PARALLEL_ON_DIR:
            predictorCounts.ParallelCount++;
            for (int i = 0; i < 3; ++i)
                fillParallel(st->expr(i), predictorCounts.ParallelStat, predictorCounts.TotalScoreComm);
            break;
        case DVM_REDISTRIBUTE_DIR:
        case DVM_REALIGN_DIR:
            // `prev` is still NULL when the directive is the very first
            // statement of the file, so it must be checked before use.
            if (prev && prev->variant() == DVM_NEW_VALUE_DIR)
                break;

            list = isSgExprListExp(st->expr(0));
            if (list)
            {
                const int len = list->length();
                predictorCounts.RedistributeCount += len;
                predictorCounts.TotalScoreComm += 10000 * len;
            }
            else
            {
                predictorCounts.RedistributeCount++;
                predictorCounts.TotalScoreComm += 10000;
            }
            break;
        case DVM_REMOTE_ACCESS_DIR:
            for (int i = 0; i < 3; ++i)
            {
                list = isSgExprListExp(st->expr(i));
                if (list)
                {
                    predictorCounts.RemoteCount += list->length();
                    predictorCounts.TotalScoreComm += 100 * list->length();
                }
            }
            break;
        case DVM_INTERVAL_DIR:
        case DVM_ENDINTERVAL_DIR:
        case DVM_EXIT_INTERVAL_DIR:
            predictorCounts.IntervalCount++;
            break;
        default:
            break;
        }
        prev = st;
    }
    predictorCounts.TotalScorePar += predictorCounts.ParallelCount;
}
2024-10-07 14:50:37 +03:00
// Sums gcov execution counters over the statements of one parallel loop.
//
// Walks from `loop` up to (but not including) its last node. Statements with a
// non-positive line number are skipped. For every other statement the matching
// gcov record must exist and report the same line, otherwise an internal error
// is raised. Each accepted statement adds one to `count_of_parallel_lines` and
// its executed count to `paralle_exec_count`.
static void calculateForParallelLoop(SgStatement* loop, const map<int, Gcov_info>& gcov,
                                     uint64_t& paralle_exec_count, uint64_t& count_of_parallel_lines)
{
    SgStatement* cur = loop;
    while (cur != loop->lastNodeOfStmt())
    {
        const int lineNo = cur->lineNumber();
        if (lineNo > 0)
        {
            const auto rec = gcov.find(lineNo);
            // A missing record and a record for another line are reported the same way.
            if (rec == gcov.end() || rec->second.getNumLine() != lineNo)
            {
                __spf_print(1, "bad gcov info\n");
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
            }

            ++count_of_parallel_lines;
            paralle_exec_count += rec->second.getExecutedCount();
        }
        cur = cur->lexNext();
    }
}
// File-level JSON document shared by the two predictor passes:
// calculateStatsForPredictor() assigns it `{ cluster, program }`, and
// parseDvmDirForPredictor() extends element [1]["program_info"] and dumps the
// whole document to "info.json".
static json info;
2024-10-07 14:50:37 +03:00
void calculateStatsForPredictor(const map<string, vector<FuncInfo*>>& allFuncInfo,
const map<string, map<int, Gcov_info>>& gCovInfo) {
json cluster;
json program;
cluster["cluster_info"] = { {"num_nodes", 0},
{"cores_per_node", 0},
{"threads_per_node", 0},
{"memory_per_node_gb", 0},
{"network_bandwidth_gbps", 0},
{"network_latency_ms", 0}
};
program["program_info"]["sequential_execution_time_sec"] = 0.0;
program["program_info"]["launch_grid"] = { {"dimensions", {0, 0, 0} }, {"total_processes", 0} };
uint64_t total_exec_count = 0;
uint64_t parallel_exec_count = 0;
uint64_t count_of_parallel_lines = 0;
2024-10-07 14:50:37 +03:00
for (auto& byFile : allFuncInfo)
{
int ok = SgFile::switchToFile(byFile.first);
if (ok == -1)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
auto it = gCovInfo.find(byFile.first);
2024-10-07 14:50:37 +03:00
if (it == gCovInfo.end())
{
__spf_print(1, "bad gcov info\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
auto& gcov = it->second;
2024-10-07 14:50:37 +03:00
for (auto& func : byFile.second)
{
SgStatement* stat = func->funcPointer->GetOriginal();
for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
{
uint64_t paralle_exec = 0;
uint64_t lines_count = 0;
2025-04-20 21:34:31 +03:00
if (st->variant() == DVM_PARALLEL_ON_DIR)
2024-10-07 14:50:37 +03:00
{
auto loop = st->lexNext();
checkNull(loop, convertFileName(__FILE__).c_str(), __LINE__);
if (loop->variant() != FOR_NODE)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
2025-04-20 21:34:31 +03:00
if (__gcov_doesThisLineExecuted(byFile.first, loop->lineNumber()))
{
calculateForParallelLoop(loop, gcov, paralle_exec, lines_count);
st = loop->lastNodeOfStmt();
2025-04-20 21:34:31 +03:00
parallel_exec_count += paralle_exec;
count_of_parallel_lines += lines_count;
2025-04-20 21:34:31 +03:00
__spf_print(1, " PAR LOOP [%d %s] total exec %llu, total exec lines %llu, avg %.16e\n",
loop->lineNumber(), byFile.first.c_str(), paralle_exec, lines_count, paralle_exec / (double)lines_count);
}
}
}
for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
{
2025-04-20 21:34:31 +03:00
if (!isSgExecutableStatement(st) || isDVM_stat(st) || isSPF_stat(st) ||
!__gcov_doesThisLineExecuted(byFile.first, st->lineNumber()))
continue;
int line = st->lineNumber();
if (line <= 0)
continue;
auto it = gcov.find(line);
if (it == gcov.end())
continue;
auto& info = it->second;
2024-10-07 14:50:37 +03:00
if (info.getNumLine() != line)
{
__spf_print(1, "bad gcov info\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
total_exec_count += info.getExecutedCount();
}
}
}
__spf_print(1, " average_parallel_exec %.16e\n", parallel_exec_count / (double)count_of_parallel_lines);
__spf_print(1, " parallel_rate %.16e\n", parallel_exec_count / (double)total_exec_count);
program["program_info"]["average_parallel_line_executions"] = parallel_exec_count / (double)count_of_parallel_lines;
program["program_info"]["parallel_execution_fraction"] = parallel_exec_count / (double)total_exec_count;
info = { cluster, program };
}
// Returns the gcov record that corresponds to statement `st`.
//
// If `st` is a DVM statement, the lookup walks backwards to the nearest
// preceding non-DVM statement and uses that statement's line number. A missing
// record, or a record whose stored line differs from the requested one, is
// reported as an internal error.
// NOTE(review): like the rest of this file, this assumes printInternalError()
// does not return - confirm.
static const Gcov_info& getInfo(SgStatement* st, const map<int, Gcov_info> &gcov)
{
    auto stat = st;
    while (isDVM_stat(stat))
        stat = stat->lexPrev();

    // Walking back may run off the beginning of the statement list.
    if (stat == NULL)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    int line = stat->lineNumber(); // XXX

    auto it = gcov.find(line);
    // Previously the iterator was dereferenced without checking for end().
    if (it == gcov.end())
    {
        __spf_print(1, "bad gcov info\n");
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }

    auto& info = it->second;
    if (info.getNumLine() != line)
    {
        __spf_print(1, "bad gcov info\n");
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }
    return info;
}
// Builds the JSON description of one distribution directive.
//
//   byPos - maps each known array to its index in the emitted arrays list.
//   arr   - symbol of the distributed array; it must resolve to an array that
//           is present in `byPos`, otherwise an internal error is raised.
//   list  - expression list of per-dimension distribution formats; each item is
//           stored in "distribution_spec" in its unparsed (textual) form.
//   line  - source line stored in the "line" field.
static json parseDistribution(const map<DIST::Array*, int>& byPos, SgSymbol* arr, SgExpression* list, int line)
{
    json result;

    auto resolved = getArrayFromDeclarated(declaratedInStmt(arr), arr->identifier());
    const bool known = (resolved != NULL) && (byPos.find(resolved) != byPos.end());
    if (!known)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    result["line"] = line;
    result["array_id"] = byPos.at(resolved);

    for (SgExpression* item = list; item; item = item->rhs())
        result["distribution_spec"].push_back(item->lhs()->unparse());

    return result;
}
// Builds the JSON description of one ALIGN / REALIGN relation.
//
//   byPos   - maps each known array to its index in the emitted arrays list.
//   srcArr  - symbol of the array being aligned.
//   tgtArr  - symbol of the array it is aligned with.
//   listSrc - list of dummy variables for the source array dimensions.
//   listTgt - list of subscript expressions for the target array.
//   line    - source line stored in the "line" field.
//
// Both symbols must resolve to arrays present in `byPos`, otherwise an internal
// error is raised. For every target subscript, the first source dummy variable
// that occurs in it is located and its coefficients are extracted with
// getCoefsOfSubscript(). Each source dimension with a non-zero first
// coefficient yields one entry in "rules": { dim, first, second } when the
// second coefficient is non-zero and { dim, first } when it is zero.
// The original condition was inverted and dropped every non-zero second
// coefficient while writing the zero ones explicitly.
static json parseAlign(const map<DIST::Array*, int>& byPos, SgSymbol* srcArr, SgSymbol* tgtArr,
                       SgExpression *listSrc, SgExpression* listTgt, int line)
{
    json align;

    auto arraySrc = getArrayFromDeclarated(declaratedInStmt(srcArr), srcArr->identifier());
    if (arraySrc == NULL || byPos.find(arraySrc) == byPos.end())
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    auto arrayTgt = getArrayFromDeclarated(declaratedInStmt(tgtArr), tgtArr->identifier());
    if (arrayTgt == NULL || byPos.find(arrayTgt) == byPos.end())
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    align["line"] = line;
    align["source_array_id"] = byPos.at(arraySrc);
    align["target_array_id"] = byPos.at(arrayTgt);

    // Textual name and symbol of every source dummy variable, in dimension order.
    vector<pair<string, SgSymbol*>> srcSymbs;
    for (SgExpression* list = listSrc; list; list = list->rhs())
        srcSymbs.push_back({ list->lhs()->unparse(), list->lhs()->symbol() });

    const int dimCount = static_cast<int>(srcSymbs.size());
    vector<pair<int, int>> coefs(srcSymbs.size());

    for (SgExpression* list = listTgt; list; list = list->rhs())
    {
        auto exp = list->lhs();
        for (int z = 0; z < dimCount; ++z)
        {
            if (recSymbolFind(exp, srcSymbs[z].first, VAR_REF))
            {
                getCoefsOfSubscript(coefs[z], exp, srcSymbs[z].second);
                // The variable occurs in the subscript, so a zero first
                // coefficient is treated as an internal error.
                if (coefs[z].first == 0)
                    printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                break;
            }
        }
    }

    for (int z = 0; z < dimCount; ++z)
    {
        if (coefs[z].first == 0)
            continue;

        if (coefs[z].second)
            align["rules"].push_back({ z, coefs[z].first, coefs[z].second });
        else
            align["rules"].push_back({ z, coefs[z].first });
    }
    return align;
}
// Walks backwards from `st` and returns the first preceding statement that is
// either a program-unit header or an executable statement; DVM and SPF
// statements are skipped. Returns NULL if `st` is NULL or the beginning of the
// statement list is reached first.
static SgStatement* findBefore(SgStatement* st)
{
    while (st)
    {
        st = st->lexPrev();
        // Stop before handing a null pointer to the classifier helpers below.
        if (st == NULL)
            break;

        if (isSgProgHedrStmt(st))
            break;
        if (isDVM_stat(st) || isSPF_stat(st))
            continue;
        if (isSgExecutableStatement(st))
            break;
    }
    return st;
}
// Emits one JSON item per SHADOW_RENEW / ACROSS entry of a parallel directive.
//
//   dirs     - parsed clause entries: ((array symbol, name), per-dimension width pairs).
//   st       - the directive statement; its line number is stored in each item.
//   byPos    - maps each known array to its index in the emitted arrays list;
//              an array that is not found there is an internal error.
//   type     - key inside `parallel` that receives the indices of the new items
//              (callers pass "shadow_renews" or "acrosses").
//   typed    - global JSON list for this clause kind; new items are appended to it.
//   parallel - JSON object of the enclosing parallel directive.
//
// The index of every appended item is recorded under parallel[type]. The
// original code always used "shadow_renews", so ACROSS indices ended up in the
// shadow list.
static void fillAcrossShadow(vector<pair<pair<Symbol*, string>, vector<pair<int, int>>>>& dirs, SgStatement *st,
                             const map<DIST::Array*, int>& byPos, const string& type, json& typed, json& parallel)
{
    for (auto& dir : dirs)
    {
        auto& symb = dir.first;
        auto& access = dir.second;

        DIST::Array* arr = getArrayFromDeclarated(declaratedInStmt(symb.first), symb.first->identifier());
        if (arr == NULL || byPos.find(arr) == byPos.end())
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

        json item;
        item["line"] = st->lineNumber();
        item["array_id"] = byPos.at(arr);
        item["communication_pattern"] = "NEAREST_NEIGHBOR";

        if (access.size())
        {
            // Explicit widths from the clause.
            for (int z = 0; z < static_cast<int>(access.size()); ++z)
                item["width"].push_back({ z, access[z].first, access[z].second });
        }
        else
        {
            // No explicit widths: emit { dim, 1, 1 } for every entry of the
            // array's shadow specification.
            auto& spec = arr->GetShadowSpec();
            //TODO: analyze spec of array for shadow
            for (int z = 0; z < static_cast<int>(spec.size()); ++z)
                item["width"].push_back({ z, 1, 1 });
        }

        typed.push_back(item);
        parallel[type].push_back(typed.size() - 1);
    }
}
static void parallelDir(const map<DIST::Array*, int>& byPos, SgExpression* spec, SgSymbol* arr, SgExpression* arrSpec,
SgStatement* st, SgExpression* clauses, const map<int, Gcov_info>& gcov, json& directives,
const map<string, CommonBlock*>& commonBlocks, const map<string, vector<FuncInfo*>>& allFuncInfo)
{
json parallel;
json& shadow_renew = directives["shadow_renew"];
json& reduction = directives["reduction"];
json& remote_access = directives["remote_access"];
json& across = directives["across"];
vector<pair<string, SgSymbol*>> loopSymbs;
auto list = spec;
while (list)
{
loopSymbs.push_back({ list->lhs()->unparse(), list->lhs()->symbol() });
list = list->rhs();
}
parallel["line"] = st->lineNumber();
parallel["loops_count"] = loopSymbs.size();
SgStatement* loop = isSgForStmt(st->lexNext());
2025-04-20 21:34:31 +03:00
if (loop == NULL)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
SgStatement* lastNode = loop->lastNodeOfStmt();
SgStatement* before = findBefore(loop);
if (before == NULL)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
vector<int64_t> execs;
for (int z = 0; z < loopSymbs.size(); ++z)
{
auto& info = getInfo(loop, gcov);
execs.push_back(info.getExecutedCount());
loop = loop->lexNext();
}
2025-04-20 21:34:31 +03:00
for (int z = execs.size() - 1; z > 0; --z)
if (execs[z - 1] != 0)
execs[z] /= execs[z - 1];
auto& info = getInfo(before, gcov);
2025-04-22 18:08:28 +03:00
if (info.getExecutedCount() && loopSymbs.size() > 1)
2025-04-20 21:34:31 +03:00
execs[0] /= info.getExecutedCount();
parallel["iterations_count"] = execs;
DvmDirective directive;
fillInfoFromDirective(new Statement(st), directive);
vector<int> empty;
parallel["shadow_renews"] = empty;
parallel["reductions"] = empty;
parallel["remote_accesses"] = empty;
parallel["acrosses"] = empty;
for (auto& op : directive.reduction)
{
for (auto& var : op.second)
{
json item;
item["line"] = st->lineNumber();
item["operation"] = op.first;
if (!isSgArrayType(var->type()))
{
item["reduction_type"] = "SCALAR";
item["size_bytes"] = getSizeOfType(var->type());
item["elements_count"] = 1;
}
else
{
item["reduction_type"] = "ARRAY";
auto type = isSgArrayType(var->type());
item["size_bytes"] = getSizeOfType(type->baseType());
item["elements_count"] = type->dimension();
}
reduction.push_back(item);
parallel["reductions"].push_back(reduction.size() - 1);
}
}
fillAcrossShadow(directive.shadowRenew, st, byPos, "shadow_renews", shadow_renew, parallel);
fillAcrossShadow(directive.across, st, byPos, "acrosses", across, parallel);
auto func = getFuncStat(st);
auto& funcInFile = allFuncInfo.at(st->fileName());
FuncInfo* currF = NULL;
for (auto& elem : funcInFile)
if (elem->funcName == func->symbol()->identifier())
currF = elem;
if (currF == NULL)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
auto cfg = buildCFGforCurrentFunc(func, SAPFOR::CFG_Settings(true, false, false, true, false, false, true), commonBlocks, allFuncInfo);
//TODO IP analysis
unsigned countOfAccess = 0;
unsigned countOfOps = 0;
if (cfg.find(currF) == cfg.end())
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
//skip all parallel loops
loop = st->lexNext();
for (int z = 0; z < loopSymbs.size(); ++z)
loop = loop->lexNext();
int lineStart = loop->lineNumber();
int lineEnd = lastNode->lexNext()->lineNumber();
//dumpCFG(cfg, false);
//TODO: calculate access in bytes
for (auto& block : cfg[currF])
{
for (auto& ir : block->getInstructions())
{
auto line = ir->getLine();
if (line < lineStart || line >= lineEnd)
continue;
auto inst = ir->getInstruction();
if (inst->isAccess())
countOfAccess++;
if (inst->isArith())
countOfOps++;
//printf("%s %d %d\n", inst->dump().c_str(), inst->isAccess(), inst->isArith());
}
}
deleteCFG(cfg);
parallel["computational_intensity"] = countOfOps > 0 ? ((double)countOfOps / (double)countOfAccess) : 0;
directives["parallel"].push_back(parallel);
}
void parseDvmDirForPredictor(const map<tuple<int, string, string>, pair<DIST::Array*, DIST::ArrayAccessInfo*>>& declaredArrays,
const map<string, CommonBlock*>& commonBlocks,
const map<string, vector<FuncInfo*>>& allFuncInfo,
2024-10-07 14:50:37 +03:00
const map<string, map<int, Gcov_info>>& gCovInfo)
{
auto& program = info[1]["program_info"];
2024-10-07 14:50:37 +03:00
map<DIST::Array*, int> byPos;
int pos = 0;
for (auto& arrayElem : declaredArrays)
{
json jArray;
auto& array = arrayElem.second.first;
auto sizes = array->GetSizes();
for (int z = 0; z < array->GetDimSize(); ++z)
jArray["dimensions"].push_back(sizes[z].second - sizes[z].first + 1);
jArray["name"] = array->GetName();
jArray["element_size_bytes"] = array->GetTypeSize();
program["arrays_info"].push_back(jArray);
byPos[array] = pos++;
}
auto& directives = program["directives"];
2024-10-07 14:50:37 +03:00
for (auto& byFile : allFuncInfo)
{
int ok = SgFile::switchToFile(byFile.first);
if (ok == -1)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
auto it = gCovInfo.find(byFile.first);
if (it == gCovInfo.end())
{
__spf_print(1, "bad gcov info\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
auto& gcov = it->second;
for (auto& func : byFile.second)
{
SgStatement* stat = func->funcPointer->GetOriginal();
for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
{
2025-04-20 21:34:31 +03:00
if (!__gcov_doesThisLineExecuted(byFile.first, st->lineNumber()))
continue;
2024-10-07 14:50:37 +03:00
SgExpression* list;
SgExpression* dup;
auto line = 0;
2024-10-07 14:50:37 +03:00
switch (st->variant())
{
case DVM_PARALLEL_ON_DIR:
2025-04-22 18:08:28 +03:00
parallelDir(byPos, st->expr(2),
st->expr(0) ? st->expr(0)->symbol() : NULL,
st->expr(0) ? st->expr(0)->lhs() : NULL,
st, st->expr(1), gcov, directives, commonBlocks, allFuncInfo);
break;
case DVM_VAR_DECL: // TODO
{
auto type = st->expr(2)->lhs();
if (type->variant() == DISTRIBUTE_OP)
2024-10-07 14:50:37 +03:00
{
list = st->expr(0);
while (list)
2024-10-07 14:50:37 +03:00
{
directives["distribute"].push_back(parseDistribution(byPos, list->lhs()->symbol(), type->lhs(), st->lineNumber()));
list = list->rhs();
2024-10-07 14:50:37 +03:00
}
}
else if (type->variant() == ALIGN_OP)
{
list = st->expr(0);
2024-10-07 14:50:37 +03:00
while (list)
{
2025-04-22 18:08:28 +03:00
if (type->lhs()) // if ALIGN A(...) with B(...)
directives["align"].push_back(parseAlign(byPos, list->lhs()->symbol(), type->rhs()->symbol(), type->lhs(), type->rhs()->lhs(), st->lineNumber()));
2024-10-07 14:50:37 +03:00
list = list->rhs();
}
}
}
2024-10-07 14:50:37 +03:00
break;
case DVM_DISTRIBUTE_DIR:
directives["distribute"].push_back(parseDistribution(byPos, st->expr(0)->lhs()->symbol(), st->expr(1), st->lineNumber()));
break;
2024-10-07 14:50:37 +03:00
case DVM_ALIGN_DIR:
directives["align"].push_back(parseAlign(byPos, st->expr(0)->lhs()->symbol(), st->expr(2)->symbol(), st->expr(1), st->expr(2)->lhs(), st->lineNumber()));
2024-10-07 14:50:37 +03:00
break;
2025-04-22 18:08:28 +03:00
case DVM_REALIGN_DIR:
directives["realign"].push_back(parseAlign(byPos, st->expr(0)->lhs()->symbol(), st->expr(2)->symbol(), st->expr(1), st->expr(2)->lhs(), st->lineNumber()));
break;
2024-10-07 14:50:37 +03:00
case DVM_SHADOW_DIR:
//dirs << "1;" << "SHADOW;" << st->expr(0)->unparse() << "(" << st->expr(1)->unparse() << ");\n";
2024-10-07 14:50:37 +03:00
break;
case DVM_REMOTE_ACCESS_DIR:
{
line = st->lexNext()->lineNumber();
auto it = gcov.find(line);
auto& info = it->second;
if (info.getNumLine() != line)
{
__spf_print(1, "bad gcov info\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
//dirs << info.getExecutedCount() << ";" << "REMOTE_ACCESS;";
2024-10-07 14:50:37 +03:00
list = st->expr(0);
while (list)
{
//dirs << list->lhs()->unparse() << ";";
2024-10-07 14:50:37 +03:00
list = list->rhs();
}
//dirs << "\n";
2024-10-07 14:50:37 +03:00
break;
}
default:
//printf("var = %d line %d\n", st->variant(), st->lineNumber());
break;
}
}
}
}
//printf("%s\n", info.dump(2).c_str());
ofstream dump("info.json");
dump << info.dump(2) << std::endl;
dump.flush();
dump.close();
}