diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..225f44b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.vscode
+ocelot/AtomicRegionInference/build
+benchmarks/ctests/*.ll
\ No newline at end of file
diff --git a/readme.md b/README.md
similarity index 100%
rename from readme.md
rename to README.md
diff --git a/benchmarks/ctests/example01.c b/benchmarks/ctests/example01.c
new file mode 100644
index 0000000..4b5b66f
--- /dev/null
+++ b/benchmarks/ctests/example01.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+
+void Fresh(int x) { printf("Fresh\n"); }
+void Consistent(int x, int id) { printf("Consistent\n"); }
+
+void atomic_start() {}
+void atomic_end() {}
+
+int tmp() { return 0; }
+int (*IO_NAME1)() = tmp;
+void log(int x) {}
+
+int app() {
+    int x = tmp();
+    Fresh(x);
+    log(x);
+    return 0;
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example02.c b/benchmarks/ctests/example02.c
new file mode 100644
index 0000000..1047d9e
--- /dev/null
+++ b/benchmarks/ctests/example02.c
@@ -0,0 +1,24 @@
+void Fresh(int x) {}
+void Consistent(int x, int id) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int sense() { return 0; }
+int (*IO_NAME)() = sense;
+
+int norm(int t) { return t; }
+
+void log(int x) {}
+
+int tmp() {
+    int t = sense();
+    int t_norm = norm(t);
+    return t_norm;
+}
+
+void app() {
+    int x = tmp();
+    Fresh(x);
+    log(x);
+}
\ No newline at end of file
diff --git a/benchmarks/ctests/example03.c b/benchmarks/ctests/example03.c
new file mode 100644
index 0000000..033a5fd
--- /dev/null
+++ b/benchmarks/ctests/example03.c
@@ -0,0 +1,18 @@
+void Fresh(int x) {}
+void Consistent(int x, int id) {}
+
+void atomic_start() {}
+void atomic_end() {}
+
+int input() { return 0; }
+int (*IO_NAME)() = input;
+
+void log(int x) {}
+
+void app() {
+    int x = input();
+    int y = 1;
+    int z = x + 1;
+    log(z);
+    Fresh(x);
+}
\ No newline at end of file
diff --git a/ocelot/AtomicRegionInference/CMakeLists.txt b/ocelot/AtomicRegionInference/CMakeLists.txt
index 24e2883..81d46f5 100644
--- a/ocelot/AtomicRegionInference/CMakeLists.txt
+++ b/ocelot/AtomicRegionInference/CMakeLists.txt
@@ -1,8 +1,22 @@
-cmake_minimum_required(VERSION 3.1)
+cmake_minimum_required(VERSION 3.6)
+project(InferAtoms)
 
+# LLVM uses C++17.
+set(CMAKE_CXX_STANDARD 17)
+
+# Load LLVMConfig.cmake. If this fails, consider setting `LLVM_DIR` to point
+# to your LLVM installation's `lib/cmake/llvm` directory.
 find_package(LLVM REQUIRED CONFIG)
+
+# Include the part of LLVM's CMake libraries that defines
+# `add_llvm_pass_plugin`.
+include(AddLLVM)
+
+# Use LLVM's preprocessor definitions, include directories, and library search
+# paths.
 add_definitions(${LLVM_DEFINITIONS})
 include_directories(${LLVM_INCLUDE_DIRS})
 link_directories(${LLVM_LIBRARY_DIRS})
 
-add_subdirectory(src) # Use your pass name here.
+# Our pass lives in this subdirectory.
+add_subdirectory(src)
diff --git a/ocelot/AtomicRegionInference/Makefile b/ocelot/AtomicRegionInference/Makefile
new file mode 100644
index 0000000..42cb370
--- /dev/null
+++ b/ocelot/AtomicRegionInference/Makefile
@@ -0,0 +1,28 @@
+.PHONY: clean_tests clean eg1 eg2 eg3 test
+
+eg1:
+	TEST=example01 $(MAKE) test
+eg2:
+	TEST=example02 $(MAKE) test
+eg3:
+	TEST=example03 $(MAKE) test
+
+# Rebuild the plugin, then compile $(TEST).c to LLVM IR twice: once with the
+# pass loaded ($(TEST).ll) and once without it ($(TEST).orig.ll).
+test:
+	$(MAKE) -C build all
+	clang -S -emit-llvm \
+		-fpass-plugin=build/src/InferAtomsPass.dylib \
+		-fno-discard-value-names \
+		../../benchmarks/ctests/$(TEST).c \
+		-o ../../benchmarks/ctests/$(TEST).ll
+	clang -S -emit-llvm \
+		-fno-discard-value-names \
+		../../benchmarks/ctests/$(TEST).c \
+		-o ../../benchmarks/ctests/$(TEST).orig.ll
+
+clean_tests:
+	find ../../benchmarks/ctests -name "*.ll" -exec rm -rf {} \;
+
+clean:
+	rm -rf build
diff --git a/ocelot/AtomicRegionInference/README.md b/ocelot/AtomicRegionInference/README.md
index d9a1c3a..5895b8c 100644
--- a/ocelot/AtomicRegionInference/README.md
+++ b/ocelot/AtomicRegionInference/README.md
@@ -1,14 +1,21 @@
-# region-inference-pass
+# Atomic Region Inference
 
-LLVM Pass for inferring atomic 
regions
+LLVM Pass for inferring atomic regions. Tested to work with LLVM 17.
 
-Build:
+To build the pass:
 
-    $ mkdir build
-    $ cd build
-    $ cmake ..
-    $ make
+```sh
+mkdir build
+cd build
+cmake ..
+make
+```
 
-Run:
+To have Clang load the pass while compiling a C file, run the following from the `build` directory:
 
-    $ opt -load build/src/libInferAtomicPass.so -atomize something.bc
+```sh
+clang -S -emit-llvm -fpass-plugin=src/InferAtomsPass.dylib -fno-discard-value-names ../../../benchmarks/ctests/example01.c
+```
+
+Or, when testing, use the shortcuts provided in the Makefile (e.g., `make eg1`),
+which emit two LLVM IR files: one with the pass enabled and one without.
diff --git a/ocelot/AtomicRegionInference/src/CMakeLists.txt b/ocelot/AtomicRegionInference/src/CMakeLists.txt
index 03033ee..e44f56e 100644
--- a/ocelot/AtomicRegionInference/src/CMakeLists.txt
+++ b/ocelot/AtomicRegionInference/src/CMakeLists.txt
@@ -1,23 +1,7 @@
-add_library(InferAtomicPass MODULE
+add_llvm_pass_plugin(InferAtomsPass
     # List your source files here.
-    InferAtomicPass.cpp
-    ConsistentInference.cpp
+    InferAtoms.cpp
     TaintTracker.cpp
+    InferFreshCons.cpp
+    Helpers.cpp
 )
-
-# Use C++11 to compile our pass (i.e., supply -std=c++11).
-target_compile_features(InferAtomicPass PRIVATE cxx_range_for cxx_auto_type)
-
-# LLVM is (typically) built with no C++ RTTI. We need to match that;
-# otherwise, we'll get linker errors about missing RTTI data.
-set_target_properties(InferAtomicPass PROPERTIES
-    COMPILE_FLAGS "-fno-rtti"
-)
-
-# Get proper shared-library behavior (where symbols are not necessarily
-# resolved when the shared library is linked) on OS X.
-if(APPLE) - set_target_properties(InferAtomicPass PROPERTIES - LINK_FLAGS "-undefined dynamic_lookup" - ) -endif(APPLE) diff --git a/ocelot/AtomicRegionInference/src/ConsistentInference.cpp b/ocelot/AtomicRegionInference/src/ConsistentInference.cpp deleted file mode 100644 index f59be4b..0000000 --- a/ocelot/AtomicRegionInference/src/ConsistentInference.cpp +++ /dev/null @@ -1,505 +0,0 @@ -#include "include/ConsistentInference.h" - -#define DEBUGINFER 0 -//Come back to this. it can crash and if pass not run with debug, shouldn't be needed -#if 0 -namespace { - - // Find closest debug info. Note that LLVM throws fatal error if we don't add debug info -// to call instructions that we insert (if the parent function has debug info). -DebugLoc findClosestDebugLoc(Instruction *instr) -{ - - DIScope *scope = instr->getFunction()->getSubprogram(); - Instruction *instrWithDebugLoc = instr; - while (!instrWithDebugLoc->getDebugLoc() && instrWithDebugLoc->getPrevNode() != NULL) - instrWithDebugLoc = instrWithDebugLoc->getPrevNode(); - if (instrWithDebugLoc->getDebugLoc()) // if found an instruction with info, use that info - return DebugLoc(instrWithDebugLoc->getDebugLoc()); - else // use the parent function's info (can't see any better source) - return DebugLoc::get(instr->getFunction()->getSubprogram()->getLine(), /* col */ 0, scope); -} - -} // namespace anon -#endif -using namespace std; -using namespace llvm; -Instruction* ConsistentInference::insertRegionInst(int toInsertType, Instruction* insertBefore) { - - Instruction* call; - IRBuilder<> builder(insertBefore); - //build and insert a region start inst - if (toInsertType == 0) { - //Constant* c = M->getOrInsertFunction(""); - call = builder.CreateCall(atomStart); - #if DEBUGINFER - errs() << "create start\n"; - #endif - } else { - //build and insert a region start inst - #if DEBUGINFER - errs() << "Inserting end at: "<< *insertBefore<<"\n"; - #endif - call = builder.CreateCall(atomEnd); - #if DEBUGINFER - errs() << 
"create end\n"; - #endif - } - return call; -} - -//if a direct pred is also a successor, then it's a for loop block -bool ConsistentInference::loopCheck(BasicBlock* bb) { - StringRef bbname = bb->getName().drop_front(2); - if (!bb->hasNPredecessors(1)) { - for (auto it = pred_begin(bb), et = pred_end(bb); it != et; ++it) { - BasicBlock* predecessor = *it; - StringRef pname = predecessor->getName().drop_front(2); - // errs() << "comparing " << pname<< " and " < 0) { - // errs() << "comparison is true\n"; - return true; - } - } - } - return false; -} - - -//find the first block after a for loop -BasicBlock* ConsistentInference::getLoopEnd(BasicBlock* bb) { - Instruction* ti = bb->getTerminator(); - BasicBlock* end = ti->getSuccessor(0); - ti = end->getTerminator(); - // errs() << "end is " << end->getName() << "\n"; - //for switch inst, succ 0 is the fall through - end = ti->getSuccessor(1); - // errs() << "end is " << end->getName() << "\n"; - return end; -} - -/*Top level region inference function -- could flatten later*/ -void ConsistentInference::inferConsistent(std::map allSets) -{ - //TODO: start with pseudo code structure from design doc - for( auto map : allSets ) { - #if DEBUGINFER - errs() << "DEBUGINFER: starting set " << map.first << "\n"; - #endif - addRegion(map.second, 0); - } - -} - -/*The only difference is outer map vs outer vec*/ -void ConsistentInference::inferFresh(inst_vec_vec allSets) -{ - //TODO: start with pseudo code structure from design doc - for( auto singleVec : allSets ) { - addRegion(singleVec, 1); - } - -} - -//Region type: 0 for Con, 1 for fresh -void ConsistentInference::addRegion(inst_vec conSet, int regionType) -{ - //construct a map of set item to bb - map blocks; - //a queue for regions that still need to be processed - queue> regionsNeeded; - - for(Instruction* item : conSet) { - blocks[item] = item->getParent(); - } - - regionsNeeded.push(blocks); - - Function* root; - for (Function& f : *m) { - if (f.getName().equals("app")) 
{ - root = &f; - } - } - - //iterate until no more possible regions - //THEN pick the best one - vector> regionsFound; - while (!regionsNeeded.empty()) { - //need to raise all blocks in the map until - //they are the same - map blockMap = regionsNeeded.front(); - regionsNeeded.pop(); - //record which functions have been travelled through - set nested; - - while (!sameFunction(blockMap)) { - //to think on: does this change? - Function* goal = commonPredecessor(blockMap, root); - for (Instruction* item : conSet) { - //not all blocks need to be moved up - Function* currFunc = blockMap[item]->getParent(); - nested.insert(currFunc); - if(currFunc!=goal) { - - //if more than one call: - //callChain info is already in the starting set - //so only explore a caller if it's in conSet - bool first = true; - for(User* use : currFunc->users()) { - //if (regionType == 1) { - if(! (find(conSet.begin(), conSet.end(), use)!=conSet.end())) { - continue; - } - //errs() << "Use: "<< *use << " is in call chain\n"; - //} - Instruction* inst = dyn_cast(use); - #if DEBUGINFER - errs() << "DEBUGINFER: examining use: "<< *inst<<"\n"; - #endif - if (inst == NULL) { - //errs () <<"ERROR: use " << *use << "not an instruction\n"; - break; - } - //update the original map - if (first) { - blockMap[item] = inst->getParent(); - first = false; - } else { - //copy the blockmap, update, add to queue - Instruction* inst = dyn_cast(use); - map copy; - for(auto map : blockMap) { - copy[map.first] = map.second; - } - copy[item] = inst->getParent(); - regionsNeeded.push(copy); - } - }//end forall uses - }//end currFunc check - }//end forall items - }//end same function check - - - - /**Now, all bb in the map are in the same function, so we can run - * dom or post-dom analysis on that function**/ - #if DEBUGINFER - errs() << "DEBUGINFER: start dom tree analysis\n"; - #endif - Function* home = blockMap.begin()->second->getParent(); - if(home == nullptr) { - #if DEBUGINFER - errs() << "DEBUGINFER: no function 
found\n"; - #endif - continue; - } - DominatorTree& domTree = pass->getAnalysis(*home).getDomTree(); - //Find the closest point that dominates - BasicBlock* startDom = blockMap.begin()->second; - for (auto map : blockMap) { - startDom = domTree.findNearestCommonDominator(map.second, startDom); - } - //TODO: if an inst in the set is in the bb, we can truncate? - #if DEBUGINFER - errs() << "DEBUGINFER: start post dom tree analysis\n"; - #endif - //Flip directions for the region end - PostDominatorTree& postDomTree = pass->getAnalysis(*home).getPostDomTree(); - //Find the closest point that dominates - BasicBlock* endDom = blockMap.begin()->second; - for (auto map : blockMap) { - #if DEBUGINFER - if (endDom!=nullptr) { - errs() << "finding post dom of:" << map.second->getName()<< " and " << endDom->getName()<< "\n"; - } else { - errs() << "endDom is null\n"; - } - #endif - endDom = postDomTree.findNearestCommonDominator(map.second, endDom); - } - if (startDom==nullptr) { - errs() << "ERROR: null start\n"; - } else if (endDom==nullptr) { - errs() << "ERROR: null end\n"; - } - #if DEBUGINFER - errs() << "DEBUGINFER: match scope\n"; - #endif - //need to make the start and end dominate each other as well. - startDom = domTree.findNearestCommonDominator(startDom, endDom); - endDom = postDomTree.findNearestCommonDominator(startDom, endDom); - - //extra check to disallow loop conditional block as the end - if(loopCheck(endDom)) { - endDom = getLoopEnd(endDom); - } - - - - if (startDom==nullptr) { - errs() << "ERROR: null start after scope merge\n"; - } else if (endDom==nullptr) { - errs() << "ERROR: null end after scope merge\n"; - } -#if DEBUGINFER - errs() << "DEBUGINFER: insert insts\n"; -#endif - //TODO: fallback if endDom is null? Need hyper-blocks, I think - //possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? 
- Instruction* regionStart = truncate(startDom, true, conSet, nested); - Instruction* regionEnd = truncate(endDom, false, conSet, nested); - if (regionStart==nullptr) { - errs() << "ERROR: null start after truncation\n"; - } else if (regionEnd==nullptr) { - errs() << "ERROR: null end after truncation\n"; - } else { - //errs() << "Region start is before " << *regionStart<<" and region end is before " << *regionEnd<<"\n"; - } - - //insert into regions found - regionsFound.push_back(make_pair(regionStart, regionEnd)); - }//end while regions needed - - //now see which region is smallest -- instruction count? they must dominate - //each other, so there's no possibility of not running into the start from - //the end - pair smallestReg = findSmallest(regionsFound); - //errs() << "Smallest Region was " << *smallestReg.first<< " and " << *smallestReg.second <<"\n"; - Instruction* regionStart = smallestReg.first; - Instruction* regionEnd = smallestReg.second; - insertRegionInst(0, regionStart); - insertRegionInst(1, regionEnd); - //}//end while regions needed -} - -/*Function to truncate a bb if the instruction is in the bb */ -Instruction* ConsistentInference::truncate(BasicBlock* bb, bool forwards, inst_vec conSet, set nested) -{ - //truncate the front - if(forwards) { - for (Instruction& inst : *bb) { - //stop at first inst in the basic block that is in the set. 
- if (find(conSet.begin(), conSet.end(), &inst)!=conSet.end()){ - return &inst; - } - //need to stop at relevant callIsnsts as well - else if (CallInst* ci = dyn_cast(&inst)){ - if (nested.find(ci->getCalledFunction())!=nested.end()) { - return &inst; - } - } - - } - //otherwise just return the last inst - return &bb->back(); - } - //reverse directions if not forwards - Instruction* prev = NULL; - for(BasicBlock::reverse_iterator i = bb->rbegin(), e = bb->rend(); i!=e;++i) { - Instruction* inst = &*i; - if (find(conSet.begin(), conSet.end(), inst)!=conSet.end()){ - //need to return the previous inst (next in fowards), as it should be inserted before the returned inst - - if (prev == NULL) { - //only happens if use is a ret inst, which is a scope use to make the branching - //work, not an actual one, so this is safe - return inst; - } - return prev; - } - else if (CallInst* ci = dyn_cast(inst)){ - if (nested.find(ci->getCalledFunction())!=nested.end()) { - return prev; - } - } - prev = inst; - } - //otherwise just return first inst of the block - //errs() << "truncate returning " << bb->front() << "\n"; - return &bb->front(); -} - - -Function* ConsistentInference::commonPredecessor(map blockMap, Function* root) -{ - vector funcList; - //add the parents, without duplicates - for (auto map : blockMap) { - if(!(find(funcList.begin(), funcList.end(), map.second->getParent())!=funcList.end())) { - funcList.push_back(map.second->getParent()); - #if DEBUGINFER - errs() << "DEBUGINFER: adding: " << map.second->getParent()->getName()<<"\n"; - #endif - } - } - //easy case: everything is already in the same function - if(funcList.size()==1) { - return funcList.at(0); - } - /* Algo Goal: get the deepest function that still calls (or is) all funcs in funcList. - * Consider: multiple calls? 
Should be dealt with in the add region function -- eventually each caller - * gets its own region - */ - Function* goal = nullptr; - //Function* root = m->getFunction("app"); - #if DEBUGINFER - errs() << "DEBUGINFER: starting from " << root->getName() << "\n"; - #endif - deepCaller(root, funcList, &goal); - if(goal == nullptr) { - errs() << "ERROR: deepCaller failed\n"; - } - return goal; -} - -/*Recursive: from a root, returns list of called funcs. */ -vector ConsistentInference::deepCaller(Function* root, vector funcList, Function** goal) -{ - vector calledFuncs; - bool mustIncludeSelf = false; - - for (inst_iterator inst = inst_begin(root), E = inst_end(root); inst != E; ++inst) { - if(CallInst* ci = dyn_cast(&(*inst))) { - calledFuncs.push_back(ci->getCalledFunction()); - } - } - vector explorationList; - for (Function* item : funcList) { - - //skip over root or called funcs - if ((find(calledFuncs.begin(), calledFuncs.end(), item)!=calledFuncs.end()) || item == root) { - if (item == root) { - mustIncludeSelf = true; - } - continue; - } - explorationList.push_back(item); - #if DEBUGINFER - errs() << "need to find " << item->getName() <<"\n"; - #endif - } - //this function is a root of a call tree that calls everything in the func List - if (explorationList.empty()) { - #if DEBUGINFER - errs() << "empty list\n"; - #endif - *goal = root; - return calledFuncs; - } - //otherwise recurse - Function* candidate = nullptr; - for (Function* called : calledFuncs) { - vector partial = deepCaller(called, explorationList, &candidate); - //if candidate is set, it means called is a root for everything in the explorationList - if (candidate!=nullptr) { - *goal = candidate; - #if DEBUGINFER - errs() << "New candidate: " << (*goal)->getName() << "\n"; - #endif - } - //remove from explorationList, but add to calledFuncs - for (Function* item : partial) { - func_vec::iterator place = find(explorationList.begin(), explorationList.end(), item); - if(place!=explorationList.end()) { - 
explorationList.erase(place); - } - calledFuncs.push_back(item); - } - - } - //current point is a root - if(explorationList.empty()) { - //not the deepest - if (candidate!=nullptr && !mustIncludeSelf) { - *goal = candidate; - } else { - //is the deepest - *goal = root; - } - } - return calledFuncs; -} - - - - -/*Recursive: get the min of the maximum length of each regions*/ -inst_inst_pair ConsistentInference::findSmallest(vectorregionsFound) -{ - inst_inst_pair best; - int best_count = 2147483647; - - for (inst_inst_pair candidate : regionsFound) { - Function* root = candidate.first->getFunction(); - int pre = 0 ; - int found = 0; - for (Instruction& inst : *candidate.first->getParent()) { - pre++; - if (&inst==candidate.first) { - break; - - } - } - //get the max length from the bb to the end instruction - vector v; - int length = getSubLength(candidate.first->getParent(), candidate.second, v); - //substract the prefix before the start inst - length -= pre; - if (length < best_count) { - best_count = length; - best = candidate; - //errs() << "best candidate is " << *candidate.first << " and " << - // *candidate.second << " with length " << length << "\n"; - } - - } - return best; -} -//helper func, recursive -int ConsistentInference::getSubLength(BasicBlock* bb, Instruction* end, vector visited){ - int count = 0; - int max_ret = 0; - visited.push_back(bb); - for (Instruction& inst : *bb) { - count++; - if (&inst == end){ - return count; - } - if(CallInst* ci = dyn_cast(&inst)){ - Function* cf = ci->getCalledFunction(); - if (!cf->empty() && cf!=NULL) { - //errs() <<"attempting function " << cf->getName() << "\n"; - count+= cf->getInstructionCount(); - } - } - if (inst.isTerminator()) { - int numS = inst.getNumSuccessors(); - for (int i = 0; i < numS; i++) { - BasicBlock* next = inst.getSuccessor(i); - //already counted -- do something more fancy for loops? 
-                if (find(visited.begin(), visited.end(), next)!=visited.end()) {
-                    continue;
-                }
-                int intermed = getSubLength(inst.getSuccessor(i), end, visited);
-                if (intermed > max_ret) {
-                    max_ret = intermed;
-                }
-            }
-        }
-    }
-    return count + max_ret;
-}
-
-bool ConsistentInference::sameFunction(map blockMap)
-{
-    Function* comp = blockMap.begin()->second->getParent();
-    for (auto map : blockMap) {
-        if (map.second->getParent()!= comp) {
-            return false;
-        }
-    }
-    return true;
-}
-
-
diff --git a/ocelot/AtomicRegionInference/src/Helpers.cpp b/ocelot/AtomicRegionInference/src/Helpers.cpp
new file mode 100644
index 0000000..a0f62cf
--- /dev/null
+++ b/ocelot/AtomicRegionInference/src/Helpers.cpp
@@ -0,0 +1,42 @@
+#include "include/Helpers.h"
+
+// Label for a value: its name when it has one, otherwise its operand form
+// printed without the type.
+std::string getSimpleNodeLabel(const Value* node) {
+    if (node->hasName()) {
+        // #if DEBUG
+        // errs() << "Node has name\n";
+        // #endif
+        return node->getName().str();
+    }
+
+    std::string str;
+    raw_string_ostream OS(str);
+
+    node->printAsOperand(OS, false);
+    return str;
+}
+
+// True when the name is exactly one of the three annotation function names.
+bool isAnnot(const StringRef annotName) {
+    return annotName == "Fresh" || annotName == "Consistent" || annotName == "FreshConsistent";
+}
+
+// Dumps each map entry to errs() as "<inst> ->" followed by every element it
+// maps to; with onlyCalls set, only entries keyed by a call are printed.
+void printInstInsts(const inst_insts_map& iim, bool onlyCalls) {
+    for (auto& [inst, inputs] : iim) {
+        if (!onlyCalls || isa<CallInst>(inst)) {
+            errs() << *inst << " ->\n";
+            for (auto* input : inputs) errs() << *input << "\n";
+            errs() << "\n";
+        }
+    }
+}
+
+// Dumps every instruction in the vector to errs(), one per line.
+void printInsts(const inst_vec& iv) {
+    for (auto& inst : iv) {
+        errs() << *inst << "\n";
+    }
+}
diff --git a/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp b/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp
deleted file mode 100644
index a105841..0000000
--- a/ocelot/AtomicRegionInference/src/InferAtomicPass.cpp
+++ /dev/null
@@ -1,516 +0,0 @@
-#include "include/InferAtomicPass.h"
-#include "include/TaintTracker.h"
-
-#define CAPSIZE 1000
-#define PRINTMAPS 1
-#define FRESHDEBUG 1
-
-void InferAtomicModulePass::removeAnnotations(inst_vec* toDelete)
-{ - //delete all the annotation function calls - bool instsLeftToDelete = true; - Instruction* candidate; - while (instsLeftToDelete) { - instsLeftToDelete = false; - //can't delete while directly iterating through the module - for (Function& f : *this->m) { - for (BasicBlock& bb : f) { - for (Instruction& inst : bb) { - - //for now, let's just delete unused core or compiler builtin functions - if(isa(&inst)) { - if (find(toDelete->begin(), toDelete->end(), &inst)!=toDelete->end()) { - candidate = &inst; - instsLeftToDelete = true; - break; - } - } - } - } - } - //recheck, as this could be the last iteration - if(instsLeftToDelete) { - #if DEBUG - errs() << "DEBUG: deleting: " << candidate->getName() <<"\n"; - #endif - candidate->replaceAllUsesWith(UndefValue::get(candidate->getType())); - candidate->eraseFromParent(); - } - - } - //now delete all the annotation functions - //vector toDeleteF; - bool functionsLeftToDelete = true; - Function* candidatef; - while (functionsLeftToDelete) { - functionsLeftToDelete = false; - //can't delete while directly iterating through the module - for (Function& f : *this->m) { - if (f.hasName()) { - //for now, let's just delete unused core or compiler builtin functions - if(f.getName().contains("Fresh")||f.getName().contains("Consistent")) { - candidatef = &f; - functionsLeftToDelete = true; - break; - - } - } - - } - - //recheck, as this could be the last iteration - if(functionsLeftToDelete) { -#if DEBUG - errs() << "DEBUG: deleting: " << candidatef->getName() <<"\n"; -#endif - - candidatef->replaceAllUsesWith(UndefValue::get(candidatef->getType())); - candidatef->eraseFromParent(); - } - } -} - -/* - * Top-level pass for atomic region inference - */ -bool InferAtomicModulePass::runOnModule(Module &M) { - m = &M; - capacitorSize = CAPSIZE; - - //TODO: init atomStart/End with the proper functions - for (Function& F : M) { - if (F.getName().contains("atomic_start")) { - #if DEBUG - errs() << "DEBUG: found atom start\n"; - #endif 
- atomStart = &F; - } - if (F.getName().contains("atomic_end")) { - #if DEBUG - errs() << "DEBUG: found atom end\n"; - #endif - atomEnd = &F; - } - } - - //Build the consistent set and fresh lists here, to only - //go through all the declarations once. - std::map conVars; - inst_vec_vec freshVars; - inst_insts_map inputInfo = buildInputs(m); - inst_vec toDelete; - getAnnotations(&conVars, &freshVars, inputInfo, &toDelete); - //TODO: need to add unique point of call chain prefix to con set - #if PRINTMAPS - errs () << "Initial fresh is: \n"; - for (inst_vec item : freshVars) { - for (Instruction* item2 : item) { - errs() << *item2 << "\n"; - } - } - errs() << "End init fresh\n"; - #endif - - #if PRINTMAPS - errs () << "Initial consistent is: \n"; - for (auto map : conVars) { - errs() << "Begin set\n"; - for (Instruction* item2 : map.second) { - errs() << *item2 << "\n"; - } - } - errs() << "End init Consistent\n"; - #endif - - #if PRINTMAPS - errs() << "Printing map:\n"; - for (auto map : inputInfo) { - if (isa(map.first)) { - errs() << *(map.first) << "in map\n"; - for (Value* l : map.second) { - errs() << *l << "\n"; - } - } - } - #endif - map allConSets = collectCon(conVars, inputInfo); - inst_vec_vec allFresh = collectFresh(freshVars, inputInfo); - - - - #if PRINTMAPS - errs () << "Fresh is: \n"; - for (inst_vec item : allFresh) { - for (Instruction* item2 : item) { - errs() << *item2 << "\n"; - } - } - errs() << "End fresh\n"; - #endif - - #if PRINTMAPS - errs () << "Consistent is: \n"; - for (auto map : allConSets) { - for (Instruction* item2 : map.second) { - errs() << *item2 << "\n"; - } - } - errs() << "End Consistent\n"; - #endif - - - - //will do consistency first - ConsistentInference* ci = new ConsistentInference(this, &M, atomStart, atomEnd); - - ci->inferConsistent(allConSets); - ci->inferFresh(allFresh); - - //delete annotations - removeAnnotations(&toDelete); - - return true; -} - - -/**This function finds annotated variables)**/ -void 
InferAtomicModulePass::getAnnotations(std::map* conSets, inst_vec_vec* freshVars, - inst_insts_map inputMap, inst_vec* toDelete) -{ - //note: delete the annotation functions afterwards - map recallSet; - - for (Function& f : *m) { - for (BasicBlock& bb : f) { - for (Instruction& inst : bb) { - if(CallInst* ci = dyn_cast(&inst)) { - Function* called = ci->getCalledFunction(); - //various empty or null checks - if (called==NULL) { - continue; - } - if (called->empty()||!called->hasName()) { - continue; - } - //covers both Consistent and FreshConsistent - if (called->getName().contains("Consistent")) { - //first para is var, second is id - toDelete->push_back(ci); - int setID; - //v.push_back(ai); <<-- don't actually need this? - //bit cast use of x, then value operand of store - Instruction* var = dyn_cast(ci->getOperand(0)); - - if (var==NULL) { - //errs() << "error casting with " << *ci <<"\n"; - continue; - } - // errs() << "New consistent annot. with " << *var<<"\n"; - Value* id = ci->getOperand(1); - if(ConstantInt* cint = dyn_cast(id)) { - setID = cint->getSExtValue(); - } - queue customUsers; - set v; - //v.emplace(ci); - //in case var itself is iOp - for (Instruction* iOp : inputMap[var]) { - v.emplace(iOp); - } - - //customUsers.push(var); - for (Value* use : var->users()) { - //don't push the annotation - if (use == ci) { - continue; - } - //errs() << "DEBUG: pushing use of var: " << *use << "\n"; - customUsers.push(use); - } - while(!customUsers.empty()) { - Value* use = customUsers.front(); - customUsers.pop(); - //errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n"; - if (Instruction* instUse = dyn_cast(use)) { - for (Instruction* iOp : inputMap[instUse]) { - v.emplace(iOp); - // errs() << "DEBUG: adding to v " << *iOp << "\n"; - } - } - if(isa(use)||isa(use)) { - for (Value* use2 : use->users()) { - // errs() << "DEBUG: use2 is " << *use2 << "\n"; - if(StoreInst* si = dyn_cast(use2)){ - for (Instruction* iOp : inputMap[si]) { - 
v.emplace(iOp); - // errs() << "DEBUG: adding to v " << *iOp << "\n"; - } - } - // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; - customUsers.push(use2); - } - } - - if(isa(use)) { - for (Value* use2 : use->users()) { - // errs() << "DEBUG: use2 is " << *use2 << "\n"; - if(StoreInst* si = dyn_cast(use2)){ - //v.push_back(si); - for (Instruction* iOp : inputMap[si]) { - v.emplace(iOp); - // errs() << "DEBUG: adding to v " << *iOp << "\n"; - } - } - // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; - customUsers.push(use2); - } - } - } - //last case - if (v.empty()) { - //some entries have a first link with ci, not var - - for (Instruction* iOp : inputMap[ci]) { - if (inputMap[ci].size() == 1) { - for (Instruction* origLink : inputMap[iOp]) { - v.emplace(origLink); - } - } else { - v.emplace(iOp); - } - - } - - - } - //for later deletion purposes - inputMap.erase(ci); - - - if (!v.empty()) { - inst_vec temp; - for (Instruction* item : v) { - temp.push_back(item); - } - //add the collected list to the map - if(conSets->find(setID)!=conSets->end()) { - conSets->at(setID).insert(conSets->at(setID).end(), temp.begin(), temp.end()); - } else { - conSets->emplace(setID, temp); - } - } - - } - if (called->getName().contains("Fresh")) { - set v; - toDelete->push_back(ci); - inputMap.erase(ci); - Value* var = ci->getOperand(0); - if (Instruction* inst = dyn_cast(var)) { - v.emplace(inst); - } else { - //errs() << "error casting\n"; - } - //errs() << "New Fresh annot. 
with " << *var<<"\n"; - // v.push_back(ci); - - for(Value* use : var->users()) { - if(StoreInst* si = dyn_cast(use)){ - // errs() << "DEBUG: pushing " << *use << "\n"; - v.emplace(si); - } - if(isa(use)) { - for (Value* use2 : use->users()) { - // errs() << "DEBUG: pushing " << *use2 << "\n"; - if(StoreInst* si = dyn_cast(use2)){ - v.emplace(si); - } - } - } - } - if (!v.empty()) { - inst_vec temp; - for (Instruction* item : v) { - temp.push_back(item); - } - freshVars->push_back(temp); - } - } - - } - - } - } - } - -} - - - - -/*Given the starting point annotations of conSets, find the -deepest unique point of the call chain*/ -map InferAtomicModulePass::collectCon(map startingPoints, inst_insts_map inputMap) -{ - map toReturn; - for (pair iv : startingPoints ) { - set unique; - map> callChains; - //each item should be the starting point from a different annot - for(Instruction* item : iv.second) { - #if FRESHDEBUG - errs() << "Starting point: " << *item << "\n"; - #endif - //add self to call chain - callChains[item].insert(item); - - for (Instruction* iOp : inputMap[item]) { - // unique.insert(iOp); - callChains[item].insert(iOp); - queue toExplore; - toExplore.push(iOp); - while (!toExplore.empty()) { - Instruction* curr = toExplore.front(); - toExplore.pop(); - for (Instruction* intermed : inputMap[curr]) { - if (! (find(callChains[item].begin(), callChains[item].end(), intermed) - !=callChains[item].end())) { - callChains[item].insert(intermed); - toExplore.push(intermed); - } - } - } - - }// finish constructing call chain for one annot. in the set - - }//constructed call chains for ALL annot. in the set. 
- //now check the call chain - - //int index = 0; - //map foundUniquePoint; - //clean up the call chains - - for(auto ccmap : callChains) { - for (Instruction* possibility : ccmap.second) { - //if the link is in the same function, then continue - //errs() << "examining possibility: " << *possibility << "\n"; - bool sf = false; - for (Instruction* link : inputMap[possibility]) { - //errs() << "next link is" << *link << "\n"; - if ((link!=possibility) && link->getFunction() == possibility->getFunction()) { - sf = true; - - } - } - if (sf) { - continue; - } - bool isUnique = true; - for (auto ccmapNest : callChains) { - //if self then skip - if (ccmapNest == ccmap) { - continue; - } - //otherwise check if this map also contains the possibility - if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility) - != ccmapNest.second.end()) - { - isUnique = false; - break; - } - } - if (isUnique){ - unique.insert(possibility); - // errs() << "Found unique!" << *possibility << "\n"; - } else { - //try another poss. 
- continue; - } - } - } - - - inst_vec v; - for (Instruction* item2 : unique) { - if (!isa(item2)) { - v.push_back(item2); - } - } - toReturn[iv.first] = v; - }//end starting point check - - return toReturn; -} - -/*This function collects the input srcs and uses off of the fresh annotated vars*/ -inst_vec_vec InferAtomicModulePass::collectFresh(inst_vec_vec startingPoints, inst_insts_map inputMap) -{ - inst_vec_vec toReturn; - - for (inst_vec iv : startingPoints ) { - set unique; - set callChain; - for(Instruction* item : iv) { - #if FRESHDEBUG - errs() << "Starting point: " << *item << "\n"; - #endif - //uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR) - inst_vec uses = traverseDirectUses(item); - - for (Instruction* use : uses) { - #if FRESHDEBUG - errs() << "Starting point use: " << *use << "\n"; - #endif - // if (isa(use)||isa(use)) { - unique.insert(use); - //} - for (Instruction* iOp : inputMap[use]) { - unique.insert(iOp); - } - } - - for (Instruction* iOp : inputMap[item]) { - unique.insert(iOp); - callChain.insert(iOp); - queue toExplore; - toExplore.push(iOp); - while (!toExplore.empty()) { - Instruction* curr = toExplore.front(); - toExplore.pop(); - for (Instruction* intermed : inputMap[curr]) { - if (! 
(find(callChain.begin(), callChain.end(), intermed)!=callChain.end())) { - callChain.insert(intermed); - toExplore.push(intermed); - } - } - } - - } - //don't forget the item itself - if (isa(item)||isa(item)) { - unique.insert(item); - } - - - } - //now construct the call chain - for (Instruction* vv : callChain) { - // errs() << "call chain val: " << *vv <<"\n"; - unique.insert(vv); - } - inst_vec v; - for (Instruction* item2 : unique) { - if (!isa(item2)) { - v.push_back(item2); - } - } - toReturn.push_back(v); - } - - - return toReturn; -} - -char InferAtomicModulePass::ID = 0; - -RegisterPass X("atomize", "Infer Atomic Pass"); diff --git a/ocelot/AtomicRegionInference/src/InferAtoms.cpp b/ocelot/AtomicRegionInference/src/InferAtoms.cpp new file mode 100644 index 0000000..b0219cd --- /dev/null +++ b/ocelot/AtomicRegionInference/src/InferAtoms.cpp @@ -0,0 +1,530 @@ +#include "include/InferAtoms.h" + +#define CAPSIZE 1000 + +// Top-level pass for atomic region inference +PreservedAnalyses InferAtomsPass::run(Module& M, ModuleAnalysisManager& AM) { + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + setModule(&M); + + for (auto& F : M) { + auto FName = F.getName(); + if (FName.equals("atomic_start")) { +#if DEBUG + errs() << "Found atomic_start\n"; +#endif + atomStart = &F; + } else if (FName.equals("atomic_end")) { +#if DEBUG + errs() << "Found atomic_end\n"; +#endif + atomEnd = &F; + } + } + + // Build the consistent set and fresh lists here, + // to only go through all the declarations once. 
+ std::map consVars; + inst_vec_vec freshVars; + inst_insts_map inputMap = buildInputs(this->M); + inst_vec toDelete; + getAnnotations(&consVars, &freshVars, inputMap, &toDelete); + // TODO: need to add unique point of call chain prefix to cons set + +#if DEBUG + errs() << "Initial Fresh:\n"; + for (auto& insts : freshVars) + for (auto* inst : insts) errs() << *inst << "\n"; +#endif + +#if DEBUG + errs() << "Initial Consistent:\n"; + for (auto& [_, insts] : consVars) { + for (auto* inst : insts) errs() << *inst << "\n"; + } +#endif + +#if DEBUG + errs() << "Print inputMap CallInst entries:\n"; + printInstInsts(inputMap, true); +#endif + + auto allConsSets = collectCons(consVars, inputMap); + auto allFresh = collectFresh(freshVars, inputMap); + +#if DEBUG + errs() << "Fresh after collect: \n"; + for (auto& varSet : allFresh) + for (auto* var : varSet) errs() << *var << "\n"; +#endif + +#if DEBUG + errs() << "Consistent after collect: \n"; + for (auto& [_, insts] : allConsSets) + for (auto* inst : insts) errs() << *inst << "\n"; +#endif + + // Consistent first + InferFreshCons* ci = new InferFreshCons(&FAM, &M, atomStart, atomEnd); + + ci->inferConsistent(allConsSets); + ci->inferFresh(allFresh); + + // Delete annotations + removeAnnotations(&toDelete); + + return PreservedAnalyses::none(); +} + +// This function finds annotated variables +void InferAtomsPass::getAnnotations(std::map* consVars, inst_vec_vec* freshVars, + inst_insts_map inputMap, inst_vec* toDelete) { +#if DEBUG + errs() << "=== getAnnotations ===\n"; +#endif + for (auto& F : *this->M) { + for (auto& B : F) { + for (auto& I : B) { + if (auto* ci = dyn_cast(&I)) { +#if DEBUG + errs() << "[Loop Inst] cur inst = CallInst\n"; +#endif + auto* fun = ci->getCalledFunction(); + // Various empty or null checks + if (fun == NULL || fun->empty() || !fun->hasName()) continue; + // Consistent and FreshConsistent + if (isAnnot(fun->getName()) && !fun->getName().equals("Fresh")) { +#if DEBUG + errs() << "[Loop Inst] 
Calls Consistent/FreshConsistent\n"; +#endif + toDelete->push_back(ci); + // First para is var, second is id + int setID; + // Bit cast use of x, then value operand of store + Instruction* var = dyn_cast(ci->getOperand(0)); + + if (var == NULL) continue; + // errs() << "New consistent annot. with " << *var<<"\n"; + Value* id = ci->getOperand(1); + if (ConstantInt* cint = dyn_cast(id)) { + setID = cint->getSExtValue(); + } + std::queue customUsers; + std::set v; + // v.emplace(ci); + // in case var itself is iOp + for (Instruction* iOp : inputMap[var]) { + v.emplace(iOp); + } + + // customUsers.push(var); + for (Value* use : var->users()) { + // don't push the annotation + if (use == ci) { + continue; + } + // errs() << "DEBUG: pushing use of var: " << *use << "\n"; + customUsers.push(use); + } + while (!customUsers.empty()) { + Value* use = customUsers.front(); + customUsers.pop(); + // errs() << "DEBUG: use is " << *use << " of var " << *var<<"\n"; + if (Instruction* instUse = dyn_cast(use)) { + for (Instruction* iOp : inputMap[instUse]) { + v.emplace(iOp); + // errs() << "DEBUG: adding to v " << *iOp << "\n"; + } + } + if (isa(use) || isa(use)) { + for (Value* use2 : use->users()) { + // errs() << "DEBUG: use2 is " << *use2 << "\n"; + if (StoreInst* si = dyn_cast(use2)) { + for (Instruction* iOp : inputMap[si]) { + v.emplace(iOp); + // errs() << "DEBUG: adding to v " << *iOp << "\n"; + } + } + // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; + customUsers.push(use2); + } + } + + if (isa(use)) { + for (Value* use2 : use->users()) { + // errs() << "DEBUG: use2 is " << *use2 << "\n"; + if (StoreInst* si = dyn_cast(use2)) { + // v.push_back(si); + for (Instruction* iOp : inputMap[si]) { + v.emplace(iOp); + // errs() << "DEBUG: adding to v " << *iOp << "\n"; + } + } + // errs() << "DEBUG: pushing use2 of var: " << *use2 << "\n"; + customUsers.push(use2); + } + } + } + // last case + if (v.empty()) { + // some entries have a first link with ci, not var + + 
for (Instruction* iOp : inputMap[ci]) { + if (inputMap[ci].size() == 1) { + for (Instruction* origLink : inputMap[iOp]) { + v.emplace(origLink); + } + } else { + v.emplace(iOp); + } + } + } + // for later deletion purposes + inputMap.erase(ci); + + if (!v.empty()) { + inst_vec temp; + for (Instruction* item : v) { + temp.push_back(item); + } + // add the collected list to the map + if (consVars->find(setID) != consVars->end()) { + consVars->at(setID).insert(consVars->at(setID).end(), temp.begin(), temp.end()); + } else { + consVars->emplace(setID, temp); + } + } + } else if (fun->getName().equals("Fresh")) { +#if DEBUG + errs() << "[Loop Inst] Calls Fresh\n"; +#endif + std::set v; + toDelete->push_back(ci); + +#if DEBUG + errs() << "[Loop Inst] Print inputMap entries:\n"; + printInstInsts(inputMap); +#endif + + //* Can't actually remove, otherwise wrong result + // #if DEBUG + // errs() << "[Loop Inst] Remove Fresh call from inputMap\n"; + // #endif + // inputMap.erase(ci); + + auto* arg = ci->getOperand(0); +#if DEBUG + errs() << "[Loop Inst] Fresh arg: " << *arg << "\n"; +#endif + + if (auto* inst = dyn_cast(arg)) { +#if DEBUG + errs() << "[Loop Inst] arg = Instruction, add to v\n"; +#endif + v.emplace(inst); + + //* Actually collect all uses (e.g., log(x)) + if (auto* li = dyn_cast(inst)) { +#if DEBUG + errs() << "[Loop Inst] Further arg = LoadInst\n"; +#endif + auto* ptr = li->getPointerOperand(); +#if DEBUG + errs() << "[Loop Inst] Ptr operand: " << *ptr << "\n"; +#endif + for (auto* ptrUse : ptr->users()) { +#if DEBUG + errs() << "[Loop ptr users] ptrUse: " << *ptrUse << "\n"; +#endif + if (ptrUse != inst) { + if (auto* liUse = dyn_cast(ptrUse)) { + errs() << "[Loop ptr users] ptrUse diff from Fresh arg, add to v\n"; + v.emplace(liUse); + } + } + } + } + } else { + // errs() << "error casting\n"; + } + // errs() << "New Fresh annot. 
with " << *var<<"\n"; + // v.push_back(ci); + +#if DEBUG + errs() << "[Loop Inst] Go over arg users\n"; +#endif + for (auto* use : arg->users()) { + if (auto* si = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop Users] use = StoreInst, add to v: " << *si << "\n"; +#endif + v.emplace(si); + } else if (isa(use)) { + for (auto* use2 : use->users()) { + if (auto* si = dyn_cast(use2)) { + v.emplace(si); + } + } + } + } + + if (!v.empty()) { +#if DEBUG + errs() << "[Loop Inst] Add v's insts to a set in freshVars:\n"; +#endif + inst_vec tmp; + for (auto* inst : v) { +#if DEBUG + errs() << "[Loop v] " << *inst << "\n"; +#endif + tmp.push_back(inst); + } + freshVars->push_back(tmp); + } + } + } + } + } + } + +#if DEBUG + errs() << "*** getAnnotations ***\n"; +#endif +} + +void InferAtomsPass::removeAnnotations(inst_vec* toDelete) { + std::vector toDeleteF; + + // Delete all annotation function calls + for (auto& F : *this->M) { + if (F.hasName() && isAnnot(F.getName())) + toDeleteF.push_back(&F); + else + for (auto& B : F) { + auto I = B.begin(); + for (; I != B.end(); I++) { + if (auto* ci = dyn_cast(I)) { + // TODO: no need to confirm in toDelete? 
+ if (std::find(toDelete->begin(), toDelete->end(), &*I) != toDelete->end()) { +#if DEBUG + errs() << "Remove call: " << *I << "\n"; +#endif + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I = I->eraseFromParent(); + + //* Remove args and their uses as well + for (auto& arg : ci->args()) { + if (auto* argInst = dyn_cast(arg)) { +#if DEBUG + errs() << "Remove call arg: " << *argInst << "\n"; +#endif + argInst->eraseFromParent(); + argInst->replaceAllUsesWith(UndefValue::get(argInst->getType())); + } + } + } + } + } + } + } + + // Delete all annotation function defs + for (auto F : toDeleteF) { +#if DEBUG + errs() << "Remove function " << F->getName() << "\n"; +#endif + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + } +} + +/*Given the starting point annotations of conSets, find the +deepest unique point of the call chain*/ +std::map InferAtomsPass::collectCons(std::map startingPoints, inst_insts_map inputMap) { + std::map toReturn; + for (std::pair iv : startingPoints) { + std::set unique; + std::map> callChains; + // each item should be the starting point from a different annot + for (Instruction* item : iv.second) { +#if DEBUG + errs() << "Starting point: " << *item << "\n"; +#endif + // add self to call chain + callChains[item].insert(item); + + for (Instruction* iOp : inputMap[item]) { + // unique.insert(iOp); + callChains[item].insert(iOp); + std::queue toExplore; + toExplore.push(iOp); + while (!toExplore.empty()) { + Instruction* curr = toExplore.front(); + toExplore.pop(); + for (Instruction* intermed : inputMap[curr]) { + if (!(find(callChains[item].begin(), callChains[item].end(), intermed) != callChains[item].end())) { + callChains[item].insert(intermed); + toExplore.push(intermed); + } + } + } + + } // finish constructing call chain for one annot. in the set + + } // constructed call chains for ALL annot. in the set. 
+ // now check the call chain + + // int index = 0; + // map foundUniquePoint; + // clean up the call chains + + for (auto ccmap : callChains) { + for (Instruction* possibility : ccmap.second) { + // if the link is in the same function, then continue + // errs() << "examining possibility: " << *possibility << "\n"; + bool sf = false; + for (Instruction* link : inputMap[possibility]) { + // errs() << "next link is" << *link << "\n"; + if ((link != possibility) && link->getFunction() == possibility->getFunction()) { + sf = true; + } + } + if (sf) { + continue; + } + bool isUnique = true; + for (auto ccmapNest : callChains) { + // if self then skip + if (ccmapNest == ccmap) { + continue; + } + // otherwise check if this map also contains the possibility + if (find(ccmapNest.second.begin(), ccmapNest.second.end(), possibility) != ccmapNest.second.end()) { + isUnique = false; + break; + } + } + if (isUnique) { + unique.insert(possibility); + // errs() << "Found unique!" << *possibility << "\n"; + } else { + // try another poss. 
+ continue; + } + } + } + + inst_vec v; + for (Instruction* item2 : unique) { + if (!isa(item2)) { + v.push_back(item2); + } + } + toReturn[iv.first] = v; + } // end starting point check + + return toReturn; +} + +// Collects the source inputs and uses of Fresh-annotated vars +inst_vec_vec InferAtomsPass::collectFresh(inst_vec_vec freshVars, inst_insts_map inputMap) { +#if DEBUG + errs() << "=== collectFresh ===\n"; +#endif + inst_vec_vec toReturn; + +#if DEBUG + errs() << "Go over fresh var sets\n"; +#endif + for (auto varSet : freshVars) { +#if DEBUG + errs() << "[Loop freshVars] Go over varSet:\n"; + printInsts(varSet); +#endif + inst_set unique, callChain; + for (auto* var : varSet) { +#if DEBUG + errs() << "[Loop varSet] Cur var: " << *var << "\n"; +#endif + // Uses (forwards) are direct only (might need a little chaining for direct in rs to be direct in IR) + inst_vec uses = traverseUses(var); + +#if DEBUG + errs() << "[Loop varSet] Go over uses of var\n"; +#endif + for (auto* use : uses) { +#if DEBUG + errs() << "[Loop uses] Add use: " << *use << "\n"; +#endif + unique.insert(use); + + for (auto* input : inputMap[use]) { +#if DEBUG + errs() << "[Loop inputMap[use]] Add src input of use to unique: " << *input << "\n"; +#endif + unique.insert(input); + } + } + +#if DEBUG + errs() << "[Loop varSet] Go over src inputs of var\n"; +#endif + for (auto* input : inputMap[var]) { +#if DEBUG + errs() << "[Loop inputMap[var]] Cur src input: " << *input << "\n"; +#endif + unique.insert(input); + callChain.insert(input); + std::queue toExplore; + toExplore.push(input); + while (!toExplore.empty()) { + Instruction* curr = toExplore.front(); + toExplore.pop(); + for (Instruction* intermed : inputMap[curr]) { + if (!(find(callChain.begin(), callChain.end(), intermed) != callChain.end())) { + callChain.insert(intermed); + toExplore.push(intermed); + } + } + } + } + + // Add the var itself + if (isa(var) || isa(var)) { +#if DEBUG + errs() << "[Loop varSet] Cur var = 
StoreInst/CallInst, add to unique\n"; +#endif + unique.insert(var); + } + } + // Now construct the call chain + for (auto* vv : callChain) { + unique.insert(vv); + } + inst_vec v; +#if DEBUG + errs() << "[Loop freshVars] Go over unique\n"; +#endif + for (auto* inst : unique) { + if (!isa(inst)) { +#if DEBUG + errs() << "[Loop unique] Cur inst != AllocaInst, add to v: " << *inst << "\n"; +#endif + v.push_back(inst); + } + } + +#if DEBUG + errs() << "[Loop FreshVars] Add v to toReturn\n"; +#endif + toReturn.push_back(v); + } + +#if DEBUG + errs() << "*** collectFresh ***\n"; +#endif + return toReturn; +} diff --git a/ocelot/AtomicRegionInference/src/InferFreshCons.cpp b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp new file mode 100644 index 0000000..dfcc950 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/InferFreshCons.cpp @@ -0,0 +1,571 @@ +#include "include/InferFreshCons.h" + +#include "llvm/Analysis/PostDominators.h" + +Instruction* InferFreshCons::insertRegionInst(InsertKind insertKind, Instruction* insertBefore) { +#if DEBUG + errs() << "=== insertRegionInst ===\n"; +#endif + Instruction* call; + IRBuilder<> builder(insertBefore); + + if (insertKind == Start) { +#if DEBUG + errs() << "Insert start before: " << *insertBefore << "\n"; +#endif + call = builder.CreateCall(this->atomStart); + } else { +#if DEBUG + errs() << "Insert end before: " << *insertBefore << "\n"; +#endif + call = builder.CreateCall(this->atomEnd); + } + +#if DEBUG + errs() << "*** insertRegionInst ***\n"; +#endif + return call; +} + +// If a direct pred is also a successor, then it's a for loop block +bool InferFreshCons::loopCheck(BasicBlock* B) { + auto BName = getSimpleNodeLabel(B); + + if (!B->hasNPredecessors(1)) { + for (auto it = pred_begin(B), et = pred_end(B); it != et; ++it) { + auto* predecessor = *it; + auto pname = predecessor->getName().drop_front(2); + // errs() << "comparing " << pname<< " and " < 0) { + // errs() << "comparison is true\n"; + return true; + } + } 
+ } + + return false; +} + +// Find the first block after a for loop +BasicBlock* InferFreshCons::getLoopEnd(BasicBlock* bb) { + auto* ti = bb->getTerminator(); + auto* end = ti->getSuccessor(0); + ti = end->getTerminator(); + // errs() << "end is " << end->getName() << "\n"; + // for switch inst, succ 0 is the fall through + end = ti->getSuccessor(1); + // errs() << "end is " << end->getName() << "\n"; + return end; +} + +// Top level region inference function -- could flatten later +void InferFreshCons::inferConsistent(std::map consSets) { + // TODO: start with pseudo code structure from design doc + for (auto [id, set] : consSets) { +#if DEBUG + errs() << "[InferConsistent] starting set " << id << "\n"; +#endif + addRegion(set, Consistent); + } +} + +// The only difference is outer map vs outer vec +void InferFreshCons::inferFresh(inst_vec_vec freshSets) { +#if DEBUG + errs() << "=== inferFresh ===\n"; +#endif + // TODO: start with pseudo code structure from design doc + for (auto freshSet : freshSets) addRegion(freshSet, Fresh); +#if DEBUG + errs() << "*** inferFresh ***\n"; +#endif +} + +void InferFreshCons::addRegion(inst_vec targetInsts, RegionKind regionKind) { +#if DEBUG + errs() << "=== addRegion ===\n"; +#endif + // A map from set item to bb + std::map targetBlocks; + // A queue of regions that still need to be processed + std::queue> regionsNeeded; + +#if DEBUG + errs() << "Build map from inst to bb\n"; +#endif + for (auto* targetInst : targetInsts) + targetBlocks[targetInst] = targetInst->getParent(); + +#if DEBUG + errs() << "Add map to regionsNeeded\n"; +#endif + regionsNeeded.push(targetBlocks); + + auto* root = m->getFunction("app"); + + // Iterate until no more possible regions, then pick the best one + inst_inst_vec regionsFound; + while (!regionsNeeded.empty()) { + // Need to raise all blocks in the map until they are the same + auto blocks = regionsNeeded.front(); + regionsNeeded.pop(); + // Record which functions have been traveled through + 
std::set seenFuns; + +#if DEBUG + errs() << "[Loop regionsNeeded] While blocks are in diff functions\n"; +#endif + while (!sameFunction(blocks)) { + // To think on: does this change? + auto* goal = findCandidate(blocks, root); +#if DEBUG + errs() << "[Loop !sameFunction] Go over each targetInst\n"; +#endif + for (auto* targetInst : targetInsts) { + // not all blocks need to be moved up + auto* curFun = blocks[targetInst]->getParent(); + seenFuns.insert(curFun); + if (curFun != goal) { + // if more than one call: + // callChain info is already in the starting set + // so only explore a caller if it's in conSet + bool first = true; + for (auto* use : curFun->users()) { + // if (regionKind == 1) { + if (!(find(targetInsts.begin(), targetInsts.end(), use) != targetInsts.end())) + continue; + // errs() << "Use: "<< *use << " is in call chain\n"; + //} + auto* inst = dyn_cast(use); +#if DEBUGINFER + errs() << "DEBUGINFER: examining use: " << *inst << "\n"; +#endif + if (inst == NULL) { + // errs () << "ERROR: use " << *use << "not an instruction\n"; + break; + } + // update the original map + if (first) { + blocks[targetInst] = inst->getParent(); + first = false; + } else { + // copy the blockmap, update, add to queue + auto* inst = dyn_cast(use); + std::map copy; + for (auto map : blocks) copy[map.first] = map.second; + copy[targetInst] = inst->getParent(); + regionsNeeded.push(copy); + } + } // end forall uses + } // end currFunc check + } // end forall items + } // end same function check + + // TODO: shuffle instructions here + +// Now, all bbs in the map are in the same function, so we can run +// dom or post-dom analysis on that function +#if DEBUG + errs() << "[Loop regionsNeeded] Start dom tree analysis\n"; +#endif + + auto* homeFun = blocks.begin()->second->getParent(); + if (homeFun == nullptr) { +#if DEBUG + errs() << "[Loop regionsNeeded] No function found\n"; +#endif + continue; + } +#if DEBUG + errs() << "[Loop regionsNeeded] Found home fun: " << 
homeFun->getName() << "\n"; +#endif + + auto& domTree = FAM->getResult(*homeFun); + // Find the closest point that dominates + auto* startDom = blocks.begin()->second; + for (auto& [_, B] : blocks) + startDom = domTree.findNearestCommonDominator(B, startDom); +#if DEBUG + errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; +#endif + + // TODO: if an inst in the set is in the bb, we can truncate? + +#if DEBUG + errs() << "Start post dom tree analysis\n"; +#endif + + // Flip directions for the region end + auto& postDomTree = FAM->getResult(*homeFun); + // Find the closest point that dominates + auto* endDom = blocks.begin()->second; + for (auto& [_, block] : blocks) { +#if DEBUGINFER + if (endDom != nullptr) { + errs() << "Finding post dom of: " << getSimpleNodeLabel(map.second) << " and " << getSimpleNodeLabel(endDom) << "\n"; + } else { + errs() << "endDom is null\n"; + } +#endif + endDom = postDomTree.findNearestCommonDominator(block, endDom); + } + +#if DEBUG + errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; +#endif + + if (startDom == nullptr) { + errs() << "[Error] Null startDom\n"; + } else if (endDom == nullptr) { + errs() << "[Error] Null endDom\n"; + } + + // Need to make the start and end dominate each other as well. 
+ startDom = domTree.findNearestCommonDominator(startDom, endDom); + endDom = postDomTree.findNearestCommonDominator(startDom, endDom); + +#if DEBUG + errs() << "[Loop regionsNeeded] After matching scope\n"; + errs() << "[Loop regionsNeeded] startDom: " << *startDom << "\n"; + errs() << "[Loop regionsNeeded] endDom: " << *endDom << "\n"; +#endif + + // Extra check to disallow loop conditional block as the end + if (loopCheck(endDom)) { +#if DEBUG + errs() << "[Loop regionsNeeded] Loop check passed\n"; +#endif + endDom = getLoopEnd(endDom); + } + + if (startDom == nullptr) { + errs() << "[Error] Null startDom after scope merge\n"; + } else if (endDom == nullptr) { + errs() << "[Error] Null endDom after scope merge\n"; + } +#if DEBUG + errs() << "[Loop regionsNeeded] Insert insts\n"; +#endif + // TODO: fallback if endDom is null? Need hyper-blocks, I think + // possibly can do a truncation check, to lessen the size a little, but could that interfere with compiler optimizations? + auto* regionStart = truncate(startDom, true, targetInsts, seenFuns); + auto* regionEnd = truncate(endDom, false, targetInsts, seenFuns); + if (regionStart == nullptr) { + errs() << "[Error] Null startDom after truncation\n"; + } else if (regionEnd == nullptr) { + errs() << "[Error] Null endDom after truncation\n"; + } else { + // errs() << "Region start is before " << *regionStart<<" and region end is before " << *regionEnd<<"\n"; + } + +#if DEBUG + errs() << "[Loop regionsNeeded] Add to regionsFound: (" << *regionStart << ", " << *regionEnd << ")\n"; +#endif + // Insert into regionsFound + regionsFound.emplace_back(regionStart, regionEnd); + } // end while regions needed + + // Now see which region is smallest -- instruction count? 
they must dominate + // each other, so there's no possibility of not running into the start from + // the end + auto [regionStart, regionEnd] = findShortest(regionsFound); + insertRegionInst(Start, regionStart); + insertRegionInst(End, regionEnd); + //}//end while regions needed + +#if DEBUG + errs() << "*** addRegion ***\n"; +#endif +} + +// Truncate a bb if the instruction is in the bb +Instruction* InferFreshCons::truncate(BasicBlock* B, bool forwards, inst_vec set, std::set nested) { +#if DEBUG + errs() << "=== truncate ===\n"; +#endif + +#if DEBUG + errs() << "Set:\n"; + for (auto& inst : set) + errs() << *inst << "\n"; +#endif + + // Truncate the front + if (forwards) { +#if DEBUG + errs() << "Truncate startDom\n"; + errs() << "Go over each inst\n"; +#endif + for (auto& I : *B) { + // Stop at first inst in bb that is in the set. + if (find(set.begin(), set.end(), &I) != set.end()) { +#if DEBUG + errs() << "[Loop B] Found first inst also in set: " << I << "\n"; +#endif + return &I; + } + // Need to stop at relevant CallInsts as well + else if (auto* ci = dyn_cast(&I)) { + if (nested.find(ci->getCalledFunction()) != nested.end()) + return &I; + } + } + +#if DEBUG + errs() << "Found no inst, return last inst\n"; +#endif + // Otherwise just return the last inst + return &B->back(); + } + +#if DEBUG + errs() << "Truncate endDom\n"; + errs() << "Go over each inst in reverse\n"; +#endif + // Reverse directions if not forwards + Instruction* prev = NULL; + for (auto I = B->rbegin(), rend = B->rend(); I != rend; I++) { + auto* inst = &*I; + if (find(set.begin(), set.end(), inst) != set.end()) { +#if DEBUG + errs() << "[Loop B] Found last inst also in set: " << *I << "\n"; +#endif + // Need to return the previous inst (next in forwards), + // as it should be inserted before the returned inst + if (prev == NULL) { + // Only happens if use is a ret inst, which is a scope use to make the branching + // work, not an actual one, so this is safe + return inst; + } + +#if 
DEBUG + errs() << "[Loop B] Return prev inst: " << *prev << "\n"; +#endif + return prev; + } else if (auto* ci = dyn_cast(inst)) { + if (nested.find(ci->getCalledFunction()) != nested.end()) { + return prev; + } + } + prev = inst; + } + +#if DEBUG + errs() << "*** truncate ***\n"; +#endif + +#if DEBUG + errs() << "Found no inst, return first inst\n"; +#endif + // Otherwise just return first inst of the block + // errs() << "truncate returning " << bb->front() << "\n"; + return &B->front(); +} + +Function* InferFreshCons::findCandidate(std::map blockMap, Function* root) { +#if DEBUG + errs() << "== findCandidate ===\n"; +#endif + std::vector funList; + // Add the parents, without duplicates + for (auto& [_, B] : blockMap) { + if (!(find(funList.begin(), funList.end(), B->getParent()) != funList.end())) { +#if DEBUG + errs() << "Add: " << B->getParent()->getName() << "\n"; +#endif + funList.push_back(B->getParent()); + } + } + + // Easy case: everything is already in the same function + if (funList.size() == 1) return funList.at(0); + + /* Algo goal: get the deepest function that still calls (or is) all funcs in funcList. + * Consider: multiple calls? Should be dealt with in the addRegion -- eventually each caller + * gets its own region + */ + Function* goal = nullptr; +#if DEBUG + errs() << "starting from " << root->getName() << "\n"; +#endif + deepCaller(root, funList, &goal); + if (goal == nullptr) { + errs() << "ERROR: deepCaller failed\n"; + } + +#if DEBUG + errs() << "*** findCandidate ***\n"; +#endif + return goal; +} + +// From a root, returns list of called functions. 
+std::vector InferFreshCons::deepCaller(Function* root, std::vector& funList, Function** goal) { + std::vector calledFuncs; + bool mustIncludeSelf = false; + + for (inst_iterator inst = inst_begin(root), E = inst_end(root); inst != E; ++inst) { + if (CallInst* ci = dyn_cast(&(*inst))) { + calledFuncs.push_back(ci->getCalledFunction()); + } + } + std::vector explorationList; + for (auto* item : funList) { + // skip over root or called funcs + if ((find(calledFuncs.begin(), calledFuncs.end(), item) != calledFuncs.end()) || item == root) { + if (item == root) { + mustIncludeSelf = true; + } + continue; + } + explorationList.push_back(item); +#if DEBUGINFER + errs() << "need to find " << item->getName() << "\n"; +#endif + } + // this function is a root of a call tree that calls everything in the func List + if (explorationList.empty()) { +#if DEBUGINFER + errs() << "empty list\n"; +#endif + *goal = root; + return calledFuncs; + } + // otherwise recurse + Function* candidate = nullptr; + for (Function* called : calledFuncs) { + std::vector partial = deepCaller(called, explorationList, &candidate); + // if candidate is set, it means called is a root for everything in the explorationList + if (candidate != nullptr) { + *goal = candidate; +#if DEBUGINFER + errs() << "New candidate: " << (*goal)->getName() << "\n"; +#endif + } + // remove from explorationList, but add to calledFuncs + for (Function* item : partial) { + func_vec::iterator place = find(explorationList.begin(), explorationList.end(), item); + if (place != explorationList.end()) { + explorationList.erase(place); + } + calledFuncs.push_back(item); + } + } + // current point is a root + if (explorationList.empty()) { + // not the deepest + if (candidate != nullptr && !mustIncludeSelf) { + *goal = candidate; + } else { + // is the deepest + *goal = root; + } + } + return calledFuncs; +} + +// Get the min of the max length of each region +inst_inst_pair InferFreshCons::findShortest(inst_inst_vec regionsFound) { 
+#if DEBUG + errs() << "=== findShortest ===\n"; +#endif + inst_inst_pair best; + int shortest = INT32_MAX; + +#if DEBUG + errs() << "Go over regionsFound\n"; +#endif + for (auto& [start, end] : regionsFound) { + int prefixLength = 0, found = 0; + auto* startParent = start->getParent(); +#if DEBUG + errs() << "[Loop regionsFound] startParent: " << *startParent << "\n"; + errs() << "Go over startParent insts\n"; +#endif + for (auto& I : *startParent) { + prefixLength++; + if (&I == start) break; + } + + // Get the max length from the bb to the end instruction + std::vector v; + int endLength = getSubLength(startParent, end, v); + // Substract the prefix before the start inst + endLength -= prefixLength; +#if DEBUG + errs() << "[Loop regionsFound] Region length " << endLength << "\n"; +#endif + if (endLength < shortest) { +#if DEBUG + errs() << "[Loop regionsFound] Shortest region: (" << *start << ", " << *end + << ") at length " << endLength << "\n"; +#endif + shortest = endLength; + best = std::make_pair(start, end); + } + } + +#if DEBUG + errs() << "*** findShortest ***\n"; +#endif + return best; +} + +int InferFreshCons::getSubLength(BasicBlock* B, Instruction* end, std::vector visited) { +#if DEBUG + errs() << "=== getSubLength ===\n"; +#endif + + int count = 0, max_ret = 0; + visited.push_back(B); +#if DEBUG + errs() << "Go over bb insts\n"; +#endif + for (auto& I : *B) { + count++; + + if (&I == end) { +#if DEBUG + errs() << "[Loop I] Cur inst = end, stop\n"; +#endif + return count; + } + + if (auto* ci = dyn_cast(&I)) { + auto* cf = ci->getCalledFunction(); + if (!cf->empty() && cf != NULL) { +#if DEBUG + errs() << "[Loop I] Cur inst = CallInst, calling: " << cf->getName() << "\n"; +#endif + count += cf->getInstructionCount(); + } + } + + if (I.isTerminator()) { +#if DEBUG + errs() << "[Loop I] Cur inst = terminator\n"; +#endif + for (int i = 0; i < I.getNumSuccessors(); i++) { + auto* next = I.getSuccessor(i); + // already counted -- do something more fancy 
for loops? + if (find(visited.begin(), visited.end(), next) != visited.end()) continue; + int intermed = getSubLength(next, end, visited); + if (intermed > max_ret) { + max_ret = intermed; + } + } + } + } + +#if DEBUG + errs() << "*** getSubLength ***\n"; +#endif + return count + max_ret; +} + +bool InferFreshCons::sameFunction(std::map blockMap) { + auto* BComp = blockMap.begin()->second->getParent(); + for (auto& [_, B] : blockMap) + if (B->getParent() != BComp) return false; + return true; +} diff --git a/ocelot/AtomicRegionInference/src/TaintTracker.cpp b/ocelot/AtomicRegionInference/src/TaintTracker.cpp index 0033a78..cb19a40 100644 --- a/ocelot/AtomicRegionInference/src/TaintTracker.cpp +++ b/ocelot/AtomicRegionInference/src/TaintTracker.cpp @@ -1,857 +1,1000 @@ #include "include/TaintTracker.h" +// Main dataflow function to construct map of store (TODO: not just stores) insts to vars (inputs?) they depend on +inst_insts_map buildInputs(Module* M) { +#if DEBUG + errs() << "=== buildInputs ===\n"; +#endif + + inst_vec inputInsts = findInputInsts(M); + inst_insts_map taintedInsts; + inst_vec promotedInputs; + + for (auto inputInst : inputInsts) { +#if DEBUG + errs() << "[Loop inputInst] orig input: " << *inputInst << "\n"; +#endif + + // Add self to map + taintedInsts[inputInst].insert(inputInst); + std::queue toExplore; +#if DEBUG + errs() << "[Loop inputInst] Add orig input to toExplore\n"; +#endif + toExplore.push(inputInst); + +#if DEBUG + errs() << "[Loop inputInst] Explore flows from orig input\n"; +#endif + + // Iterate until no more inter-proc flows found + while (!toExplore.empty()) { +#if DEBUG + errs() << "=== Loop toExplore ===\n"; +#endif + auto* curVal = toExplore.front(); + toExplore.pop(); -/*Main DataFlow function to construct map of store insts to vars they depend on*/ -inst_insts_map buildInputs(Module* m) -{ - inst_vec inputs = findInputInsts(m); - inst_insts_map taintedDecl; - inst_vec promoted_inputs; - - for (Instruction* iOp : inputs) { 
- #if DEBUG - errs() << "Starting input: " << *iOp <<"\n"; - #endif - //don't forget to add self to map - taintedDecl[iOp].insert(iOp); - queue toExplore; - toExplore.push(iOp); - - //iterate until no more interproc flows found - while(!toExplore.empty()) { - - Value* currVal = toExplore.front(); - if (currVal == NULL) { - continue; - } + if (curVal == NULL) continue; + +#if DEBUG + errs() << "[Loop toExplore] cur inst: " << *curVal << "\n"; +#endif val_vec interProcFlows; - toExplore.pop(); - if (currVal == iOp) { - interProcFlows = traverseLocal(currVal, iOp, &taintedDecl, nullptr); - for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } - taintedDecl[iipf].insert(iOp); + if (curVal == inputInst) { +#if DEBUG + errs() << "[Loop toExplore] cur inst = orig input\n"; + errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller (none)\n"; +#endif + interProcFlows = traverseLocal(curVal, inputInst, &taintedInsts, nullptr); +#if DEBUG + errs() << "[Loop toExplore] [cur inst = orig input] Inspect interProcFlows:\n"; +#endif + for (auto* vipf : interProcFlows) { + if (auto* iipf = dyn_cast(vipf)) { + if (auto* anno_check = dyn_cast(iipf)) { + // We delete these later... 
creates problems + if (isAnnot(anno_check->getName())) continue; + } + +#if DEBUG + errs() << "Adding orig input (" << *inputInst << ") to set at " << *iipf << "\n"; +#endif + taintedInsts[iipf].insert(inputInst); } } - } else if (isa(currVal)) { - //note it will not be iop, even though iop is a call - //this case handles both returns and pbref - - promoted_inputs.push_back(dyn_cast(currVal)); - Value* next = toExplore.front(); + } else if (isa(curVal)) { +#if DEBUG + errs() << "[Loop toExplore] cur inst = CallInst\n"; +#endif + // Note it will not be iop, even though iop is a call + // This case handles both returns and pbref + + promotedInputs.push_back(dyn_cast(curVal)); + auto* next = toExplore.front(); toExplore.pop(); - //if the next is a return, this was a return flow - //otherwise, if it's an arg, this was pbref + // If the next is a return, this was a return flow + // Otherwise, if it's an arg, this was pbref + //? pbref - pass by reference? if (isa(next)) { - interProcFlows = traverseLocal(currVal, dyn_cast(currVal), &taintedDecl, nullptr); +#if DEBUG + errs() << "[Loop toExplore] cur inst next = Return inst (return flow)\n"; +#endif + interProcFlows = traverseLocal(curVal, dyn_cast(curVal), &taintedInsts, nullptr); for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - - //don't add self - if (currVal == vipf) { + if (Instruction* iipf = dyn_cast(vipf)) { + // don't add self + if (curVal == vipf) { continue; } - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } - taintedDecl[iipf].insert(dyn_cast(currVal)); + if (CallInst* anno_check = dyn_cast(iipf)) { + // we delete these later... 
creates problems + if (anno_check->getName().contains("Fresh") || + anno_check->getName().contains("Consistent")) { + continue; + } + } + taintedInsts[iipf].insert(dyn_cast(curVal)); } - } + } } else if (isa(next)) { - //grab the para corresponding to the argument +#if DEBUG + errs() << "[Loop toExplore] cur inst next = Argument (pbref)\n"; +#endif + // Grab the para corresponding to the argument int index = -1; int i = 0; - CallInst* ci = dyn_cast(currVal); - + CallInst* ci = dyn_cast(curVal); - if (ci->getCalledFunction() == NULL) { - continue; + if (ci->getCalledFunction() == NULL) continue; + if (ci->getCalledFunction()->empty()) continue; + +#if DEBUG + errs() << "exploring function " << ci->getCalledFunction()->getName() << "\n"; +#endif + + for (auto& arg : ci->getCalledFunction()->args()) { + // errs() <<"arg is "<(&arg) != next) { + i++; + } else { + index = i; + } } - if (ci->getCalledFunction()->empty()) { + if (index == -1) { +#if DEBUG + errs() << "couldn't find pass by ref " << *next << "\n"; +#endif continue; } - #if DEBUG - errs() << "exploring function " << ci->getCalledFunction()->getName() << "\n"; - #endif - - for (auto& arg : ci->getCalledFunction()->args()){ - //errs() <<"arg is "<(&arg)!=next) { - i++; - } else { - index = i; - } - + Value* tArg = ci->getArgOperand(index); + // errs() << "arg_op: "<< *arg_op<<"\n"; + // check if reference is part of an array + if (GEPOperator* gep = dyn_cast(tArg)) { + tArg = gep->getPointerOperand(); } - if(index == -1){ - #if DEBUG - errs() << "couldn't find pass by ref " << *next << "\n"; - #endif - continue; - } - - Value* tArg = ci->getArgOperand(index); - //errs() << "arg_op: "<< *arg_op<<"\n"; - //check if reference is part of an array - if (GEPOperator* gep = dyn_cast(tArg)) { - tArg = gep->getPointerOperand(); - } - //if bitcast inst, - else if (BitCastInst* bci = dyn_cast(tArg)){ + // if bitcast inst, + else if (BitCastInst* bci = dyn_cast(tArg)) { tArg = bci->getOperand(0); } - //need to actually 
find the first use *after* the callInst - Instruction* fstUse = ptrAfterCall(tArg,ci); - if (fstUse!=nullptr && fstUse!=tArg) { - #if DEBUG + // need to actually find the first use *after* the callInst + Instruction* fstUse = ptrAfterCall(tArg, ci); + if (fstUse != nullptr && fstUse != tArg) { +#if DEBUG errs() << "First use after call: " << *fstUse << "\n"; - #endif - //if the first use is itself a callinst, then treat as a tainted para case, +#endif + // if the first use is itself a callinst, then treat as a tainted para case, val_vec visited_fstuse; visited_fstuse.push_back(ci); - - while (CallInst* ci_fstuse = dyn_cast(fstUse) ) { - //already visited, as in loop - if (find(visited_fstuse.begin(),visited_fstuse.end(), ci_fstuse) - !=visited_fstuse.end()) { - //no non-call uses + + while (CallInst* ci_fstuse = dyn_cast(fstUse)) { + // already visited, as in loop + if (find(visited_fstuse.begin(), visited_fstuse.end(), ci_fstuse) != visited_fstuse.end()) { + // no non-call uses fstUse = nullptr; break; } - if (CallInst* anno_check = dyn_cast(ci_fstuse)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } + if (CallInst* anno_check = dyn_cast(ci_fstuse)) { + // we delete these later... 
creates problems + if (anno_check->getName().contains("Fresh") || + anno_check->getName().contains("Consistent")) { + continue; + } + } visited_fstuse.push_back(ci_fstuse); - unsigned int arg_num = ci_fstuse->getNumArgOperands(); - + unsigned int arg_num = ci_fstuse->arg_size(); + +#if DEBUG + errs() << "[Loop customUsers] Find index of tainted arg:\n"; +#endif // Find the index of the tainted argument - for (unsigned int i = 0; i < arg_num; i++){ - #if DEBUG - errs() << "DEBUG: comparing "<< *tArg <<" and " << *(ci_fstuse->getArgOperand(i))<<"\n"; - #endif - if(ci_fstuse->getArgOperand(i)==tArg) { - #if DEBUG - // errs() << "DEBUG: pushing arg of "<< calledFunc->getName() <<"\n"; - #endif + for (unsigned int i = 0; i < arg_num; i++) { + // TODO +#if DEBUG + errs() << "comparing " << *tArg << " and " << *(ci_fstuse->getArgOperand(i)) << "\n"; +#endif + if (ci_fstuse->getArgOperand(i) == tArg) { +#if DEBUG + // errs() << "pushing arg of "<< calledFunc->getName() <<"\n"; +#endif interProcFlows.push_back((ci_fstuse->getCalledFunction()->arg_begin() + i)); - //MUST also push back the call inst. + // MUST also push back the call inst. 
interProcFlows.push_back(ci_fstuse); - //and the srcOp + // and the srcOp interProcFlows.push_back(ci); - + break; } } - //find next local use - //promoted_inputs.push_back(ci); - taintedDecl[ci_fstuse].insert(ci); - fstUse = ptrAfterCall(tArg,ci_fstuse); + // find next local use + // promoted_inputs.push_back(ci); + taintedInsts[ci_fstuse].insert(ci); + fstUse = ptrAfterCall(tArg, ci_fstuse); if (fstUse == nullptr) { break; } - } - //re nullptr check - if (fstUse!=nullptr) { - interProcFlows = traverseLocal(fstUse, dyn_cast(currVal), &taintedDecl, nullptr); + } + // re nullptr check + if (fstUse != nullptr) { + interProcFlows = traverseLocal(fstUse, dyn_cast(curVal), &taintedInsts, nullptr); for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { + if (Instruction* iipf = dyn_cast(vipf)) { + if (CallInst* anno_check = dyn_cast(iipf)) { + // we delete these later... 
creates problems + if (anno_check->getName().contains("Fresh") || + anno_check->getName().contains("Consistent")) { continue; } } - taintedDecl[iipf].insert(dyn_cast(currVal)); + taintedInsts[iipf].insert(dyn_cast(curVal)); } } } - } + } } - } else if (isa(currVal)) { - #if DEBUG - errs() << "exploring tainted arg " << *currVal << "\n"; - #endif - Instruction* caller = dyn_cast(toExplore.front()); - - //promoted_inputs.push_back(caller); + } else if (isa(curVal)) { +#if DEBUG + errs() << "[Loop toExplore] cur inst = Argument (tainted arg)\n"; +#endif + + auto* caller = dyn_cast(toExplore.front()); toExplore.pop(); - Instruction* innerSrcOp = dyn_cast(toExplore.front()); +#if DEBUG + errs() << "[Loop toExplore] Caller: " << *caller << "\n"; +#endif + // promoted_inputs.push_back(caller); + + auto* innerInputInst = dyn_cast(toExplore.front()); toExplore.pop(); - interProcFlows = traverseLocal(currVal, innerSrcOp, &taintedDecl, caller); - - for (Value* vipf : interProcFlows) { - if(Instruction* iipf = dyn_cast(vipf)) { - if (CallInst* anno_check = dyn_cast(iipf)){ - //we delete these later... creates problems - if (anno_check->getName().contains("Fresh") || - anno_check->getName().contains("Consistent") ) { - continue; - } - } - taintedDecl[iipf].insert(innerSrcOp); - } - } - }//end elsif chain - #if DEBUG - errs() << "Finished iteration\n"; - #endif - for (Value* item : interProcFlows) { - if(item != NULL) { - //errs() <<"pushing item " << *item <<"\n"; +#if DEBUG + errs() << "[Loop toExplore] orig input: " << *innerInputInst << "\n"; + errs() << "[Loop toExplore] Call traverseLocal with cur inst (tainted), orig input (srcInput), caller\n"; +#endif + + interProcFlows = traverseLocal(curVal, innerInputInst, &taintedInsts, caller); + +#if DEBUG + errs() << "[Loop toExplore] Inspect interProcFlows:\n"; +#endif + for (auto* vipf : interProcFlows) { + if (auto* iipf = dyn_cast(vipf)) { + if (auto* anno_check = dyn_cast(iipf)) { + // We delete these later... 
creates problems + if (isAnnot(anno_check->getName())) continue; + } + taintedInsts[iipf].insert(innerInputInst); +#if DEBUG + errs() << "Adding innerInputInst (" << *innerInputInst << ") to set at " << *iipf << "\n"; +#endif + } + } + } // end elsif chain + + for (auto* item : interProcFlows) { + if (item != NULL) { +#if DEBUG + errs() << "Add to toExplore: " << *item << "\n"; +#endif toExplore.push(item); } else { errs() << "ERROR: encountered null interproc item\n"; } } - }//end while queue not empty - }//end for all iOp - - return taintedDecl; + +#if DEBUG + errs() << "*** Loop toExplore ***\n"; +#endif + } // end while queue not empty + } // end for all inputInsts + +#if DEBUG + errs() << "*** buildInputs ***\n"; +#endif + return taintedInsts; } -val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* iInfo, Instruction* caller) -{ +val_vec traverseLocal(Value* tainted, Instruction* srcInput, inst_insts_map* taintedInsts, Instruction* caller) { +#if DEBUG + errs() << "=== traverseLocal ===\n"; +#endif + val_vec interProcSinks; - queue localDeps; + std::queue localDeps; +#if DEBUG + errs() << "Add cur inst to localDeps\n"; +#endif localDeps.push(tainted); - while(!localDeps.empty()) { - Value* currVal = localDeps.front(); + while (!localDeps.empty()) { +#if DEBUG + errs() << "=== Loop localDeps ===\n"; +#endif + auto* curVal = localDeps.front(); localDeps.pop(); - val_vec customUsers; - if (StoreInst* si = dyn_cast(currVal)) { - //add the pointer to deps, as stores have no uses - //Add info on the store to the map - if(iInfo->find(si)!=iInfo->end()) { - if (find(iInfo->at(si).begin(), iInfo->at(si).end(), srcOp)!=iInfo->at(si).end()) { - continue; - } else { - iInfo->at(si).insert(srcOp); - } +#if DEBUG + errs() << "[Loop localDeps] cur inst: " << *curVal << "\n"; +#endif + val_vec customUsers; + if (auto* si = dyn_cast(curVal)) { +#if DEBUG + errs() << "[Loop localDeps] cur inst = StoreInst\n"; +#endif + // Add the pointer to deps, as stores 
have no uses + // Add info on the store to the map + if (taintedInsts->find(si) != taintedInsts->end()) { + auto insts = taintedInsts->at(si); + if (std::find(insts.begin(), insts.end(), srcInput) != insts.end()) continue; + taintedInsts->at(si).insert(srcInput); } else { - set seti; - seti.insert(srcOp); - iInfo->emplace(si, seti); + std::set seti; + seti.insert(srcInput); + taintedInsts->emplace(si, seti); } - #if DEBUG - errs() << " adding to map " << *srcOp << " for " << *si << "\n"; - #endif - //See if it is (or aliases?) one of the function arguments - for (Argument& arg : si->getFunction()->args()) { - Value* to_comp = si->getPointerOperand()->stripPointerCasts(); - #if DEBUG - errs() << " PBRef comp: " << *to_comp << " and " << arg << "\n"; - #endif - if (to_comp== &arg) { - //if taint came from inside any callsite is potentially tainted +#if DEBUG + errs() << "[Loop localDeps] Adding orig input (" << *srcInput << ") to set at cur inst (" << *si << ")\n"; +#endif + // See if it is (or aliases?) 
one of the function arguments (PBRef comp) + for (auto& arg : si->getFunction()->args()) { + auto* storePtr = si->getPointerOperand()->stripPointerCasts(); +#if DEBUG + errs() << "[Loop localDeps] Is ptr being stored to (" << *storePtr << ") = fun arg (" << arg << ")\n"; +#endif + if (storePtr == &arg) { + // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { - for(Value* calls : si->getFunction()->users()) { + for (auto calls : si->getFunction()->users()) { interProcSinks.push_back(calls); interProcSinks.push_back(dyn_cast(&arg)); - if (Instruction* key = dyn_cast(calls)) { - //check to make sure not already visited - // iInfo->at(key).insert(srcOp); - + if (auto key = dyn_cast(calls)) { + // check to make sure not already visited + // taintedInsts->at(key).insert(srcOp); } } } else { - //otherwise, just the caller's + // otherwise, just the caller's interProcSinks.push_back(caller); interProcSinks.push_back(dyn_cast(&arg)); - if (Instruction* key = dyn_cast(caller)) { - - - //check to make sure not already visited - // iInfo->at(key).insert(srcOp); - + if (auto key = dyn_cast(caller)) { + // check to make sure not already visited + // taintedInsts->at(key).insert(srcOp); } } } } - //construct "users" of the store - #if DEBUG - errs() << "DEBUG: Store users\n"; - #endif - //add in loads that are reachable from the tainted store. - Value* ptr = si->getPointerOperand(); - //if bci, get the operand, as that's the useful ptr - if (BitCastInst* bciptr = dyn_cast(ptr) ){ - ptr = bciptr->getOperand(0); - } - for(Value* use : ptr->users()){ - if (Instruction* useOfStore = dyn_cast(use)) { - #if DEBUG - errs() << "DEBUG: checking use " << *useOfStore << "\n"; - #endif + // Construct "users" of the store +#if DEBUG + errs() << "[Loop localDeps] Add users (loads) of store to customUsers:\n"; +#endif + // Add in loads that are reachable from the tainted store. 
+ auto* ptr = si->getPointerOperand(); + // If bci, get the operand, as that's the useful ptr + if (auto bciptr = dyn_cast(ptr)) ptr = bciptr->getOperand(0); + for (auto* use : ptr->users()) { + if (auto* useOfStore = dyn_cast(use)) { if (storePrecedesUse(useOfStore, si)) { +#if DEBUG + errs() << "[Loop Store Users] store precedes this use, add:" << *useOfStore << "\n"; +#endif customUsers.push_back(useOfStore); } } } - //update currVal to be the pointer - currVal = si->getPointerOperand(); + // Update curVal to be the pointer + curVal = si->getPointerOperand(); - //if it's a gepi, see if there are others that occur afterwards + // If it's a gepi, see if there are others that occur afterwards if (isa(si->getPointerOperand())) { inst_vec matching = couldMatchGEPI(dyn_cast(si->getPointerOperand())); - for (Instruction* item : matching) { + for (auto item : matching) { localDeps.push(item); } - //check pbref, need to compare op of the gepi, not gepi itself - for (Argument& arg : si->getFunction()->args()) { - #if DEBUG - errs() << " PBRef comp: " << *dyn_cast(currVal)->getOperand(0) << " and " << arg << "\n"; - #endif - if (dyn_cast(currVal)->getOperand(0) == &arg) { - //if taint came from inside any callsite is potentially tainted + // check pbref, need to compare op of the gepi, not gepi itself + for (auto& arg : si->getFunction()->args()) { +#if DEBUG + errs() << " PBRef comp: " << *dyn_cast(curVal)->getOperand(0) << " and " << arg << "\n"; +#endif + if (dyn_cast(curVal)->getOperand(0) == &arg) { + // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { - for(Value* calls : si->getFunction()->users()) { + for (Value* calls : si->getFunction()->users()) { interProcSinks.push_back(calls); interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(calls)) { - - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } else { - //otherwise, just the caller's + // otherwise, just the caller's 
interProcSinks.push_back(caller); interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(caller)) { - - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } - } + } } - + } else { - //if not a store, do normal users of currval - customUsers.insert(customUsers.end(), currVal->user_begin(), currVal->user_end()); +#if DEBUG + errs() << "[Loop localDeps] cur inst != StoreInst\n"; + errs() << "[Loop localDeps] Add users of cur inst to customUsers:\n"; + for (auto* use : curVal->users()) errs() << *use << "\n"; +#endif + // If not a store, do normal users of curVal + customUsers.insert(customUsers.end(), curVal->user_begin(), curVal->user_end()); } - - - - for (Value* use : customUsers) { - - //check that the use of a tainted pointer is really tainted - - //this is checking if the use is a tainted store - - if (ReturnInst* ri = dyn_cast(use)) { - #if DEBUG - errs() << "DEBUG: in return case\n"; - #endif +#if DEBUG + errs() << "[Loop localDeps] Go over uses\n"; +#endif + //* Here we may cross over to another procedure + for (auto* use : customUsers) { + // Check that the use of a tainted pointer is really tainted + + // This is checking if the use is a tainted store + + if (auto ri = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop customUsers] use = ReturnInst\n"; +#endif if (caller == nullptr) { - for(Value* calls : ri->getFunction()->users()) { - if(CallInst* ci = dyn_cast(calls)) { +#if DEBUG + errs() << "[Loop customUsers] No caller\n"; +#endif + for (auto calls : ri->getFunction()->users()) { + if (auto ci = dyn_cast(calls)) { interProcSinks.push_back(calls); - //extra for bookkeeping + // extra for bookkeeping interProcSinks.push_back(use); } } } else { - //otherwise, just the caller's +#if DEBUG + errs() << "[Loop customUsers] Some caller\n"; +#endif + // otherwise, just the caller's interProcSinks.push_back(caller); - //extra for bookkeeping + // extra for bookkeeping interProcSinks.push_back(use); } - - } else if 
(isa(use)) { - #if DEBUG - errs() << "DEBUG: in call case\n"; - #endif - //Add the right argument to the list - CallInst* ci = dyn_cast(use); - Function* calledFunc = ci ->getCalledFunction(); - if (calledFunc == NULL || calledFunc->empty()) { - //special case for llvm.memcpy - //See if it is (or aliases?) one of the function arguments - if (calledFunc!=NULL && calledFunc->hasName() && - calledFunc->getName().contains("llvm.memcpy")) { - //errs() << "DEBUG: memcpy " << *ci << "\n"; + } else if (auto* ci = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop customUsers] use = CallInst\n"; +#endif + // Add the right argument to the list + auto* calledFun = ci->getCalledFunction(); + if (calledFun == NULL || calledFun->empty()) { + // special case for llvm.memcpy + // See if it is (or aliases?) one of the function arguments + if (calledFun != NULL && calledFun->hasName() && + calledFun->getName().contains("llvm.memcpy")) { + // errs() << "memcpy " << *ci << "\n"; Value* src = ci->getOperand(1)->stripPointerCasts(); Value* dest = ci->getOperand(0); - // errs() << "DEBUG: with dest " << *dest << "\n"; + // errs() << "with dest " << *dest << "\n"; if (BitCastInst* bci = dyn_cast(dest)) { dest = bci->getOperand(0); - } + } if (GetElementPtrInst* gepi = dyn_cast(dest)) { dest = gepi->getOperand(0); - // errs() << "DEBUG: and gepi dest " << *dest << "\n"; + // errs() << "and gepi dest " << *dest << "\n"; } bool found = false; for (Argument& arg : ci->getFunction()->args()) { - //Value* to_comp = - #if DEBUG +// Value* to_comp = +#if DEBUG errs() << " PBRef comp: " << *dest << " and " << arg << "\n"; - #endif - if (dest== &arg) { +#endif + if (dest == &arg) { found = true; - //if taint came from inside any callsite is potentially tainted + // if taint came from inside any callsite is potentially tainted if (caller == nullptr) { - for(Value* calls : ci->getFunction()->users()) { + for (Value* calls : ci->getFunction()->users()) { interProcSinks.push_back(calls); 
interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(calls)) { - - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } else { - //otherwise, just the caller's + // otherwise, just the caller's interProcSinks.push_back(caller); interProcSinks.push_back(dyn_cast(&arg)); if (Instruction* key = dyn_cast(caller)) { - // iInfo->at(key).insert(srcOp); + // taintedInsts->at(key).insert(srcOp); } } } } - //it wasn't pbref, just "store", so find fst ptr after call - //and also put in iInfo + // it wasn't pbref, just "store", so find fst ptr after call + // and also put in taintedInsts if (!found) { - Value* destFst = ptrAfterCall(dest,ci); - - - //in case of loop - if (destFst !=ci->getOperand(0)) { - // errs () << "found a memcpy store " << *destFst <<"\n"; - if(iInfo->find(ci)!=iInfo->end()) { - if (find(iInfo->at(ci).begin(), iInfo->at(ci).end(), srcOp)!=iInfo->at(ci).end()) { + Value* destFst = ptrAfterCall(dest, ci); + + // in case of loop + if (destFst != ci->getOperand(0)) { + // errs () << "found a memcpy store " << *destFst <<"\n"; + if (taintedInsts->find(ci) != taintedInsts->end()) { + if (find(taintedInsts->at(ci).begin(), taintedInsts->at(ci).end(), srcInput) != taintedInsts->at(ci).end()) { continue; } else { - iInfo->at(ci).insert(srcOp); + taintedInsts->at(ci).insert(srcInput); } } else { - set seti; - seti.insert(srcOp); - iInfo->emplace(ci, seti); + std::set seti; + seti.insert(srcInput); + taintedInsts->emplace(ci, seti); } localDeps.push(destFst); } - } - } //end memcpy check - - //conservative tainting decision - if (calledFunc->empty()) { - - //if it's empty but declared in our mod (one of the passed in C ones) - //and it returns a value, then consider the taint passed to the - //return - if (!calledFunc->getName().contains("llvm") && - !calledFunc->getName().contains("core")) { - #if DEBUG - errs() << "DEBUG: pushing presumed c lib func " << calledFunc->getName() << "\n"; - #endif - localDeps.push(ci); 
- } - + } + } // end memcpy check + + // conservative tainting decision + if (calledFun->empty()) { + // if it's empty but declared in our mod (one of the passed in C ones) + // and it returns a value, then consider the taint passed to the + // return + if (!calledFun->getName().contains("llvm") && + !calledFun->getName().contains("core")) { +#if DEBUG + errs() << "pushing presumed c lib func " << calledFun->getName() << "\n"; +#endif + localDeps.push(ci); + } } continue; - } - unsigned int arg_num = ci->getNumArgOperands(); - - // Find the index of the tainted argument - for (unsigned int i = 0; i < arg_num; i++){ - #if DEBUG - errs() << "DEBUG: comparing "<< *currVal <<" and " << *(ci->getArgOperand(i))<<"\n"; - #endif - if(ci->getArgOperand(i)==currVal) { - #if DEBUG - errs() << "DEBUG: pushing arg of "<< calledFunc->getName() <<"\n"; - #endif - interProcSinks.push_back((calledFunc->arg_begin() + i)); - //MUST also push back the call inst. + + unsigned int arg_num = ci->arg_size(); +#if DEBUG + errs() << "[Loop customUsers] Find tainted arg of " << calledFun->getName() << "\n"; +#endif + // Find the index of the tainted argument + for (unsigned int i = 0; i < arg_num; i++) { + auto* arg = ci->getArgOperand(i); + if (arg == curVal) { + auto funArg = calledFun->arg_begin() + i; +#if DEBUG + errs() << "Found tainted arg: " << *arg << ", add fun arg (" << *funArg << "), the use (" << *ci << "), and orig input (" << *srcInput << ") to interProcFlows\n"; +#endif + interProcSinks.push_back(funArg); + // MUST also push back the call inst. 
interProcSinks.push_back(ci); - //MUST also push back the current srcOp - interProcSinks.push_back(srcOp); - if (Instruction* key = dyn_cast(ci)) { - // iInfo->at(key).insert(srcOp); + // MUST also push back the current srcInput + interProcSinks.push_back(srcInput); + if (auto* key = dyn_cast(ci)) { + // taintedInsts->at(key).insert(srcOp); } - break; - } - } - - } else if (Instruction* iUse = dyn_cast(use)) { + break; + } + } + } else if (auto* iUse = dyn_cast(use)) { +#if DEBUG + errs() << "[Loop customUsers] use != ReturnInst & use != CallInst\n"; +#endif if (iUse->isTerminator()) { if (iUse->getNumSuccessors() > 1) { - //Add control deps off of a branch. - #if DEBUG - errs() << "DEBUG: adding condeps case\n"; - #endif +// Add control deps off of a branch. +#if DEBUG + errs() << "adding condeps case\n"; +#endif val_vec controlDeps = getControlDeps(iUse); - //for all condep, add any reached loads, and add the store to the map - for (Value* item : controlDeps) { - if (StoreInst* siCon = dyn_cast(item)) { + // for all condep, add any reached loads, and add the store to the map + for (auto* item : controlDeps) { + if (auto* siCon = dyn_cast(item)) { localDeps.push(siCon); } - }//end for vals in condep + } } - }//end terminator check - #if DEBUG - //errs() << "DEBUG: pushing "<< *iUse<<"\n"; - #endif + } + +#if DEBUG + errs() << "[Loop customUsers] Add use to localDeps\n"; +#endif + //* Here we may push inst from another procedure, crossing boundaries localDeps.push(iUse); } } +#if DEBUG + errs() << "*** Loop localDeps ***\n"; +#endif } +#if DEBUG + errs() << "*** traverseLocal ***\n"; +#endif return interProcSinks; } - - -inst_vec findInputInsts(Module* M) -{ - inst_vec sources; - func_vec io_name; - //Find io name annotations - for(GlobalVariable& gv : M->globals()) { - if(gv.getName().contains("IO_NAME")) { - - if( Function* fp = dyn_cast(gv.getInitializer()->getOperand(0)->stripPointerCasts())) { - #if DEBUG - errs() << "Found io inst "<< fp->getName() <<"\n"; - 
#endif - io_name.push_back(fp); +inst_vec findInputInsts(Module* M) { +#if DEBUG + errs() << "findInputInsts\n"; +#endif + inst_vec inputInsts; + + // Find IO_NAME annotations + for (auto& gv : M->globals()) { + if (gv.getName().starts_with("IO_NAME")) { + if (auto* fp = dyn_cast(gv.getInitializer())) { +#if DEBUG + errs() << "Found IO fun: " << fp->getName() << "\n"; +#endif + // Now, search for calls to those functions + for (auto& F : *M) { + for (auto& B : F) { + for (auto& I : B) { + if (auto* ci = dyn_cast(&I)) { + if (fp == ci->getCalledFunction()) { +#if DEBUG + errs() << "Found IO call: " << I << "\n"; +#endif + inputInsts.push_back(&I); + break; + } + } + } + } + } } else { - errs() << "ERROR: could not unwrap function pointer from annotation\n"; + // TODO: Say something else + errs() << "[ERROR] Could not unwrap function pointer from annotation\n"; } } - } - - //now, search for calls to those functions - for (Function& func : * M) { - for (BasicBlock& bb : func) { - for(Instruction& inst : bb) { - if(CallInst* ci = dyn_cast(&inst)) { - if(find(io_name.begin(), io_name.end(),ci->getCalledFunction())!=io_name.end()) { - sources.push_back(&inst); - } - } - } - - } } - return sources; -} + return inputInsts; +} -/*See if a particular store is exposed to a use -- possibly replace couldLoadTainted*/ +// See if a particular store is exposed to a use -- possibly replace couldLoadTainted bool storePrecedesUse(Instruction* use, StoreInst* toMatch) { - queue to_visit; - vector visited; + std::queue to_visit; + std::vector visited; BasicBlock* current; - vector possible; + std::vector possible; int found = 0; int skip = 1; - + to_visit.push(use->getParent()); - while(!to_visit.empty()) { + while (!to_visit.empty()) { current = to_visit.front(); to_visit.pop(); - - for(BasicBlock::reverse_iterator i = current->rbegin(), e = current->rend(); i!=e;++i) { + + for (BasicBlock::reverse_iterator i = current->rbegin(), e = current->rend(); i != e; ++i) { Instruction* inst = 
&*i; - //don't look at li block before li - if((current == use->getParent())&&(skip)) { - //errs() << "skipping" << *inst <<"\n"; - if(use==inst){ - skip = 0; - } - continue; + // don't look at li block before li + if ((current == use->getParent()) && (skip)) { + // errs() << "skipping" << *inst <<"\n"; + if (use == inst) { + skip = 0; + } + continue; + } + // if(BI!=nullptr) { + // errs() << "looking at" << *BI <<"\n"; + if (StoreInst* si = dyn_cast(inst)) { + // errs() << "found a store" << *si <<"\n"; + if (si->getPointerOperand() == toMatch->getPointerOperand()) { + possible.push_back(si); + found = 1; + break; + } } - //if(BI!=nullptr) { - //errs() << "looking at" << *BI <<"\n"; - if (StoreInst* si = dyn_cast(inst)) { - //errs() << "found a store" << *si <<"\n"; - if (si->getPointerOperand() == toMatch->getPointerOperand()) { - possible.push_back(si); - found = 1; - break; - } - } } - //we found a store in this node - if(found) { + // we found a store in this node + if (found) { found = 0; continue; } /*add pred. 
blocks to our queue*/ for (auto PI = pred_begin(current); PI != pred_end(current); ++PI) { - //if it's new - if(!(find(visited.begin(), visited.end(), *PI) != visited.end())){ - visited.push_back(*PI); - to_visit.push(*PI); + // if it's new + if (!(find(visited.begin(), visited.end(), *PI) != visited.end())) { + visited.push_back(*PI); + to_visit.push(*PI); } } } /*Was one of the preceding writes the store in question?*/ - for(Value* poss : possible) { - if(poss == toMatch) { - return true; + for (Value* poss : possible) { + if (poss == toMatch) { + return true; } - } - //this use does not consume the tainted store + // this use does not consume the tainted store return false; } - /*See if the same EP is used in multiple GEPI, check if exposed*/ inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI) { - queue to_visit; - vector visited; + std::queue to_visit; + std::vector visited; BasicBlock* current; - vector possible; + std::vector possible; inst_vec matching; int found = 0; int skip = 1; - + to_visit.push(tGEPI->getParent()); - while(!to_visit.empty()) { + while (!to_visit.empty()) { current = to_visit.front(); to_visit.pop(); - - //forwards exploration - for(Instruction& i : *current) { + + // forwards exploration + for (Instruction& i : *current) { Instruction* inst = &i; - //don't look at gepi block before gepi - if((current == tGEPI->getParent())&&(skip)) { - //errs() << "skipping" << *inst <<"\n"; - if(tGEPI==inst){ - skip = 0; - } - continue; + // don't look at gepi block before gepi + if ((current == tGEPI->getParent()) && (skip)) { + // errs() << "skipping" << *inst <<"\n"; + if (tGEPI == inst) { + skip = 0; + } + continue; } - //if(BI!=nullptr) { - //errs() << "looking at" << *BI <<"\n"; - if (GetElementPtrInst* another = dyn_cast(inst)) { - //errs() << "found a store" << *si <<"\n"; - //check if the ops match - if (another->getPointerOperand() == tGEPI->getPointerOperand()) { - //check if used in load or store - for (Value* pUse : another->users()) { + // 
if(BI!=nullptr) { + // errs() << "looking at" << *BI <<"\n"; + if (GetElementPtrInst* another = dyn_cast(inst)) { + // errs() << "found a store" << *si <<"\n"; + // check if the ops match + if (another->getPointerOperand() == tGEPI->getPointerOperand()) { + // check if used in load or store + for (Value* pUse : another->users()) { if (isa(pUse)) { found = 1; break; } } - //no store + // no store if (!found) { - #if DEBUG - errs() << "matching GEPS: " << *another<<" and " << *tGEPI <<"\n"; - #endif +#if DEBUG + errs() << "matching GEPS: " << *another << " and " << *tGEPI << "\n"; +#endif matching.push_back(another); } - } - } + } + } } - //we found a store in this node - if(found) { + // we found a store in this node + if (found) { found = 0; continue; } /*add succ. blocks to our queue*/ for (auto SI = succ_begin(current); SI != succ_end(current); ++SI) { - //if it's new - if(!(find(visited.begin(), visited.end(), *SI) != visited.end())){ - visited.push_back(*SI); - to_visit.push(*SI); + // if it's new + if (!(find(visited.begin(), visited.end(), *SI) != visited.end())) { + visited.push_back(*SI); + to_visit.push(*SI); } } } - + return matching; } /*Find first use of a pointer after a callInst, for pass-by-ref*/ Instruction* ptrAfterCall(Value* ptr, CallInst* ci) { - queue to_visit; - vector visited; + std::queue to_visit; + std::vector visited; BasicBlock* current; - + int found = 0; int skip = 1; - + to_visit.push(ci->getParent()); - while(!to_visit.empty()) { + while (!to_visit.empty()) { current = to_visit.front(); to_visit.pop(); - - //forwards exploration - for(Instruction& i : *current) { + + // forwards exploration + for (Instruction& i : *current) { Instruction* inst = &i; - //don't look at gepi block before gepi - if((current == ci->getParent())&&(skip)) { - //errs() << "skipping" << *inst <<"\n"; - if(ci==inst){ - skip = 0; - } - continue; + // don't look at gepi block before gepi + if ((current == ci->getParent()) && (skip)) { + // errs() << "skipping" 
<< *inst <<"\n"; + if (ci == inst) { + skip = 0; + } + continue; } - //if the inst is a use of the pointer - if (find(ptr->user_begin(),ptr->user_end(), inst)!=ptr->user_end()) { + // if the inst is a use of the pointer + if (std::find(ptr->user_begin(), ptr->user_end(), inst) != ptr->user_end()) { return inst; } - } /*add succ. blocks to our queue*/ for (auto SI = succ_begin(current); SI != succ_end(current); ++SI) { - //if it's new - if(!(find(visited.begin(), visited.end(), *SI) != visited.end())){ - visited.push_back(*SI); - to_visit.push(*SI); + // if it's new + if (!(find(visited.begin(), visited.end(), *SI) != visited.end())) { + visited.push_back(*SI); + to_visit.push(*SI); } } } return nullptr; } - -/*This is a function to return all the control dependent stores off of a control inst -Input -- ti, the (formerly) terminator inst +/*This is a function to return all the control dependent stores off of a control inst +Input -- ti, the (formerly) terminator inst Output -- list of deps */ -val_vec getControlDeps(Instruction* ti) -{ +val_vec getControlDeps(Instruction* ti) { val_vec deps; int succ_i = 0; while (succ_i < ti->getNumSuccessors()) { BasicBlock* bb = ti->getSuccessor(succ_i); succ_i++; - for(Instruction& inst : *bb) { - //if we encounter a store, add to deps - if(isa(&inst)) { - deps.push_back(&inst); - } //if we encounter a multi succ branch, recursive call, if we encouter a join, continue to next succ - else if(inst.isTerminator()) { - - if(ti->getNumSuccessors() > 1) { - vector intermed = getControlDeps(&inst); - for(Value* item : intermed) { - deps.push_back(item); - } - } else { - break; - } + for (Instruction& inst : *bb) { + // if we encounter a store, add to deps + if (isa(&inst)) { + deps.push_back(&inst); + } // if we encounter a multi succ branch, recursive call, if we encouter a join, continue to next succ + else if (inst.isTerminator()) { + if (ti->getNumSuccessors() > 1) { + std::vector intermed = getControlDeps(&inst); + for (Value* 
item : intermed) { + deps.push_back(item); + } + } else { + break; + } } } } return deps; } - -/*Get direct uses (at src level, not IR) of a fresh var*/ -inst_vec traverseDirectUses(Instruction* root) -{ +// Get direct uses (at src level, not IR) of a fresh var +inst_vec traverseDirectUses(Instruction* root) { +#if DEBUG + errs() << "=== traverseDirectUses ===\n"; +#endif inst_vec uses; - queue localDeps; + std::queue localDeps; +#if DEBUG + errs() << "Add root to localDeps: " << *root << "\n"; +#endif localDeps.push(root); - - //Edge case: check if return is an internally allocated stack var + + // Edge case: check if return is an internally allocated stack var Value* retPtr; - Instruction* last = &(root->getFunction()->back().back()); - if (ReturnInst* ri = dyn_cast(last)) { - for (Use& op : ri->operands()) { - if(LoadInst* li = dyn_cast(op.get())) { + auto* last = &(root->getFunction()->back().back()); + if (auto* ri = dyn_cast(last)) { + for (auto& op : ri->operands()) { + if (auto* li = dyn_cast(op.get())) { retPtr = li->getPointerOperand(); +#if DEBUG + errs() << "retPtr: " << *retPtr << "\n"; +#endif } } - } - while(!localDeps.empty()) { - Instruction* currVal = localDeps.front(); - uses.push_back(currVal); + while (!localDeps.empty()) { + auto* curVal = localDeps.front(); +#if DEBUG + errs() << "[Loop localDeps] Add curVal to uses: " << *curVal << "\n"; +#endif + uses.push_back(curVal); localDeps.pop(); - for (Value* use : currVal->users()) { - //if it's a gepi, see if there are others that occur afterwards - // errs() << *use <<" is a direct use of " << *currVal<<"\n"; + +#if DEBUG + errs() << "[Loop localDeps] Go over curVal users\n"; +#endif + for (auto* use : curVal->users()) { +#if DEBUG + errs() << "[Loop users] use: " << *use << "\n"; +#endif + // If it's a gepi, see if there are others that occur afterwards + // errs() << *use <<" is a direct use of " << *currVal<<"\n"; if (isa(use)) { - inst_vec matching = couldMatchGEPI(dyn_cast(use)); - for 
(Instruction* item : matching) { - // errs() << "pushing to local deps " << *item <<"\n"; + auto matching = couldMatchGEPI(dyn_cast(use)); + for (auto* item : matching) { + // errs() << "pushing to local deps " << *item <<"\n"; localDeps.push(item); } - } - else if (ReturnInst* ri = dyn_cast(use)) { - for(Value* calls : ri->getFunction()->users()) { - if(isa(calls)) { + } else if (ReturnInst* ri = dyn_cast(use)) { + for (Value* calls : ri->getFunction()->users()) { + if (isa(calls)) { uses.push_back(dyn_cast(calls)); - } } } else if (StoreInst* si = dyn_cast(use)) { - //if stores into ret pointer, treat as above +#if DEBUG + errs() << "[Loop users] use = StoreInst\n"; +#endif + // If stores into ret pointer, treat as above if (si->getPointerOperand() == retPtr) { - for(Value* calls : si->getFunction()->users()) { - if(isa(calls)) { - uses.push_back(dyn_cast(calls)); - +#if DEBUG + errs() << "[Loop users] ptr operand = retPtr\n"; +#endif + for (Value* calls : si->getFunction()->users()) { + if (isa(calls)) { + uses.push_back(dyn_cast(calls)); } - } + } } } else if (BranchInst* bi = dyn_cast(use)) { - //if a use is a branch inst the atomic region needs to - //dominate the successors + // If a use is a branch inst the atomic region needs to + // dominate the successors for (BasicBlock* bbInterior : bi->successors()) { - //skip panic blocks, otherwise there will be no post dom + // Skip panic blocks, otherwise there will be no post dom if (bbInterior->getName().equals("panic")) { continue; } uses.push_back(&(bbInterior->front())); } } else if (CallInst* ci = dyn_cast(use)) { - if(ci->hasName() && ci->getName().startswith("_")) { - //fall through +#if DEBUG + errs() << "[Loop users] use = CallInst\n"; +#endif + if (ci->hasName() && ci->getName().startswith("_")) { + // Fall through } else { +#if DEBUG + errs() << "[Loop users] Add CallInst to uses\n"; +#endif uses.push_back(ci); continue; } } - if (Instruction* iUse = dyn_cast(use)) { - //see if load is to another var 
or just internal ssa - if (LoadInst* li = dyn_cast(iUse)) { - if(li->hasName()) { - //Hacky --verify that this is always true - if(!li->getName().startswith("_")) { + + if (auto* iUse = dyn_cast(use)) { + // See if load is to another var or just internal ssa + if (auto* li = dyn_cast(iUse)) { + if (li->hasName()) { + // Hacky -- verify that this is always true + if (!li->getName().startswith("_")) continue; - } } } + +#if DEBUG + errs() << "[Loop users] Add use to localDeps\n"; +#endif localDeps.push(iUse); } } } +#if DEBUG + errs() << "*** traverseDirectUses ***\n"; +#endif return uses; } +inst_vec traverseUses(Instruction* root) { +#if DEBUG + errs() << "=== traverseUses ===\n"; +#endif + auto directUses = traverseDirectUses(root); + inst_set uses(directUses.begin(), directUses.end()); + + for (auto* directUse : directUses) { +#if DEBUG + errs() << "[directUses] directUse: " << *directUse << "\n"; +#endif + + if (auto* si = dyn_cast(directUse)) { +#if DEBUG + errs() << "[directUses] directUse = StoreInst\n"; +#endif + + auto* ptr = si->getPointerOperand(); +#if DEBUG + errs() << "[directUses] ptr operand: " << *ptr << "\n"; +#endif + + for (auto* ptrUse : ptr->users()) { + if (auto* li = dyn_cast(ptrUse)) { +#if DEBUG + errs() << "[ptrUsers] Add ptrUse (LoadInst) to uses: " << *ptrUse << "\n"; +#endif + uses.emplace(li); + + for (auto* liUse : li->users()) { + if (auto* ci = dyn_cast(liUse)) { +#if DEBUG + errs() << "[liUsers] Add liUse (CallInst) to uses: " << *liUse << "\n"; +#endif + uses.emplace(ci); + } + } + } + } + } + } +#if DEBUG + errs() << "=== traverseUses ===\n"; +#endif + inst_vec uses_vec(uses.begin(), uses.end()); + return uses_vec; +} diff --git a/ocelot/AtomicRegionInference/src/include/ConsistentInference.h b/ocelot/AtomicRegionInference/src/include/ConsistentInference.h deleted file mode 100644 index 1f7a429..0000000 --- a/ocelot/AtomicRegionInference/src/include/ConsistentInference.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef 
__CONSISTENTINFERENCE__ -#define __CONSISTENTINFERENCE__ - -#include "HelperTypes.h" -using namespace llvm; -using namespace std; - -class ConsistentInference { -public: - ConsistentInference(Pass* _pass, Module* _m, Function* _as, Function* _ae) { - pass = _pass; - m = _m; - atomStart = _as; - atomEnd = _ae; - } - void inferConsistent(map allSets); - void inferFresh(inst_vec_vec allSets); - void addRegion(inst_vec conSet, int regType); - Function* commonPredecessor(map blocks, Function* root); - Instruction* insertRegionInst(int regInst, Instruction* insertBefore); - bool sameFunction(map blockMap); - Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, set nested); - vector deepCaller(Function* root, vector funcList, Function** goal); - inst_inst_pair findSmallest(vectorregionsFound); - BasicBlock* getLoopEnd(BasicBlock* bb); - bool loopCheck(BasicBlock* bb); - int getSubLength(BasicBlock* bb, Instruction* end, vector visited); - - - -private: - Pass* pass; - Module* m; - Function* atomStart; - Function* atomEnd; -}; - -#endif diff --git a/ocelot/AtomicRegionInference/src/include/HelperTypes.h b/ocelot/AtomicRegionInference/src/include/HelperTypes.h index 843c498..edd9935 100644 --- a/ocelot/AtomicRegionInference/src/include/HelperTypes.h +++ b/ocelot/AtomicRegionInference/src/include/HelperTypes.h @@ -1,52 +1,42 @@ -#ifndef __HELPERTYPES__ -#define __HELPERTYPES__ - -#include "llvm/Pass.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/ADT/ilist.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/SymbolTableListTraits.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include 
"llvm/Analysis/CallGraph.h" -#include -#include -#include -#include -//#include - -#define DEBUG 0 - -using namespace llvm; - -typedef std::vector val_vec; -typedef std::vector bb_vec; -typedef std::vector inst_vec; -typedef std::map val_insts_map; -typedef std::vector gv_vec; -typedef std::vector> val_inst_vec; -typedef std::vector> inst_inst_vec; -typedef std::map inst_vals_map; -typedef std::map> inst_insts_map; -typedef std::vector func_vec; -typedef std::vector inst_vec_vec; -typedef std::pair inst_inst_pair; - -extern gv_vec gv_list; - -/*bool isArray(Value* v); -bool isTask(Function* F); -bool isMemcpy(Instruction* I); -uint64_t getSize(Value* val); -int is_atomic_boundary(Instruction* ci); -#define OVERHEAD 0 -*/ -#endif +#ifndef __HELPERTYPES__ +#define __HELPERTYPES__ + +#include +#include +#include +#include + +#include "llvm/ADT/ilist.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/SymbolTableListTraits.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#define DEBUG 1 + +using namespace llvm; + +typedef std::vector val_vec; +typedef std::vector bb_vec; +typedef std::vector inst_vec; +typedef std::set inst_set; +typedef std::map val_insts_map; +typedef std::vector gv_vec; +typedef std::vector> val_inst_vec; +typedef std::pair inst_inst_pair; +typedef std::vector inst_inst_vec; +typedef std::map inst_vals_map; +typedef std::map inst_insts_map; +typedef std::vector func_vec; +typedef std::vector inst_vec_vec; + +extern gv_vec gv_list; + +#endif diff --git a/ocelot/AtomicRegionInference/src/include/Helpers.h b/ocelot/AtomicRegionInference/src/include/Helpers.h new file mode 100644 index 0000000..8e940f0 --- /dev/null +++ 
b/ocelot/AtomicRegionInference/src/include/Helpers.h @@ -0,0 +1,15 @@ +#ifndef __HELPERS__ +#define __HELPERS__ + +#include + +#include "HelperTypes.h" + +using namespace llvm; + +std::string getSimpleNodeLabel(const Value* Node); +bool isAnnot(const StringRef annotName); +void printInstInsts(const inst_insts_map& iim, bool onlyCalls = false); +void printInsts(const inst_vec& iv); + +#endif \ No newline at end of file diff --git a/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h b/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h deleted file mode 100644 index bd0036a..0000000 --- a/ocelot/AtomicRegionInference/src/include/InferAtomicPass.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef __INFERPASS__ -#define __INFERPASS__ - -#include "HelperTypes.h" -#include "ConsistentInference.h" -#include "llvm/ADT/APInt.h" -#include "llvm/IR/Verifier.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/MCJIT.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include - -using namespace llvm; - -class InferAtomicModulePass : public ModulePass { - public: - static char ID; - InferAtomicModulePass() : ModulePass(ID) {} - - virtual bool runOnModule(Module &M); - int getMaxCost(Function* f); - void mergeRegions(Function* f); - void getAnnotations(map* conSets, inst_vec_vec* freshVars, inst_insts_map inputs, inst_vec* toDelete); - inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info); - map collectCon(map startingPointa, inst_insts_map inputMap); - 
void removeAnnotations(inst_vec* toDelete); - - - virtual void getAnalysisUsage(AnalysisUsage& AU) const { - AU.setPreservesAll(); - //AU.addRequired(); - //AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - Module* getModule() { - return m; - } - Module* setModule(Module* _m) { - return m = _m; - } - private: - Module* m; - int capacitorSize; - Function* atomStart; - Function* atomEnd; - - -}; - -#endif diff --git a/ocelot/AtomicRegionInference/src/include/InferAtoms.h b/ocelot/AtomicRegionInference/src/include/InferAtoms.h new file mode 100644 index 0000000..19701e0 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/include/InferAtoms.h @@ -0,0 +1,54 @@ +#ifndef __INFERATOMS__ +#define __INFERATOMS__ + +#include +#include +#include +#include +#include +#include + +#include "Helpers.h" +#include "InferFreshCons.h" +#include "TaintTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Pass.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +struct InferAtomsPass : public PassInfoMixin { + public: + InferAtomsPass() {} + PreservedAnalyses run(Module& M, ModuleAnalysisManager& AM); + + void getAnnotations(std::map* consVars, inst_vec_vec* freshVars, inst_insts_map inputMap, inst_vec* toDelete); + inst_vec_vec collectFresh(inst_vec_vec startingPoints, inst_insts_map info); + std::map collectCons(std::map startingPointa, inst_insts_map inputMap); + void removeAnnotations(inst_vec* toDelete); + void setModule(Module* _M) { M = _M; } + + private: + Module* M; + Function* atomStart; + Function* atomEnd; +}; + +extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo +llvmGetPassPluginInfo() { + return { + .APIVersion = LLVM_PLUGIN_API_VERSION, + .PluginName = "Atomic Region Inference Pass", + .PluginVersion = "v0.1", + .RegisterPassBuilderCallbacks = [](PassBuilder& PB) { + PB.registerPipelineStartEPCallback( + 
[](ModulePassManager& MPM, OptimizationLevel Level) { + MPM.addPass(InferAtomsPass()); + }); + }}; +} + +#endif diff --git a/ocelot/AtomicRegionInference/src/include/InferFreshCons.h b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h new file mode 100644 index 0000000..b3fcd10 --- /dev/null +++ b/ocelot/AtomicRegionInference/src/include/InferFreshCons.h @@ -0,0 +1,43 @@ +#ifndef __INFERFRESHCONS__ +#define __INFERFRESHCONS__ + +#include "Helpers.h" + +using namespace llvm; + +struct InferFreshCons { + public: + InferFreshCons(FunctionAnalysisManager* _FAM, Module* _m, Function* _as, Function* _ae) { + FAM = _FAM; + m = _m; + atomStart = _as; + atomEnd = _ae; + } + + enum RegionKind { Fresh, + Consistent }; + + enum InsertKind { Start, + End }; + + void inferConsistent(std::map allSets); + void inferFresh(inst_vec_vec allSets); + void addRegion(inst_vec conSet, RegionKind regionKind); + Function* findCandidate(std::map blocks, Function* root); + Instruction* insertRegionInst(InsertKind insertKind, Instruction* insertBefore); + bool sameFunction(std::map blockMap); + Instruction* truncate(BasicBlock* bb, bool forwards, inst_vec conSet, std::set nested); + std::vector deepCaller(Function* root, std::vector& funcList, Function** goal); + inst_inst_pair findShortest(inst_inst_vec regionsFound); + BasicBlock* getLoopEnd(BasicBlock* bb); + bool loopCheck(BasicBlock* bb); + int getSubLength(BasicBlock* bb, Instruction* end, std::vector visited); + + private: + FunctionAnalysisManager* FAM; + Module* m; + Function* atomStart; + Function* atomEnd; +}; + +#endif diff --git a/ocelot/AtomicRegionInference/src/include/TaintTracker.h b/ocelot/AtomicRegionInference/src/include/TaintTracker.h index ffd90ef..ea3ce03 100644 --- a/ocelot/AtomicRegionInference/src/include/TaintTracker.h +++ b/ocelot/AtomicRegionInference/src/include/TaintTracker.h @@ -1,40 +1,18 @@ -#include "llvm/Pass.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" 
-#include "llvm/Analysis/CFLSteensAliasAnalysis.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include -#include -#include -#include -#include -#include -#include -#include "HelperTypes.h" +#ifndef __TAINTTRACKER__ +#define __TAINTTRACKER__ -using namespace llvm; -using namespace std; +#include "Helpers.h" +using namespace llvm; inst_insts_map buildInputs(Module* m); val_vec traverseLocal(Value* tainted, Instruction* srcOp, inst_insts_map* buildMap, Instruction* caller); - -inst_vec findInputInsts(Module* M); -Instruction* ptrAfterCall(Value* ptr, CallInst* ci); +inst_vec findInputInsts(Module* M); +Instruction* ptrAfterCall(Value* ptr, CallInst* ci); bool storePrecedesUse(Instruction* use, StoreInst* toMatch); inst_vec couldMatchGEPI(GetElementPtrInst* tGEPI); val_vec getControlDeps(Instruction* ti); inst_vec traverseDirectUses(Instruction* root); +inst_vec traverseUses(Instruction* root); + +#endif