diff --git a/svf/include/AE/Svfexe/AbsExtAPI.h b/svf/include/AE/Svfexe/AbsExtAPI.h index b97da7ba5..858ebd752 100644 --- a/svf/include/AE/Svfexe/AbsExtAPI.h +++ b/svf/include/AE/Svfexe/AbsExtAPI.h @@ -74,43 +74,23 @@ class AbsExtAPI */ void handleExtAPI(const CallICFGNode *call); - /** - * @brief Handles the strcpy API call. - * @param call Pointer to the call ICFG node. - */ - void handleStrcpy(const CallICFGNode *call); + // --- Shared primitives used by string/memory handlers --- - /** - * @brief Calculates the length of a string. - * @param as Reference to the abstract state. - * @param strValue Pointer to the SVF variable representing the string. - * @return The interval value representing the string length. - */ + /// Get the byte size of each element for a pointer/array variable. + u32_t getElementSize(AbstractState& as, const SVFVar* var); + + /// Check if an interval length is usable (not bottom, not unbounded). + static bool isValidLength(const IntervalValue& len); + + /// Calculate the length of a null-terminated string in abstract state. IntervalValue getStrlen(AbstractState& as, const SVF::SVFVar *strValue); - /** - * @brief Handles the strcat API call. - * @param call Pointer to the call ICFG node. - */ - void handleStrcat(const SVF::CallICFGNode *call); + // --- String/memory operation handlers --- - /** - * @brief Handles the memcpy API call. - * @param as Reference to the abstract state. - * @param dst Pointer to the destination SVF variable. - * @param src Pointer to the source SVF variable. - * @param len The interval value representing the length to copy. - * @param start_idx The starting index for copying. - */ + void handleStrcpy(const CallICFGNode *call); + void handleStrcat(const CallICFGNode *call); + void handleStrncat(const CallICFGNode *call); void handleMemcpy(AbstractState& as, const SVF::SVFVar *dst, const SVF::SVFVar *src, IntervalValue len, u32_t start_idx); - - /** - * @brief Handles the memset API call. 
- * @param as Reference to the abstract state. - * @param dst Pointer to the destination SVF variable. - * @param elem The interval value representing the element to set. - * @param len The interval value representing the length to set. - */ void handleMemset(AbstractState& as, const SVFVar* dst, IntervalValue elem, IntervalValue len); /** diff --git a/svf/include/AE/Svfexe/AbstractInterpretation.h b/svf/include/AE/Svfexe/AbstractInterpretation.h index aea2e9d90..02bd46ca2 100644 --- a/svf/include/AE/Svfexe/AbstractInterpretation.h +++ b/svf/include/AE/Svfexe/AbstractInterpretation.h @@ -36,6 +36,8 @@ #include "Util/SVFBugReport.h" #include "Util/SVFStat.h" #include "Graphs/SCC.h" +#include "Graphs/CallGraph.h" +#include namespace SVF { @@ -144,6 +146,13 @@ class AbstractInterpretation /// Program entry void analyse(); + /// Analyze all entry points (functions without callers) + void analyzeFromAllProgEntries(); + + /// Get all entry point functions (functions without callers) + std::deque collectProgEntryFuns(); + + static AbstractInterpretation& getAEInstance() { static AbstractInterpretation instance; @@ -322,6 +331,8 @@ class AbstractInterpretation AEAPI* api{nullptr}; ICFG* icfg; + CallGraph* callGraph; + CallGraphSCC* callGraphScc; AEStat* stat; std::vector callSiteStack; @@ -358,6 +369,7 @@ class AbstractInterpretation Map> func_map; Map abstractTrace; // abstract states immediately after nodes + Set allAnalyzedNodes; // All nodes ever analyzed (across all entry points) std::string moduleName; std::vector> detectors; diff --git a/svf/lib/AE/Svfexe/AEDetector.cpp b/svf/lib/AE/Svfexe/AEDetector.cpp index ae9d0bb3c..91431cc90 100644 --- a/svf/lib/AE/Svfexe/AEDetector.cpp +++ b/svf/lib/AE/Svfexe/AEDetector.cpp @@ -479,7 +479,23 @@ bool BufOverflowDetector::canSafelyAccessMemory(AbstractState& as, const SVF::SV SVFIR* svfir = PAG::getPAG(); NodeID value_id = value->getId(); - assert(as[value_id].isAddr()); + // Lazy initialization for uninitialized pointer 
parameters in multi-entry analysis. + // When analyzing a function as an entry point (e.g., not called from main), + // pointer parameters may not have been initialized via AddrStmt. + // + // Example: + // void process_buffer(char* buf, int len) { + // buf[0] = 'a'; // accessing buf + // } + // When analyzing process_buffer as an entry point, 'buf' is a function parameter + // with no AddrStmt, so it has no address information in the abstract state. + // We lazily initialize it to point to the black hole object (InvalidMemAddr, the target of BlkPtr), representing + // an unknown but valid memory location. This allows the analysis to continue + // while being conservatively sound. + if (!as[value_id].isAddr()) + { + as[value_id] = AddressValue(InvalidMemAddr); + } for (const auto& addr : as[value_id].getAddrs()) { NodeID objId = as.getIDFromAddr(addr); diff --git a/svf/lib/AE/Svfexe/AbsExtAPI.cpp b/svf/lib/AE/Svfexe/AbsExtAPI.cpp index 1d90ad602..a6400a88e 100644 --- a/svf/lib/AE/Svfexe/AbsExtAPI.cpp +++ b/svf/lib/AE/Svfexe/AbsExtAPI.cpp @@ -272,60 +272,19 @@ void AbsExtAPI::initExtFunMap() auto sse_strlen = [&](const CallICFGNode *callNode) { - // check the arg size if (callNode->arg_size() < 1) return; - const SVFVar* strValue = callNode->getArgument(0); AbstractState& as = getAbsStateFromTrace(callNode); - NodeID value_id = strValue->getId(); u32_t lhsId = callNode->getRetICFGNode()->getActualRet()->getId(); - u32_t dst_size = 0; - for (const auto& addr : as[value_id].getAddrs()) - { - NodeID objId = as.getIDFromAddr(addr); - if (svfir->getBaseObject(objId)->isConstantByteSize()) - { - dst_size = svfir->getBaseObject(objId)->getByteSizeOfObj(); - } - else - { - const ICFGNode* addrNode = svfir->getBaseObject(objId)->getICFGNode(); - for (const SVFStmt* stmt2: addrNode->getSVFStmts()) - { - if (const AddrStmt* addrStmt = SVFUtil::dyn_cast(stmt2)) - { - dst_size = as.getAllocaInstByteSize(addrStmt); - } - } - } - } - u32_t len = 0; - NodeID dstid = strValue->getId(); - if 
(as.inVarToAddrsTable(dstid)) - { - for (u32_t index = 0; index < dst_size; index++) - { - AbstractValue expr0 = - as.getGepObjAddrs(dstid, IntervalValue(index)); - AbstractValue val; - for (const auto &addr: expr0.getAddrs()) - { - val.join_with(as.load(addr)); - } - if (val.getInterval().is_numeral() && (char) val.getInterval().getIntNumeral() == '\0') - { - break; - } - ++len; - } - } - if (len == 0) - { - as[lhsId] = IntervalValue((s64_t)0, (s64_t)Options::MaxFieldLimit()); - } + // strlen/wcslen return the number of characters (not bytes). + // getStrlen returns byte-scaled length (len * elemSize) for use + // by memcpy/strcpy. Here we need the raw character count, so + // divide back by elemSize. + IntervalValue byteLen = getStrlen(as, callNode->getArgument(0)); + u32_t elemSize = getElementSize(as, callNode->getArgument(0)); + if (byteLen.is_numeral() && elemSize > 1) + as[lhsId] = IntervalValue(byteLen.getIntNumeral() / (s64_t)elemSize); else - { - as[lhsId] = IntervalValue(len); - } + as[lhsId] = byteLen; }; func_map["strlen"] = sse_strlen; func_map["wcslen"] = sse_strlen; @@ -480,7 +439,13 @@ void AbsExtAPI::handleExtAPI(const CallICFGNode *call) } else if (extType == STRCAT) { - handleStrcat(call); + // Both strcat and strncat are annotated as STRCAT. + // Distinguish by name: strncat/wcsncat contain "ncat". + const std::string& name = fun->getName(); + if (name.find("ncat") != std::string::npos) + handleStrncat(call); + else + handleStrcat(call); } else { @@ -489,21 +454,50 @@ void AbsExtAPI::handleExtAPI(const CallICFGNode *call) return; } -void AbsExtAPI::handleStrcpy(const CallICFGNode *call) +// ===----------------------------------------------------------------------===// +// Shared primitives for string/memory handlers +// ===----------------------------------------------------------------------===// + +/// Get the byte size of each element for a pointer/array variable. +/// Shared by handleMemcpy, getStrlen, and the strlen/wcslen handler; handleMemset keeps its own element-size logic. 
+u32_t AbsExtAPI::getElementSize(AbstractState& as, const SVFVar* var) { - // strcpy, __strcpy_chk, stpcpy , wcscpy, __wcscpy_chk - // get the dst and src - AbstractState& as = getAbsStateFromTrace(call); - const SVFVar* arg0Val = call->getArgument(0); - const SVFVar* arg1Val = call->getArgument(1); - IntervalValue strLen = getStrlen(as, arg1Val); - // no need to -1, since it has \0 as the last byte - handleMemcpy(as, arg0Val, arg1Val, strLen, strLen.lb().getIntNumeral()); + if (var->getType()->isArrayTy()) + { + return SVFUtil::dyn_cast(var->getType()) + ->getTypeOfElement()->getByteSize(); + } + if (var->getType()->isPointerTy()) + { + if (const SVFType* elemType = as.getPointeeElement(var->getId())) + { + if (elemType->isArrayTy()) + return SVFUtil::dyn_cast(elemType) + ->getTypeOfElement()->getByteSize(); + return elemType->getByteSize(); + } + return 1; + } + assert(false && "unsupported type for element size"); + return 1; +} + +/// Check if an interval length is usable for memory operations. +/// Returns false for bottom (no information) or unbounded lower bound +/// (cannot determine a concrete start for iteration). +bool AbsExtAPI::isValidLength(const IntervalValue& len) +{ + return !len.isBottom() && !len.lb().is_minus_infinity(); } +/// Calculate the length of a null-terminated string in abstract state. +/// Scans memory from the base of strValue looking for a '\0' byte. +/// Returns an IntervalValue: len * elemSize for the len elements scanned before '\0' (or before the end of the buffer), or [0, MaxFieldLimit] when len is 0. 
IntervalValue AbsExtAPI::getStrlen(AbstractState& as, const SVF::SVFVar *strValue) { NodeID value_id = strValue->getId(); + + // Step 1: determine the buffer size (in bytes) backing this pointer u32_t dst_size = 0; for (const auto& addr : as[value_id].getAddrs()) { @@ -524,8 +518,9 @@ IntervalValue AbsExtAPI::getStrlen(AbstractState& as, const SVF::SVFVar *strValu } } } + + // Step 2: scan for '\0' terminator u32_t len = 0; - u32_t elemSize = 1; if (as.inVarToAddrsTable(value_id)) { for (u32_t index = 0; index < dst_size; index++) @@ -537,188 +532,153 @@ IntervalValue AbsExtAPI::getStrlen(AbstractState& as, const SVF::SVFVar *strValu { val.join_with(as.load(addr)); } - if (val.getInterval().is_numeral() && (char) val.getInterval().getIntNumeral() == '\0') + if (val.getInterval().is_numeral() && + (char) val.getInterval().getIntNumeral() == '\0') { break; } ++len; } - if (strValue->getType()->isArrayTy()) - { - elemSize = SVFUtil::dyn_cast(strValue->getType())->getTypeOfElement()->getByteSize(); - } - else if (strValue->getType()->isPointerTy()) - { - if (const SVFType* elemType = as.getPointeeElement(value_id)) - { - if (elemType->isArrayTy()) - elemSize = SVFUtil::dyn_cast(elemType)->getTypeOfElement()->getByteSize(); - else - elemSize = elemType->getByteSize(); - } - else - { - elemSize = 1; - } - } - else - { - assert(false && "we cannot support this type"); - } } + + // Step 3: scale by element size and return + u32_t elemSize = getElementSize(as, strValue); if (len == 0) - { return IntervalValue((s64_t)0, (s64_t)Options::MaxFieldLimit()); - } - else - { - return IntervalValue(len * elemSize); - } + return IntervalValue(len * elemSize); +} + +// ===----------------------------------------------------------------------===// +// String/memory operation handlers +// ===----------------------------------------------------------------------===// + +/// strcpy(dst, src): copy all of src (including '\0') into dst. 
+/// Covers: strcpy, __strcpy_chk, stpcpy, wcscpy, __wcscpy_chk +void AbsExtAPI::handleStrcpy(const CallICFGNode *call) +{ + AbstractState& as = getAbsStateFromTrace(call); + const SVFVar* dst = call->getArgument(0); + const SVFVar* src = call->getArgument(1); + IntervalValue srcLen = getStrlen(as, src); + // no need to -1, since srcLen includes up to (but not past) '\0' + if (!isValidLength(srcLen)) return; + handleMemcpy(as, dst, src, srcLen, 0); } +/// strcat(dst, src): append all of src after the end of dst. +/// Covers: strcat, __strcat_chk, wcscat, __wcscat_chk +void AbsExtAPI::handleStrcat(const CallICFGNode *call) +{ + AbstractState& as = getAbsStateFromTrace(call); + const SVFVar* dst = call->getArgument(0); + const SVFVar* src = call->getArgument(1); + IntervalValue dstLen = getStrlen(as, dst); + IntervalValue srcLen = getStrlen(as, src); + if (!isValidLength(dstLen)) return; + handleMemcpy(as, dst, src, srcLen, dstLen.lb().getIntNumeral()); +} -void AbsExtAPI::handleStrcat(const SVF::CallICFGNode *call) +/// strncat(dst, src, n): append at most n bytes of src after the end of dst. 
+/// Covers: strncat, __strncat_chk, wcsncat, __wcsncat_chk +void AbsExtAPI::handleStrncat(const CallICFGNode *call) { - // __strcat_chk, strcat, __wcscat_chk, wcscat, __strncat_chk, strncat, __wcsncat_chk, wcsncat - // to check it is strcat group or strncat group AbstractState& as = getAbsStateFromTrace(call); - const FunObjVar *fun = call->getCalledFunction(); - const std::vector strcatGroup = {"__strcat_chk", "strcat", "__wcscat_chk", "wcscat"}; - const std::vector strncatGroup = {"__strncat_chk", "strncat", "__wcsncat_chk", "wcsncat"}; - if (std::find(strcatGroup.begin(), strcatGroup.end(), fun->getName()) != strcatGroup.end()) - { - const SVFVar* arg0Val = call->getArgument(0); - const SVFVar* arg1Val = call->getArgument(1); - IntervalValue strLen0 = getStrlen(as, arg0Val); - IntervalValue strLen1 = getStrlen(as, arg1Val); - IntervalValue totalLen = strLen0 + strLen1; - handleMemcpy(as, arg0Val, arg1Val, strLen1, strLen0.lb().getIntNumeral()); - // do memcpy - } - else if (std::find(strncatGroup.begin(), strncatGroup.end(), fun->getName()) != strncatGroup.end()) - { - const SVFVar* arg0Val = call->getArgument(0); - const SVFVar* arg1Val = call->getArgument(1); - const SVFVar* arg2Val = call->getArgument(2); - IntervalValue arg2Num = as[arg2Val->getId()].getInterval(); - IntervalValue strLen0 = getStrlen(as, arg0Val); - IntervalValue totalLen = strLen0 + arg2Num; - handleMemcpy(as, arg0Val, arg1Val, arg2Num, strLen0.lb().getIntNumeral()); - // do memcpy - } - else - { - assert(false && "unknown strcat function, please add it to strcatGroup or strncatGroup"); - } + const SVFVar* dst = call->getArgument(0); + const SVFVar* src = call->getArgument(1); + IntervalValue n = as[call->getArgument(2)->getId()].getInterval(); + IntervalValue dstLen = getStrlen(as, dst); + if (!isValidLength(dstLen)) return; + handleMemcpy(as, dst, src, n, dstLen.lb().getIntNumeral()); } -void AbsExtAPI::handleMemcpy(AbstractState& as, const SVF::SVFVar *dst, const SVF::SVFVar *src, 
IntervalValue len, u32_t start_idx) +/// Core memcpy: copy `len` bytes from src to dst starting at dst[start_idx]. +void AbsExtAPI::handleMemcpy(AbstractState& as, const SVF::SVFVar *dst, + const SVF::SVFVar *src, IntervalValue len, + u32_t start_idx) { - u32_t dstId = dst->getId(); // pts(dstId) = {objid} objbar objtypeinfo->getType(). + if (!isValidLength(len)) return; + + u32_t dstId = dst->getId(); u32_t srcId = src->getId(); - u32_t elemSize = 1; - if (dst->getType()->isArrayTy()) - { - elemSize = SVFUtil::dyn_cast(dst->getType())->getTypeOfElement()->getByteSize(); - } - // memcpy(i32*, i32*, 40) - else if (dst->getType()->isPointerTy()) - { - if (const SVFType* elemType = as.getPointeeElement(dstId)) - { - if (elemType->isArrayTy()) - elemSize = SVFUtil::dyn_cast(elemType)->getTypeOfElement()->getByteSize(); - else - elemSize = elemType->getByteSize(); - } - else - { - elemSize = 1; - } - } - else - { - assert(false && "we cannot support this type"); - } - u32_t size = std::min((u32_t)Options::MaxFieldLimit(), (u32_t) len.lb().getIntNumeral()); + u32_t elemSize = getElementSize(as, dst); + u32_t size = std::min((u32_t)Options::MaxFieldLimit(), + (u32_t)len.lb().getIntNumeral()); u32_t range_val = size / elemSize; - if (as.inVarToAddrsTable(srcId) && as.inVarToAddrsTable(dstId)) + + if (!as.inVarToAddrsTable(srcId) || !as.inVarToAddrsTable(dstId)) + return; + + for (u32_t index = 0; index < range_val; index++) { - for (u32_t index = 0; index < range_val; index++) + AbstractValue expr_src = + as.getGepObjAddrs(srcId, IntervalValue(index)); + AbstractValue expr_dst = + as.getGepObjAddrs(dstId, IntervalValue(index + start_idx)); + for (const auto &dstAddr: expr_dst.getAddrs()) { - // dead loop for string and break if there's a \0. If no \0, it will throw err. 
- AbstractValue expr_src = - as.getGepObjAddrs(srcId, IntervalValue(index)); - AbstractValue expr_dst = - as.getGepObjAddrs(dstId, IntervalValue(index + start_idx)); - for (const auto &dst: expr_dst.getAddrs()) + for (const auto &srcAddr: expr_src.getAddrs()) { - for (const auto &src: expr_src.getAddrs()) + u32_t objId = as.getIDFromAddr(srcAddr); + if (as.inAddrToValTable(objId) || as.inAddrToAddrsTable(objId)) { - u32_t objId = as.getIDFromAddr(src); - if (as.inAddrToValTable(objId)) - { - as.store(dst, as.load(src)); - } - else if (as.inAddrToAddrsTable(objId)) - { - as.store(dst, as.load(src)); - } + as.store(dstAddr, as.load(srcAddr)); } } } } } -void AbsExtAPI::handleMemset(AbstractState& as, const SVF::SVFVar *dst, IntervalValue elem, IntervalValue len) +/// Core memset: fill dst with `elem` for `len` bytes. +/// Note: elemSize here uses the pointee type's full size (not array element size) +/// to match how LLVM memset/wmemset intrinsics measure `len`. For a pointer to +/// wchar_t[100], elemSize = sizeof(wchar_t[100]), so range_val reflects the +/// number of top-level GEP fields, not individual array elements. 
+void AbsExtAPI::handleMemset(AbstractState& as, const SVF::SVFVar *dst, + IntervalValue elem, IntervalValue len) { + if (!isValidLength(len)) return; + u32_t dstId = dst->getId(); - u32_t size = std::min((u32_t)Options::MaxFieldLimit(), (u32_t) len.lb().getIntNumeral()); u32_t elemSize = 1; if (dst->getType()->isArrayTy()) { - elemSize = SVFUtil::dyn_cast(dst->getType())->getTypeOfElement()->getByteSize(); + elemSize = SVFUtil::dyn_cast(dst->getType()) + ->getTypeOfElement()->getByteSize(); } else if (dst->getType()->isPointerTy()) { if (const SVFType* elemType = as.getPointeeElement(dstId)) - { elemSize = elemType->getByteSize(); - } else - { elemSize = 1; - } } else { - assert(false && "we cannot support this type"); + assert(false && "unsupported type for element size"); } - + u32_t size = std::min((u32_t)Options::MaxFieldLimit(), + (u32_t)len.lb().getIntNumeral()); u32_t range_val = size / elemSize; + for (u32_t index = 0; index < range_val; index++) { - // dead loop for string and break if there's a \0. If no \0, it will throw err. 
- if (as.inVarToAddrsTable(dstId)) + if (!as.inVarToAddrsTable(dstId)) + break; + AbstractValue lhs_gep = as.getGepObjAddrs(dstId, IntervalValue(index)); + for (const auto &addr: lhs_gep.getAddrs()) { - AbstractValue lhs_gep = as.getGepObjAddrs(dstId, IntervalValue(index)); - for (const auto &addr: lhs_gep.getAddrs()) + u32_t objId = as.getIDFromAddr(addr); + if (as.inAddrToValTable(objId)) { - u32_t objId = as.getIDFromAddr(addr); - if (as.inAddrToValTable(objId)) - { - AbstractValue tmp = as.load(addr); - tmp.join_with(elem); - as.store(addr, tmp); - } - else - { - as.store(addr, elem); - } + AbstractValue tmp = as.load(addr); + tmp.join_with(elem); + as.store(addr, tmp); + } + else + { + as.store(addr, elem); } } - else - break; } } diff --git a/svf/lib/AE/Svfexe/AbstractInterpretation.cpp b/svf/lib/AE/Svfexe/AbstractInterpretation.cpp index 5e413c9f2..b490217c0 100644 --- a/svf/lib/AE/Svfexe/AbstractInterpretation.cpp +++ b/svf/lib/AE/Svfexe/AbstractInterpretation.cpp @@ -34,6 +34,7 @@ #include "Graphs/CallGraph.h" #include "WPA/Andersen.h" #include +#include using namespace SVF; using namespace SVFUtil; @@ -62,6 +63,11 @@ void AbstractInterpretation::runOnModule(ICFG *_icfg) AbstractInterpretation::AbstractInterpretation() { + AndersenWaveDiff* ander = AndersenWaveDiff::createAndersenWaveDiff(svfir); + callGraph = ander->getCallGraph(); + // Detect if the call graph has cycles by finding its strongly connected components (SCC) + callGraphScc = ander->getCallGraphSCC(); + callGraphScc->find(); stat = new AEStat(this); } /// Destructor @@ -94,12 +100,6 @@ void AbstractInterpretation::collectCycleHeads(const std::listgetCallGraphSCC(); - callGraphScc->find(); - CallGraph* callGraph = ander->getCallGraph(); - // Iterate through the call graph for (auto it = callGraph->begin(); it != callGraph->end(); it++) { @@ -162,33 +162,104 @@ void AbstractInterpretation::initWTO() } } -/// Program entry +/// Collect entry point functions for analysis. 
+/// Entry points are functions without callers (no incoming edges in CallGraph). +/// Uses a deque to allow efficient insertion at front for prioritizing main() +std::deque AbstractInterpretation::collectProgEntryFuns() +{ + std::deque entryFunctions; + const CallGraph* callGraph = svfir->getCallGraph(); + + for (auto it = callGraph->begin(); it != callGraph->end(); ++it) + { + const CallGraphNode* cgNode = it->second; + const FunObjVar* fun = cgNode->getFunction(); + + // Skip declarations + if (fun->isDeclaration()) + continue; + + // Entry points are functions without callers (no incoming edges) + if (cgNode->getInEdges().empty()) + { + // If main exists, put it first for priority using deque's push_front + if (fun->getName() == "main") + { + entryFunctions.push_front(fun); + } + else + { + entryFunctions.push_back(fun); + } + } + } + + return entryFunctions; +} + + +/// Program entry - analyze from all entry points (multi-entry analysis is the default) void AbstractInterpretation::analyse() { initWTO(); + + // Always use multi-entry analysis from all entry points + analyzeFromAllProgEntries(); +} + +/// Analyze all entry points (functions without callers) - for whole-program analysis. +/// Abstract state is shared across entry points so that functions analyzed from +/// earlier entries are not re-analyzed from scratch. 
+void AbstractInterpretation::analyzeFromAllProgEntries() +{ + // Collect all entry point functions + std::deque entryFunctions = collectProgEntryFuns(); + + if (entryFunctions.empty()) + { + assert(false && "No entry functions found for analysis"); + return; + } // handle Global ICFGNode of SVFModule handleGlobalNode(); - getAbsStateFromTrace( - icfg->getGlobalICFGNode())[PAG::getPAG()->getBlkPtr()] = IntervalValue::top(); - if (const CallGraphNode* cgn = svfir->getCallGraph()->getCallGraphNode("main")) + for (const FunObjVar* entryFun : entryFunctions) { - // Use worklist-based function handling instead of recursive WTO component handling - const ICFGNode* mainEntry = icfg->getFunEntryICFGNode(cgn->getFunction()); - handleFunction(mainEntry); + const ICFGNode* funEntry = icfg->getFunEntryICFGNode(entryFun); + handleFunction(funEntry); } } /// handle global node +/// Initializes the abstract state for the global ICFG node and processes all global statements. +/// This includes setting up the null pointer and black hole pointer (blkPtr). +/// BlkPtr is initialized to point to the InvalidMem (BlackHole) object, representing +/// an unknown memory location that cannot be statically resolved. void AbstractInterpretation::handleGlobalNode() { const ICFGNode* node = icfg->getGlobalICFGNode(); abstractTrace[node] = AbstractState(); abstractTrace[node][IRGraph::NullPtr] = AddressValue(); + // Global Node, we just need to handle addr, load, store, copy and gep for (const SVFStmt *stmt: node->getSVFStmts()) { handleSVFStatement(stmt); } + + // BlkPtr represents a pointer whose target is statically unknown (e.g., from + // int2ptr casts, external function returns, or unmodeled instructions like + // AtomicCmpXchg). It should be an address pointing to the InvalidMem object + // (BlackHole, ID=2), NOT an interval top. + // + // History: this was originally set to IntervalValue::top() as a quick fix when + // the analysis crashed on programs containing uninitialized BlkPtr. 
However, + // BlkPtr is semantically a *pointer* (address domain), not a numeric value + // (interval domain). Setting it to interval top broke cross-domain consistency: + // the interval domain and address domain gave contradictory information for the + // same variable. The correct representation is an AddressValue containing the + // BlackHole/InvalidMem virtual address, which means "points to unknown memory". + abstractTrace[node][PAG::getPAG()->getBlkPtr()] = + AddressValue(InvalidMemAddr); } /// get execution state by merging states of predecessor blocks @@ -661,6 +732,9 @@ bool AbstractInterpretation::handleICFGNode(const ICFGNode* node) detector->detect(getAbsStateFromTrace(node), node); stat->countStateSize(); + // Track this node as analyzed (for coverage statistics across all entry points) + allAnalyzedNodes.insert(node); + // Check if state changed (for fixpoint detection) // For entry nodes on first visit, always return true to process successors if (isFunEntry && !hadPrevState) @@ -945,7 +1019,15 @@ bool AbstractInterpretation::shouldApplyNarrowing(const FunObjVar* fun) return false; } } -/// Handle direct or indirect call: get callee, process function body, set return state +/// Handle direct or indirect call: get callee(s), process function body, set return state. +/// +/// For direct calls, the callee is known statically. +/// For indirect calls, the previous implementation resolved callees from the abstract +/// state's address domain, which only picked the first address and missed other targets. +/// Since the abstract state's address domain is not an over-approximation for function +/// pointers (it may be uninitialized or incomplete), we now use Andersen's pointer +/// analysis results from the pre-computed call graph, which soundly resolves all +/// possible indirect call targets. 
void AbstractInterpretation::handleFunCall(const CallICFGNode *callNode) { AbstractState& as = getAbsStateFromTrace(callNode); @@ -955,16 +1037,34 @@ void AbstractInterpretation::handleFunCall(const CallICFGNode *callNode) if (skipRecursiveCall(callNode)) return; - const FunObjVar* callee = getCallee(callNode); - if (!callee) + // Direct call: callee is known + if (const FunObjVar* callee = callNode->getCalledFunction()) + { + callSiteStack.push_back(callNode); + const ICFGNode* calleeEntry = icfg->getFunEntryICFGNode(callee); + handleFunction(calleeEntry); + callSiteStack.pop_back(); + const RetICFGNode* retNode = callNode->getRetICFGNode(); + abstractTrace[retNode] = abstractTrace[callNode]; return; + } - callSiteStack.push_back(callNode); - - const ICFGNode* calleeEntry = icfg->getFunEntryICFGNode(callee); - handleFunction(calleeEntry); - - callSiteStack.pop_back(); + // Indirect call: use Andersen's call graph to get all resolved callees. + // The call graph was built in the AbstractInterpretation constructor by running Andersen's pointer analysis, + // which over-approximates the set of possible targets for each indirect callsite. 
+ if (callGraph->hasIndCSCallees(callNode)) + { + const auto& callees = callGraph->getIndCSCallees(callNode); + callSiteStack.push_back(callNode); + for (const FunObjVar* callee : callees) + { + if (callee->isDeclaration()) + continue; + const ICFGNode* calleeEntry = icfg->getFunEntryICFGNode(callee); + handleFunction(calleeEntry); + } + callSiteStack.pop_back(); + } const RetICFGNode* retNode = callNode->getRetICFGNode(); abstractTrace[retNode] = abstractTrace[callNode]; } @@ -1229,15 +1329,39 @@ void AEStat::finializeStat() generalNumMap["ES_Loc_Addr_AVG_Num"] /= count; } generalNumMap["SVF_STMT_NUM"] = count; - generalNumMap["ICFG_Node_Num"] = _ae->svfir->getICFG()->nodeNum; + + u32_t totalICFGNodes = _ae->svfir->getICFG()->nodeNum; + generalNumMap["ICFG_Node_Num"] = totalICFGNodes; + + // Calculate coverage: use allAnalyzedNodes which tracks all nodes across all entry points + u32_t analyzedNodes = _ae->allAnalyzedNodes.size(); + generalNumMap["Analyzed_ICFG_Node_Num"] = analyzedNodes; + + // Coverage percentage (stored as integer percentage * 100 for precision) + if (totalICFGNodes > 0) + { + double coveragePercent = (double)analyzedNodes / (double)totalICFGNodes * 100.0; + generalNumMap["ICFG_Coverage_Percent"] = (u32_t)(coveragePercent * 100); // Store as percentage * 100 + } + else + { + generalNumMap["ICFG_Coverage_Percent"] = 0; + } + u32_t callSiteNum = 0; u32_t extCallSiteNum = 0; Set funs; + Set analyzedFuns; for (const auto &it: *_ae->svfir->getICFG()) { if (it.second->getFun()) { funs.insert(it.second->getFun()); + // Check if this node was analyzed (across all entry points) + if (_ae->allAnalyzedNodes.find(it.second) != _ae->allAnalyzedNodes.end()) + { + analyzedFuns.insert(it.second->getFun()); + } } if (const CallICFGNode *callNode = dyn_cast(it.second)) { @@ -1252,6 +1376,19 @@ void AEStat::finializeStat() } } generalNumMap["Func_Num"] = funs.size(); + generalNumMap["Analyzed_Func_Num"] = analyzedFuns.size(); + + // Function coverage percentage + 
if (funs.size() > 0) + { + double funcCoveragePercent = (double)analyzedFuns.size() / (double)funs.size() * 100.0; + generalNumMap["Func_Coverage_Percent"] = (u32_t)(funcCoveragePercent * 100); // Store as percentage * 100 + } + else + { + generalNumMap["Func_Coverage_Percent"] = 0; + } + generalNumMap["EXT_CallSite_Num"] = extCallSiteNum; generalNumMap["NonEXT_CallSite_Num"] = callSiteNum; timeStatMap["Total_Time(sec)"] = (double)(endTime - startTime) / TIMEINTERVAL; @@ -1280,8 +1417,16 @@ void AEStat::performStat() unsigned field_width = 30; for (NUMStatMap::iterator it = generalNumMap.begin(), eit = generalNumMap.end(); it != eit; ++it) { - // format out put with width 20 space - std::cout << std::setw(field_width) << it->first << it->second << "\n"; + // Special handling for percentage fields (stored as percentage * 100) + if (it->first == "ICFG_Coverage_Percent" || it->first == "Func_Coverage_Percent") + { + double percent = (double)it->second / 100.0; + std::cout << std::setw(field_width) << it->first << std::fixed << std::setprecision(2) << percent << "%\n"; + } + else + { + std::cout << std::setw(field_width) << it->first << it->second << "\n"; + } } SVFUtil::outs() << "-------------------------------------------------------\n"; for (TIMEStatMap::iterator it = timeStatMap.begin(), eit = timeStatMap.end(); it != eit; ++it) @@ -1605,6 +1750,13 @@ void AbstractInterpretation::updateStateOnCmp(const CmpStmt *cmp) case CmpStmt::FCMP_TRUE: resVal = IntervalValue(1, 1); break; + case CmpStmt::FCMP_ORD: + case CmpStmt::FCMP_UNO: + // FCMP_ORD: true if both operands are not NaN + // FCMP_UNO: true if either operand is NaN + // Conservatively return [0, 1] since we don't track NaN + resVal = IntervalValue(0, 1); + break; default: assert(false && "undefined compare: "); } @@ -1719,6 +1871,13 @@ void AbstractInterpretation::updateStateOnCmp(const CmpStmt *cmp) case CmpStmt::FCMP_TRUE: resVal = IntervalValue(1, 1); break; + case CmpStmt::FCMP_ORD: + case 
CmpStmt::FCMP_UNO: + // FCMP_ORD: true if both operands are not NaN + // FCMP_UNO: true if either operand is NaN + // Conservatively return [0, 1] since we don't track NaN + resVal = IntervalValue(0, 1); + break; default: assert(false && "undefined compare: "); }