From c95a419853b496431d820757c8035ff4aba99409 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 21 May 2025 19:43:18 +0200 Subject: [PATCH 01/29] Add alloc-sites-aware default problem + start adding IDEDroid-like formulation of field-sensitive IFDS --- .../DataFlow/IfdsIde/IDETabulationProblem.h | 7 +- .../IfdsIde/DefaultAliasAwareIDEProblem.h | 9 + .../DefaultAllocSitesAwareIDEProblem.h | 168 ++++++++++++++++++ .../IfdsIde/DefaultNoAliasIDEProblem.h | 9 + .../FieldSensAllocSitesAwareIFDSProblem.h | 139 +++++++++++++++ .../IfdsIde/LLVMFunctionDataFlowFacts.h | 9 + .../PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h | 14 +- ...DefaultAllocSitesAwareIDEFlowFunctions.cpp | 154 ++++++++++++++++ .../FieldSensAllocSitesAwareIFDSProblem.cpp | 79 ++++++++ 9 files changed, 586 insertions(+), 2 deletions(-) create mode 100644 include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h create mode 100644 include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp diff --git a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h index 57120a4398..dc12cc07bb 100644 --- a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h @@ -146,7 +146,12 @@ class IDETabulationProblem : public FlowFunctions, /// the level of soundness is ignored. Otherwise, true. 
virtual bool setSoundness(Soundness /*S*/) { return false; } - const ProjectIRDBBase *getProjectIRDB() const noexcept { return IRDB; } + [[nodiscard]] const ProjectIRDBBase *getProjectIRDB() const noexcept { + return IRDB; + } + [[nodiscard]] llvm::ArrayRef getEntryPoints() const noexcept { + return EntryPoints; + } protected: typename FlowFunctions::FlowFunctionPtrType diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAliasAwareIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAliasAwareIDEProblem.h index 9a73977f97..71d62aa4d2 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAliasAwareIDEProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAliasAwareIDEProblem.h @@ -1,3 +1,12 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel, Maximilian Huber and others + *****************************************************************************/ + #ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_IDEALIASINFOTABULATIONPROBLEM_H #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_IDEALIASINFOTABULATIONPROBLEM_H diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h new file mode 100644 index 0000000000..6ae7c56962 --- /dev/null +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h @@ -0,0 +1,168 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_DEFAULTALLOCSITESAWAREIDEPROBLEM_H +#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_DEFAULTALLOCSITESAWAREIDEPROBLEM_H + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include + +// Forward declaration of types for which we only use its pointer or ref type +namespace llvm { +class Instruction; +class Function; +class Value; +} // namespace llvm + +namespace psr { + +namespace detail { +class IDEAllocSitesAwareDefaultFlowFunctionsImpl + : private IDENoAliasDefaultFlowFunctionsImpl { +public: + using typename IDENoAliasDefaultFlowFunctionsImpl::d_t; + using typename IDENoAliasDefaultFlowFunctionsImpl::f_t; + using typename IDENoAliasDefaultFlowFunctionsImpl::FlowFunctionPtrType; + using typename IDENoAliasDefaultFlowFunctionsImpl::FlowFunctionType; + using typename IDENoAliasDefaultFlowFunctionsImpl::n_t; + + using IDENoAliasDefaultFlowFunctionsImpl::isFunctionModeled; + + [[nodiscard]] constexpr LLVMAliasInfoRef getAliasInfo() const noexcept { + return AS; + } + + constexpr IDEAllocSitesAwareDefaultFlowFunctionsImpl( + LLVMAliasInfoRef AS) noexcept + : AS(AS) { + assert(AS && "You must provide an alias information handle!"); + } + + [[nodiscard]] FlowFunctionPtrType getNormalFlowFunctionImpl(n_t Curr, + n_t /*Succ*/); + [[nodiscard]] FlowFunctionPtrType getCallFlowFunctionImpl(n_t CallInst, + f_t CalleeFun); + [[nodiscard]] FlowFunctionPtrType getRetFlowFunctionImpl(n_t CallSite, + f_t /*CalleeFun*/, + n_t ExitInst, + n_t /*RetSite*/); + [[nodiscard]] FlowFunctionPtrType + getCallToRetFlowFunctionImpl(n_t CallSite, n_t /*RetSite*/, + llvm::ArrayRef /*Callees*/); + +private: + LLVMAliasInfoRef AS; +}; +} // namespace detail + +template +class DefaultAllocSitesAwareIDEProblem + : public IDETabulationProblem, + 
protected detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl { +public: + using ProblemAnalysisDomain = AnalysisDomainTy; + using d_t = typename AnalysisDomainTy::d_t; + using n_t = typename AnalysisDomainTy::n_t; + using f_t = typename AnalysisDomainTy::f_t; + using t_t = typename AnalysisDomainTy::t_t; + using v_t = typename AnalysisDomainTy::v_t; + using l_t = typename AnalysisDomainTy::l_t; + using i_t = typename AnalysisDomainTy::i_t; + using db_t = typename AnalysisDomainTy::db_t; + + using ConfigurationTy = HasNoConfigurationType; + + using FlowFunctionType = FlowFunction; + using FlowFunctionPtrType = typename FlowFunctionType::FlowFunctionPtrType; + + using container_type = typename FlowFunctionType::container_type; + + /// Constructs an IDETabulationProblem with the usual arguments + alias + /// information. + /// + /// \note It is useful to use an instance of FilteredAliasSet for the alias + /// information to lower spurious aliases + explicit DefaultAllocSitesAwareIDEProblem( + const ProjectIRDBBase *IRDB, LLVMAliasInfoRef AS, + std::vector EntryPoints, + std::optional + ZeroValue) noexcept(std::is_nothrow_move_constructible_v) + : IDETabulationProblem(IRDB, std::move(EntryPoints), + std::move(ZeroValue)), + detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl(AS) {} + + [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, + n_t Succ) override { + return getNormalFlowFunctionImpl(Curr, Succ); + } + + [[nodiscard]] FlowFunctionPtrType + getCallFlowFunction(n_t CallInst, f_t CalleeFun) override { + return getCallFlowFunctionImpl(CallInst, CalleeFun); + } + + [[nodiscard]] FlowFunctionPtrType getRetFlowFunction(n_t CallSite, + f_t CalleeFun, + n_t ExitInst, + n_t RetSite) override { + return getRetFlowFunctionImpl(CallSite, CalleeFun, ExitInst, RetSite); + } + + [[nodiscard]] FlowFunctionPtrType + getCallToRetFlowFunction(n_t CallSite, n_t RetSite, + llvm::ArrayRef Callees) override { + return getCallToRetFlowFunctionImpl(CallSite, RetSite, 
Callees); + } +}; + +class DefaultAllocSitesAwareIFDSProblem + : public IFDSTabulationProblem, + protected detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl { +public: + /// Constructs an IFDSTabulationProblem with the usual arguments + alias + /// information. + /// + /// \note It is useful to use an instance of FilteredAliasSet for the alias + /// information to lower spurious aliases + explicit DefaultAllocSitesAwareIFDSProblem( + const ProjectIRDBBase *IRDB, LLVMAliasInfoRef AS, + std::vector EntryPoints, + d_t ZeroValue) noexcept(std::is_nothrow_move_constructible_v) + : IFDSTabulationProblem(IRDB, std::move(EntryPoints), ZeroValue), + detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl(AS) {} + + [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, + n_t Succ) override { + return getNormalFlowFunctionImpl(Curr, Succ); + } + + [[nodiscard]] FlowFunctionPtrType + getCallFlowFunction(n_t CallInst, f_t CalleeFun) override { + return getCallFlowFunctionImpl(CallInst, CalleeFun); + } + + [[nodiscard]] FlowFunctionPtrType getRetFlowFunction(n_t CallSite, + f_t CalleeFun, + n_t ExitInst, + n_t RetSite) override { + return getRetFlowFunctionImpl(CallSite, CalleeFun, ExitInst, RetSite); + } + + [[nodiscard]] FlowFunctionPtrType + getCallToRetFlowFunction(n_t CallSite, n_t RetSite, + llvm::ArrayRef Callees) override { + return getCallToRetFlowFunctionImpl(CallSite, RetSite, Callees); + } +}; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_DEFAULTALLOCSITESAWAREIDEPROBLEM_H diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h index 1865f749f2..35bd6fcb01 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h @@ -1,3 +1,12 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian 
Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel, Maximilian Huber and others + *****************************************************************************/ + #ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_IDENOALIASINFOTABULATIONPROBLEM_H #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_IDENOALIASINFOTABULATIONPROBLEM_H diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h new file mode 100644 index 0000000000..a4ea9f6b09 --- /dev/null +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -0,0 +1,139 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_FIELDSENSALLOCSITESAWAREIFDSPROBLEM_H +#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_FIELDSENSALLOCSITESAWAREIFDSPROBLEM_H + +#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" +#include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" +#include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include "llvm/ADT/SmallVector.h" + +#include + +namespace psr { + +/// \file Implements field-sensitivity after the paper "Boosting the performance +/// of alias-aware IFDS analysis with CFL-based environment transformers" by Li +/// et al. 
+ +struct CFLFieldSensEdgeValue { + // TODO: JoinLatticeTraits + + llvm::SmallVector Loads; + llvm::SmallVector Stores; + llvm::SmallDenseSet Kills; + // Add an offset for pending GEPs; INT32_MIN is Top + int32_t Offset = {0}; +}; + +template +struct CFLFieldSensAnalysisDomain : AnalysisDomainTy { + using l_t = CFLFieldSensEdgeValue; +}; + +class FieldSensAllocSitesAwareIFDSProblem + : public IDETabulationProblem< + CFLFieldSensAnalysisDomain> { + using Base = IDETabulationProblem< + CFLFieldSensAnalysisDomain>; + +public: + using typename Base::container_type; + using typename Base::d_t; + using typename Base::db_t; + using typename Base::f_t; + using typename Base::FlowFunctionPtrType; + using typename Base::i_t; + using typename Base::l_t; + using typename Base::n_t; + using typename Base::ProblemAnalysisDomain; + using typename Base::t_t; + using typename Base::v_t; + + /// Constructs an IDETabulationProblem with the usual arguments + alias + /// information. + /// + /// \note It is useful to use an instance of FilteredAliasSet for the alias + /// information to lower spurious aliases + explicit FieldSensAllocSitesAwareIFDSProblem( + IFDSTabulationProblem *UserProblem, + LLVMAliasInfoRef AS) noexcept(std::is_nothrow_move_constructible_v) + : Base(UserProblem->getProjectIRDB(), UserProblem->getEntryPoints(), + UserProblem->getZeroValue()), + AS(AS), UserProblem(UserProblem) {} + + FieldSensAllocSitesAwareIFDSProblem(std::nullptr_t, + LLVMAliasInfoRef AS) = delete; + + [[nodiscard]] InitialSeeds initialSeeds() override; + + [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, + n_t Succ) override { + return UserProblem->getNormalFlowFunction(Curr, Succ); + } + + [[nodiscard]] FlowFunctionPtrType + getCallFlowFunction(n_t CallInst, f_t CalleeFun) override { + return UserProblem->getCallFlowFunction(CallInst, CalleeFun); + } + + [[nodiscard]] FlowFunctionPtrType + getSummaryFlowFunction(n_t CallInst, f_t CalleeFun) override { + return 
UserProblem->getSummaryFlowFunction(CallInst, CalleeFun); + } + + [[nodiscard]] FlowFunctionPtrType getRetFlowFunction(n_t CallSite, + f_t CalleeFun, + n_t ExitInst, + n_t RetSite) override { + return UserProblem->getRetFlowFunction(CallSite, CalleeFun, ExitInst, + RetSite); + } + + [[nodiscard]] FlowFunctionPtrType + getCallToRetFlowFunction(n_t CallSite, n_t RetSite, + llvm::ArrayRef Callees) override { + return UserProblem->getCallToRetFlowFunction(CallSite, RetSite, Callees); + } + + EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + d_t SuccNode) override; + + EdgeFunction getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t DestinationFunction, + d_t DestNode) override; + + EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFunction, + n_t ExitStmt, d_t ExitNode, + n_t RetSite, d_t RetNode) override; + + EdgeFunction + getCallToRetEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, + d_t RetSiteNode, + llvm::ArrayRef Callees) override; + + EdgeFunction getSummaryEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + d_t SuccNode) override; + + EdgeFunction extend(const EdgeFunction &L, + const EdgeFunction &R) override; + + EdgeFunction combine(const EdgeFunction &L, + const EdgeFunction &R) override; + +private: + LLVMAliasInfoRef AS; + IFDSTabulationProblem *UserProblem{}; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_FIELDSENSALLOCSITESAWAREIFDSPROBLEM_H diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h index 19e03b49bf..5161760f2c 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFunctionDataFlowFacts.h @@ -1,3 +1,12 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. 
This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel, bulletSpace and others + *****************************************************************************/ + #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/FunctionDataFlowFacts.h" #include "phasar/Utils/DefaultValue.h" diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h index cea1c2772c..0ac7309a73 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h @@ -1,4 +1,14 @@ -#pragma once +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel, bulletSpace and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LIBCSUMMARY_H +#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LIBCSUMMARY_H namespace psr { namespace library_summary { @@ -7,3 +17,5 @@ class FunctionDataFlowFacts; [[nodiscard]] const library_summary::FunctionDataFlowFacts &getLibCSummary(); } // namespace psr + +#endif // PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LIBCSUMMARY_H diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp new file mode 100644 index 0000000000..20d4030794 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp @@ -0,0 +1,154 @@ +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" +#include 
"phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" + +using namespace psr; + +using FFTemplates = FlowFunctionTemplates< + detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl::d_t, + detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl::FlowFunctionType:: + container_type>; +using container_type = FFTemplates::container_type; + +static container_type +getReachableAllocationSites(LLVMAliasInfoRef AS, const llvm::Value *Pointer, + const llvm::Instruction *Context) { + if (!Pointer->getType()->isPointerTy()) { + return {Pointer}; + } + + container_type Ret; + auto AllocSites = AS.getReachableAllocationSites(Pointer, true, Context); + Ret.insert(AllocSites->begin(), AllocSites->end()); + if (Ret.empty()) { + Ret.insert(Pointer); + } + + return Ret; +} + +auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: + getNormalFlowFunctionImpl(n_t Curr, n_t Succ) -> FlowFunctionPtrType { + + if (const auto *Store = llvm::dyn_cast(Curr)) { + + container_type Gen = + getReachableAllocationSites(AS, Store->getPointerOperand(), Store); + + auto ValueAllocSites = + getReachableAllocationSites(AS, Store->getValueOperand(), Store); + + return FFTemplates::lambdaFlow([Store, Gen{std::move(Gen)}, + ValueAliases{std::move(ValueAllocSites)}]( + d_t Source) -> container_type { + if (Store->getPointerOperand() == Source || + Store->getPointerOperand()->stripPointerCastsAndAliases() == Source) { + return {}; + } + + if (Store->getValueOperand() == Source || ValueAliases.count(Source)) { + auto Ret = Gen; + Ret.insert(Source); + return Ret; + } + + return {Source}; + }); + } + + if (const auto *Load = llvm::dyn_cast(Curr)) { + auto AllocSites = + getReachableAllocationSites(AS, Load->getPointerOperand(), Load); + + return FFTemplates::lambdaFlow([Load, AllocSites{std::move(AllocSites)}]( + d_t Source) -> container_type { + if (Source == Load->getPointerOperand() || AllocSites.count(Source)) { + return {Source, Load}; + } + + 
return {Source}; + }); + } + + return this->IDENoAliasDefaultFlowFunctionsImpl::getNormalFlowFunctionImpl( + Curr, Succ); +} + +auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: + getCallFlowFunctionImpl(n_t CallInst, f_t CalleeFun) + -> FlowFunctionPtrType { + if (const auto *CallSite = llvm::dyn_cast(CallInst)) { + return mapFactsToCallee( + CallSite, CalleeFun, [CallSite, AS = AS](d_t Arg, d_t Source) { + if (Arg == Source) { + return true; + } + + return Arg->getType()->isPointerTy() && + Source->getType()->isPointerTy() && + AS.isInReachableAllocationSites(Arg, Source, true, CallSite); + }); + } + + return FFTemplates::killAllFlows(); +} + +static container_type getReturnedAliases(const container_type &Facts, + psr::LLVMAliasInfoRef AS, + const llvm::Instruction *CallSite) { + container_type Ret; + for (const auto *Fact : Facts) { + const auto &AllocSites = getReachableAllocationSites(AS, Fact, CallSite); + Ret.insert(AllocSites.begin(), AllocSites.end()); + } + + return Ret; +} + +auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl::getRetFlowFunctionImpl( + n_t CallSite, f_t /*CalleeFun*/, n_t ExitInst, n_t /*RetSite*/) + -> FlowFunctionPtrType { + container_type Gen; + + if (const auto *Call = llvm::dyn_cast(CallSite)) { + const auto PostProcessFacts = [AS = AS, Call](container_type &Facts) { + Facts = getReturnedAliases(Facts, AS, Call); + }; + + const auto PropagateParameter = [AS = AS, ExitInst](d_t Formal, + d_t Source) { + if (!Formal->getType()->isPointerTy()) { + return false; + } + + return Formal == Source || + AS.isInReachableAllocationSites(Formal, Source, true, ExitInst); + }; + + const auto PropagateRet = [AS = AS, ExitInst](d_t RetVal, d_t Source) { + if (RetVal == Source) { + return true; + } + + return RetVal->getType()->isPointerTy() && + Source->getType()->isPointerTy() && + AS.isInReachableAllocationSites(RetVal, Source, true, ExitInst); + }; + + return mapFactsToCaller(Call, ExitInst, PropagateParameter, PropagateRet, + {}, 
true, true, PostProcessFacts); + } + + return FFTemplates::killAllFlows(); +} + +auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: + getCallToRetFlowFunctionImpl(n_t CallSite, n_t RetSite, + llvm::ArrayRef Callees) + -> FlowFunctionPtrType { + return this->IDENoAliasDefaultFlowFunctionsImpl::getCallToRetFlowFunctionImpl( + CallSite, RetSite, Callees); +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp new file mode 100644 index 0000000000..61f6d2e12b --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -0,0 +1,79 @@ +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" + +#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" + +using namespace psr; + +auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() + -> InitialSeeds { + auto UserSeeds = UserProblem->initialSeeds(); + InitialSeeds::GeneralizedSeeds Ret; + + for (const auto &[Inst, Facts] : UserSeeds.getSeeds()) { + auto &SeedsAtInst = Ret[Inst]; + for (const auto &[Fact, Weight] : Facts) { + SeedsAtInst[Fact] = {}; + } + } + + return {std::move(Ret)}; +} + +auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( + n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { + // TODO: Store, Load, Gep + + return nullptr; +} + +auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( + n_t CallSite, d_t SrcNode, f_t DestinationFunction, d_t DestNode) + -> EdgeFunction { + // This is naturally identity + return EdgeIdentity{}; +} + +auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( + n_t CallSite, f_t CalleeFunction, n_t ExitStmt, d_t ExitNode, n_t RetSite, + d_t RetNode) -> EdgeFunction { + // TODO: Need to map the fields to the ret-site + + return nullptr; +} + +auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( + n_t CallSite, d_t CallNode, n_t RetSite, d_t 
RetSiteNode, + llvm::ArrayRef Callees) -> EdgeFunction { + // This is naturally identity + return EdgeIdentity{}; +} + +auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( + n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { + // TODO: Is that correct? -- We may need to handle field-indirections here as + // well + return EdgeIdentity{}; +} + +auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, + const EdgeFunction &R) + -> EdgeFunction { + if (auto DfltCompose = psr::defaultComposeOrNull(L, R)) { + return DfltCompose; + } + + // TODO: Here, the real magic happens! + // --> Look in the paper at pages 12-13 +} + +auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, + const EdgeFunction &R) + -> EdgeFunction { + if (auto DfltJoin = psr::defaultJoinOrNull(L, R)) { + return DfltJoin; + } + + // TODO: Join + + return AllBottom{}; +} From 948a35df747ecd05d634dd5093dc8a88ceef8d50 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 9 Jun 2025 15:19:51 +0200 Subject: [PATCH 02/29] Continue with CFL implementation (WIP) --- include/phasar/Domain/LatticeDomain.h | 9 + .../FieldSensAllocSitesAwareIFDSProblem.h | 66 ++++++- include/phasar/Utils/Union.h | 33 ++++ .../FieldSensAllocSitesAwareIFDSProblem.cpp | 167 +++++++++++++++++- 4 files changed, 270 insertions(+), 5 deletions(-) create mode 100644 include/phasar/Utils/Union.h diff --git a/include/phasar/Domain/LatticeDomain.h b/include/phasar/Domain/LatticeDomain.h index d45f7bc45a..17aa6936a2 100644 --- a/include/phasar/Domain/LatticeDomain.h +++ b/include/phasar/Domain/LatticeDomain.h @@ -12,6 +12,7 @@ #include "phasar/Utils/ByRef.h" #include "phasar/Utils/JoinLattice.h" +#include "phasar/Utils/Macros.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/ADT/Hashing.h" @@ -21,6 +22,7 @@ #include "llvm/Support/raw_ostream.h" #include +#include #include #include @@ -98,6 +100,13 @@ struct LatticeDomain : public std::variant { 
assert(std::holds_alternative(*this)); return std::get(*this); } + + template + void onValue(TransformFn Transform, ArgsT &&...Args) { + if (auto *Val = getValueOrNull()) { + std::invoke(std::move(Transform), *Val, PSR_FWD(Args)...); + } + } }; template Loads; llvm::SmallVector Stores; llvm::SmallDenseSet Kills; // Add an offset for pending GEPs; INT32_MIN is Top int32_t Offset = {0}; + int32_t EmptyTombstone = 0; + + [[nodiscard]] bool kills(int32_t Off) const { + return Off != TopOffset && Kills.count(Off); + } + + [[nodiscard]] bool + operator==(const CFLFieldAccessPath &Other) const noexcept { + return EmptyTombstone == Other.EmptyTombstone && Loads == Other.Loads && + Stores == Other.Stores && Kills == Other.Kills; + } + + bool operator!=(const CFLFieldAccessPath &Other) const noexcept { + return !(*this == Other); + } + + friend size_t hash_value(const CFLFieldAccessPath &FieldString) noexcept; +}; + +struct CFLFieldAccessPathDMI { + static CFLFieldAccessPath getEmptyKey() { + CFLFieldAccessPath Ret{}; + Ret.EmptyTombstone = 1; + return Ret; + } + static CFLFieldAccessPath getTombstoneKey() { + CFLFieldAccessPath Ret{}; + Ret.EmptyTombstone = 2; + return Ret; + } + static auto getHashValue(const CFLFieldAccessPath &FieldString) noexcept { + return hash_value(FieldString); + } + static bool isEqual(const CFLFieldAccessPath &L, + const CFLFieldAccessPath &R) noexcept { + return L == R; + } +}; + +struct CFLFieldSensEdgeValue { + // TODO: JoinLatticeTraits + + llvm::SmallDenseSet Paths; + + void applyStore(); + void applyLoad(); + void applyKill(); + void applyGep(GEPEvent Evt); }; template struct CFLFieldSensAnalysisDomain : AnalysisDomainTy { - using l_t = CFLFieldSensEdgeValue; + using l_t = LatticeDomain; }; class FieldSensAllocSitesAwareIFDSProblem diff --git a/include/phasar/Utils/Union.h b/include/phasar/Utils/Union.h new file mode 100644 index 0000000000..c7148cb5d5 --- /dev/null +++ b/include/phasar/Utils/Union.h @@ -0,0 +1,33 @@ 
+/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_UTILS_UNOIN_H +#define PHASAR_UTILS_UNOIN_H + +#include + +namespace psr { +template +[[nodiscard]] SetT setUnion(SetT First, SetT Second, + bool *ChangedPtr = nullptr) { + bool FirstSmaller = First.size() < Second.size(); + auto &Smaller = FirstSmaller ? First : Second; + + bool ChangedBuf = false; + bool &Changed = ChangedPtr ? *ChangedPtr : ChangedBuf; + + auto Ret = std::move(FirstSmaller ? Second : First); + for (auto &&Elem : Smaller) { + Changed |= Ret.insert(Elem).second; + } + return Ret; +} +} // namespace psr + +#endif // PHASAR_UTILS_UNOIN_H diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 61f6d2e12b..8011ce2b68 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -1,9 +1,125 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/Utils/Fn.h" +#include "phasar/Utils/JoinLattice.h" +#include "phasar/Utils/Union.h" + +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/ErrorHandling.h" + +#include +#include +#include using namespace psr; +namespace { + +using l_t = LatticeDomain; + +struct FieldSensEdgeFunctionComposer : EdgeFunctionComposer { + + static EdgeFunction + join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) { + llvm::report_fatal_error("Use combine() instead!"); 
+ } +}; + +struct StoreEdgeFunction { + using l_t = LatticeDomain; + + [[nodiscard]] l_t computeTarget(l_t Source) const { + Source.onValue(fn<&CFLFieldSensEdgeValue::applyStore>); + return Source; + } + + static EdgeFunction compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction) { + llvm::report_fatal_error("Use extend() instead!"); + } + + static EdgeFunction join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) { + llvm::report_fatal_error("Use combine() instead!"); + } +}; +} // namespace + +void CFLFieldSensEdgeValue::applyStore() { + for (auto &F : Paths) { + // TODO: K-limiting! + F.Stores.push_back(std::exchange(F.Offset, 0)); + } + + // TODO: What if Paths is empty? Or can't that happen? +} + +void CFLFieldSensEdgeValue::applyLoad() { + for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { + auto It = IIt++; + auto &F = *It; + + auto Offs = std::exchange(F.Offset, 0); + if (F.Stores.empty()) { + if (F.kills(Offs)) { + Paths.erase(It); + } else { + // TODO: K-limiting! 
+ F.Loads.push_back(Offs); + F.Kills.clear(); + } + continue; + } + + if (F.Stores.back() != Offs) { + Paths.erase(It); + continue; + } + + assert(F.Stores.back() == Offs); + F.Stores.pop_back(); + } +} +void CFLFieldSensEdgeValue::applyKill() { + for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { + auto It = IIt++; + auto &F = *It; + + if (F.Stores.empty()) { + F.Kills.insert(F.Offset); + continue; + } + + if (F.Stores.back() == F.Offset) { + Paths.erase(It); + continue; + } + + assert(F.Stores.back() != F.Offset); + // fallthrough + } +} +void CFLFieldSensEdgeValue::applyGep(GEPEvent Evt) { + for (auto &F : Paths) { + F.Offset += Evt.Field; + // TODO: k-limiting + } +} + +size_t psr::hash_value(const CFLFieldAccessPath &FieldString) noexcept { + auto HCL = llvm::hash_combine_range(FieldString.Loads.begin(), + FieldString.Loads.end()); + auto HCS = llvm::hash_combine_range(FieldString.Stores.begin(), + FieldString.Stores.end()); + // Xor does not care about the order + auto HCK = std::accumulate(FieldString.Kills.begin(), FieldString.Kills.end(), + 0, std::bit_xor<>{}); + return llvm::hash_combine(HCL, HCS, HCK); +} + auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() -> InitialSeeds { auto UserSeeds = UserProblem->initialSeeds(); @@ -62,17 +178,64 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, return DfltCompose; } - // TODO: Here, the real magic happens! 
- // --> Look in the paper at pages 12-13 + if (R.isa>()) { + return R; + } + + if (L.isConstant()) { + auto FieldStringSet = R.computeTarget(L.computeTarget(bottomElement())); + + if (FieldStringSet.isBottom()) { + return AllBottom{}; + } + if (FieldStringSet.isTop()) { + llvm::errs() << "WARNING: We should never produce TOP!"; + return AllTop{}; + } + + return ConstantEdgeFunction{ + NonTopBotValue::unwrap(std::move(FieldStringSet)), + }; + } + + return FieldSensEdgeFunctionComposer{{L, R}}; } auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { + if (L.isa>()) { + // TODO + } + if (auto DfltJoin = psr::defaultJoinOrNull(L, R)) { return DfltJoin; } + if (L.isConstant() && R.isConstant()) { + auto LSet = L.computeTarget(bottomElement()); + auto RSet = R.computeTarget(bottomElement()); + + if (LSet.isBottom() || RSet.isBottom()) { + return AllBottom{}; + } + + assert(!LSet.isTop() && !RSet.isTop()); + + bool LeftSmaller = + LSet.assertGetValue().Paths.size() < RSet.assertGetValue().Paths.size(); + + bool Changed = false; + auto Union = setUnion(std::move(LSet.assertGetValue().Paths), + std::move(RSet.assertGetValue().Paths), &Changed); + + if (Changed) { + return ConstantEdgeFunction{{std::move(Union)}}; + } + + return LeftSmaller ? 
L : R; + } + // TODO: Join return AllBottom{}; From b791a94118a8e0217981cb7c1ab0ac0d6684d266 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 9 Jun 2025 18:23:37 +0200 Subject: [PATCH 03/29] Fully compiling CFL-field-sens IFDS implementation --- .../FieldSensAllocSitesAwareIFDSProblem.h | 21 +- include/phasar/Utils/Union.h | 16 +- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 360 ++++++++++++++---- 3 files changed, 308 insertions(+), 89 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 1d4afece3f..dd8c82f76c 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -43,7 +43,7 @@ struct CFLFieldAccessPath { llvm::SmallVector Loads; llvm::SmallVector Stores; llvm::SmallDenseSet Kills; - // Add an offset for pending GEPs; INT32_MIN is Top + // Add an offset for pending GEPs; INT16_MIN is Top int32_t Offset = {0}; int32_t EmptyTombstone = 0; @@ -85,14 +85,24 @@ struct CFLFieldAccessPathDMI { }; struct CFLFieldSensEdgeValue { - // TODO: JoinLatticeTraits - llvm::SmallDenseSet Paths; - void applyStore(); - void applyLoad(); + void applyStore(uint8_t DepthKLimit); + void applyGepAndStore(GEPEvent Evt, uint8_t DepthKLimit); + void applyLoad(uint8_t DepthKLimit); + void applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit); void applyKill(); + void applyGepAndKill(GEPEvent Evt); void applyGep(GEPEvent Evt); + void applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit); + void applyTransforms(const CFLFieldSensEdgeValue &Txns, uint8_t DepthKLimit); + + bool operator==(const CFLFieldSensEdgeValue &Other) const noexcept { + return Paths == Other.Paths; + } + bool operator!=(const CFLFieldSensEdgeValue &Other) const noexcept { + return !(*this == Other); + } }; template @@ -193,6 +203,7 @@ 
class FieldSensAllocSitesAwareIFDSProblem private: LLVMAliasInfoRef AS; IFDSTabulationProblem *UserProblem{}; + uint8_t DepthKLimit = 5; // Original from the paper }; } // namespace psr diff --git a/include/phasar/Utils/Union.h b/include/phasar/Utils/Union.h index c7148cb5d5..23b6aaa4f7 100644 --- a/include/phasar/Utils/Union.h +++ b/include/phasar/Utils/Union.h @@ -10,24 +10,32 @@ #ifndef PHASAR_UTILS_UNOIN_H #define PHASAR_UTILS_UNOIN_H +#include "phasar/Utils/Macros.h" + #include namespace psr { -template -[[nodiscard]] SetT setUnion(SetT First, SetT Second, - bool *ChangedPtr = nullptr) { +template +[[nodiscard]] std::decay_t setUnion(SetLT &&First, SetRT &&Second, + bool *ChangedPtr = nullptr) { bool FirstSmaller = First.size() < Second.size(); auto &Smaller = FirstSmaller ? First : Second; bool ChangedBuf = false; bool &Changed = ChangedPtr ? *ChangedPtr : ChangedBuf; - auto Ret = std::move(FirstSmaller ? Second : First); + auto Ret = [&] { + if (FirstSmaller) { + return std::decay_t(PSR_FWD(Second)); + } + return PSR_FWD(First); + }(); for (auto &&Elem : Smaller) { Changed |= Ret.insert(Elem).second; } return Ret; } + } // namespace psr #endif // PHASAR_UTILS_UNOIN_H diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 8011ce2b68..d95adbc2ec 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -2,13 +2,18 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/Utils/Fn.h" -#include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/Union.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/Hashing.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Casting.h" #include 
"llvm/Support/ErrorHandling.h" +#include #include #include #include @@ -19,96 +24,196 @@ namespace { using l_t = LatticeDomain; -struct FieldSensEdgeFunctionComposer : EdgeFunctionComposer { +constexpr static int32_t addOffsets(int32_t L, int32_t R) noexcept { + if (L == CFLFieldAccessPath::TopOffset || + R == CFLFieldAccessPath::TopOffset) { + return CFLFieldAccessPath::TopOffset; + } - static EdgeFunction - join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { - llvm::report_fatal_error("Use combine() instead!"); + auto Sum = int64_t(L) + int64_t(R); + if (Sum < INT32_MIN || Sum > INT32_MAX) { + Sum = CFLFieldAccessPath::TopOffset; } -}; -struct StoreEdgeFunction { + return int32_t(Sum); +} + +struct CFLFieldSensEdgeFunction { using l_t = LatticeDomain; + CFLFieldSensEdgeValue Transform{}; + uint8_t DepthKLimit{}; [[nodiscard]] l_t computeTarget(l_t Source) const { - Source.onValue(fn<&CFLFieldSensEdgeValue::applyStore>); + Source.onValue(fn<&CFLFieldSensEdgeValue::applyTransforms>, Transform, + DepthKLimit); return Source; } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { llvm::report_fatal_error("Use extend() instead!"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { + static EdgeFunction + join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { llvm::report_fatal_error("Use combine() instead!"); } + + bool operator==(const CFLFieldSensEdgeFunction &Other) const noexcept { + assert(DepthKLimit == Other.DepthKLimit); + return Transform == Other.Transform; + } }; + } // namespace -void CFLFieldSensEdgeValue::applyStore() { - for (auto &F : Paths) { - // TODO: K-limiting! 
- F.Stores.push_back(std::exchange(F.Offset, 0)); +void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, + uint8_t DepthKLimit) { + auto Save = std::exchange(Paths, {}); + Paths.reserve(Save.size()); + + for (auto F : Save) { + // TODO: Check, whether we can safely exchange Offset with 0 here! + + if (F.Stores.size() == DepthKLimit) { + // TODO: Optimize: + F.Stores.erase(F.Stores.begin()); + } + F.Stores.push_back(std::exchange(F.Offset, 0) + Evt.Field); + Paths.insert(std::move(F)); } // TODO: What if Paths is empty? Or can't that happen? + // --> Does not happen, as long as the fact is not killed in all paths } -void CFLFieldSensEdgeValue::applyLoad() { - for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { - auto It = IIt++; - auto &F = *It; +void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { + auto Save = std::exchange(Paths, {}); - auto Offs = std::exchange(F.Offset, 0); + for (const auto &F : Save) { + auto Offs = F.Offset + Evt.Field; if (F.Stores.empty()) { + if (F.kills(Offs)) { - Paths.erase(It); - } else { - // TODO: K-limiting! - F.Loads.push_back(Offs); - F.Kills.clear(); + continue; + } + auto FF = F; + FF.Offset = 0; + + // TODO: Is this application of k-limiting correct here? + // cf. 
Section 4.2.3 "K-Limiting" in the paper + if (F.Loads.size() == DepthKLimit) { + Paths.insert(std::move(FF)); + continue; } + + FF.Loads.push_back(Offs); + FF.Kills.clear(); + Paths.insert(std::move(FF)); + continue; } if (F.Stores.back() != Offs) { - Paths.erase(It); continue; } assert(F.Stores.back() == Offs); - F.Stores.pop_back(); + auto FF = F; + FF.Offset = 0; + FF.Stores.pop_back(); + Paths.insert(std::move(FF)); } } -void CFLFieldSensEdgeValue::applyKill() { - for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { - auto It = IIt++; - auto &F = *It; + +void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { + auto Save = std::exchange(Paths, {}); + + for (const auto &F : Save) { + auto Offs = F.Offset + Evt.Field; if (F.Stores.empty()) { - F.Kills.insert(F.Offset); + auto FF = F; + FF.Kills.insert(Offs); + Paths.insert(std::move(FF)); continue; } - if (F.Stores.back() == F.Offset) { - Paths.erase(It); + if (F.Stores.back() == Offs) { continue; } - assert(F.Stores.back() != F.Offset); - // fallthrough + assert(F.Stores.back() != Offs); + Paths.insert(F); } } + void CFLFieldSensEdgeValue::applyGep(GEPEvent Evt) { - for (auto &F : Paths) { - F.Offset += Evt.Field; - // TODO: k-limiting + auto Save = std::exchange(Paths, {}); + Paths.reserve(Save.size()); + + for (auto F : Save) { + F.Offset = addOffsets(F.Offset, Evt.Field); + Paths.insert(std::move(F)); } } +void CFLFieldSensEdgeValue::applyStore(uint8_t DepthKLimit) { + applyGepAndStore(GEPEvent{0}, DepthKLimit); +} +void CFLFieldSensEdgeValue::applyLoad(uint8_t DepthKLimit) { + applyGepAndLoad(GEPEvent{0}, DepthKLimit); +} +void CFLFieldSensEdgeValue::applyKill() { return applyGepAndKill(GEPEvent{0}); } + +void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, + uint8_t DepthKLimit) { + // TODO: Optimize! 
+ + if (Txn.Offset) { + applyGep(GEPEvent{Txn.Offset}); + } + + for (auto Ld : Txn.Loads) { + applyGepAndLoad(GEPEvent{Ld}, DepthKLimit); + } + for (auto Kl : Txn.Kills) { + applyGepAndKill(GEPEvent{Kl}); + } + for (auto St : Txn.Stores) { + applyGepAndStore(GEPEvent{St}, DepthKLimit); + } +} + +void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, + uint8_t DepthKLimit) { + if (Txns.Paths.empty()) { + Paths.clear(); + return; + } + + if (Txns.Paths.size() == 1) { + applyTransform(*Txns.Paths.begin(), DepthKLimit); + return; + } + + auto Ret = *this; + + auto It = Txns.Paths.begin(); + auto End = Txns.Paths.end(); + Ret.applyTransform(*It, DepthKLimit); + + for (++It; It != End; ++It) { + auto Tmp = *this; + Tmp.applyTransform(*It, DepthKLimit); + Ret.Paths.insert(Tmp.Paths.begin(), Tmp.Paths.end()); + } + + *this = std::move(Ret); +} + size_t psr::hash_value(const CFLFieldAccessPath &FieldString) noexcept { auto HCL = llvm::hash_combine_range(FieldString.Loads.begin(), FieldString.Loads.end()); @@ -137,14 +242,81 @@ auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { - // TODO: Store, Load, Gep + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { + // Gen from zero - return nullptr; + return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + } + + if (const auto *Store = llvm::dyn_cast(Curr)) { + const auto *PointerOp = Store->getPointerOperand(); + if (CurrNode == SuccNode && + (PointerOp == CurrNode || + PointerOp->stripPointerCastsAndAliases() == CurrNode)) { + // Kill + + CFLFieldAccessPath FieldString{}; + FieldString.Kills.insert(0); + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + } + + const auto *ValueOp = Store->getValueOperand(); + if (ValueOp == CurrNode || + AS.isInReachableAllocationSites(ValueOp, CurrNode, true, Store)) { + if (PointerOp == 
SuccNode || + AS.isInReachableAllocationSites(PointerOp, SuccNode, true, Store)) { + // Store + + CFLFieldAccessPath FieldString{}; + FieldString.Stores.push_back(0); + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, + DepthKLimit}; + } + } + + // unaffected by the store + return EdgeIdentity{}; + } + + if (Curr == SuccNode) { + + if (const auto *Load = llvm::dyn_cast(Curr)) { + // Load + + CFLFieldAccessPath FieldString{}; + FieldString.Loads.push_back(0); + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + } + + if (const auto *Gep = llvm::dyn_cast(Curr)) { + llvm::APInt Offset(64, 0); + int32_t OffsVal = CFLFieldAccessPath::TopOffset; + if (Gep->accumulateConstantOffset(IRDB->getModule()->getDataLayout(), + Offset)) { + auto RawOffsVal = Offset.getSExtValue(); + if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { + OffsVal = int32_t(RawOffsVal); + } + } + + CFLFieldAccessPath FieldString{}; + FieldString.Offset = OffsVal; + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + } + } + + return EdgeIdentity{}; } auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( n_t CallSite, d_t SrcNode, f_t DestinationFunction, d_t DestNode) -> EdgeFunction { + if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { + // Gen from zero + + return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + } + // This is naturally identity return EdgeIdentity{}; } @@ -152,20 +324,36 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( n_t CallSite, f_t CalleeFunction, n_t ExitStmt, d_t ExitNode, n_t RetSite, d_t RetNode) -> EdgeFunction { - // TODO: Need to map the fields to the ret-site + if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { + // Gen from zero - return nullptr; + return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + } + + return EdgeIdentity{}; } auto 
FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode, llvm::ArrayRef Callees) -> EdgeFunction { + if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { + // Gen from zero + + return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + } + // This naturally identity return EdgeIdentity{}; } auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { + // Gen from zero + + return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + } + // TODO: Is that correct? -- We may need to handle field-indirections here as // well return EdgeIdentity{}; @@ -182,61 +370,73 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, return R; } - if (L.isConstant()) { - auto FieldStringSet = R.computeTarget(L.computeTarget(bottomElement())); + const auto *FldSensL = L.dyn_cast(); + const auto *FldSensR = R.dyn_cast(); - if (FieldStringSet.isBottom()) { - return AllBottom{}; - } - if (FieldStringSet.isTop()) { - llvm::errs() << "WARNING: We should never produce TOP!"; - return AllTop{}; - } + if (FldSensL && FldSensR) { - return ConstantEdgeFunction{ - NonTopBotValue::unwrap(std::move(FieldStringSet)), - }; + // TODO: Be smarter with copying the transforms: + auto Txn = FldSensL->Transform; + Txn.applyTransforms(FldSensR->Transform, DepthKLimit); + // TODO: k-limit the number of paths! 
+ return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; } - return FieldSensEdgeFunctionComposer{{L, R}}; + llvm::report_fatal_error("[FieldSensAllocSitesAwareIFDSProblem::extend]: " + "Unexpected edge functions: " + + llvm::Twine(to_string(L)) + " EXTEND " + + llvm::Twine(to_string(R))); } auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { - if (L.isa>()) { - // TODO + if (llvm::isa>(R) || llvm::isa>(L)) { + return R; } - - if (auto DfltJoin = psr::defaultJoinOrNull(L, R)) { - return DfltJoin; + if (llvm::isa>(R) || llvm::isa>(L)) { + return L; } - if (L.isConstant() && R.isConstant()) { - auto LSet = L.computeTarget(bottomElement()); - auto RSet = R.computeTarget(bottomElement()); + const auto *FldSensL = L.dyn_cast(); + const auto *FldSensR = R.dyn_cast(); - if (LSet.isBottom() || RSet.isBottom()) { - return AllBottom{}; - } + if (FldSensL) { + if (FldSensR) { - assert(!LSet.isTop() && !RSet.isTop()); + bool LeftSmaller = + FldSensL->Transform.Paths.size() < FldSensR->Transform.Paths.size(); - bool LeftSmaller = - LSet.assertGetValue().Paths.size() < RSet.assertGetValue().Paths.size(); + bool Changed = false; + auto Union = setUnion(FldSensL->Transform.Paths, + FldSensR->Transform.Paths, &Changed); - bool Changed = false; - auto Union = setUnion(std::move(LSet.assertGetValue().Paths), - std::move(RSet.assertGetValue().Paths), &Changed); + if (Changed) { + // TODO: k-limit the number of paths! + return CFLFieldSensEdgeFunction{{std::move(Union)}, DepthKLimit}; + } - if (Changed) { - return ConstantEdgeFunction{{std::move(Union)}}; + return LeftSmaller ? L : R; } - return LeftSmaller ? 
L : R; - } + if (R.isa>()) { + if (FldSensL->Transform.Paths.contains(CFLFieldAccessPath{})) { + return L; + } - // TODO: Join + auto Txn = FldSensL->Transform; + Txn.Paths.insert(CFLFieldAccessPath{}); + return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + } + } else if (FldSensR && L.isa>()) { + if (FldSensR->Transform.Paths.contains(CFLFieldAccessPath{})) { + return R; + } + + auto Txn = FldSensR->Transform; + Txn.Paths.insert(CFLFieldAccessPath{}); + return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + } return AllBottom{}; } From e555e1e3b900dc14911d6fa251d32da034362402 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 10 Jun 2025 19:35:43 +0200 Subject: [PATCH 04/29] Add some simple unittests (WIP) --- .../DefaultAllocSitesAwareIDEProblem.h | 12 +- .../FieldSensAllocSitesAwareIFDSProblem.h | 11 + ...DefaultAllocSitesAwareIDEFlowFunctions.cpp | 29 ++- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 231 +++++++++++++----- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 193 +++++++++++++++ .../DataFlow/IfdsIde/CMakeLists.txt | 1 + 6 files changed, 406 insertions(+), 71 deletions(-) create mode 100644 unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h index 6ae7c56962..e8fb77e83e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h @@ -36,6 +36,8 @@ class IDEAllocSitesAwareDefaultFlowFunctionsImpl using IDENoAliasDefaultFlowFunctionsImpl::isFunctionModeled; + bool EnableStrongUpdateStore = true; + [[nodiscard]] constexpr LLVMAliasInfoRef getAliasInfo() const noexcept { return AS; } @@ -99,6 +101,10 @@ class DefaultAllocSitesAwareIDEProblem std::move(ZeroValue)), detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl(AS) {} + void 
disableStrongUpdateStore() noexcept { + this->EnableStrongUpdateStore = false; + } + [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override { return getNormalFlowFunctionImpl(Curr, Succ); @@ -124,7 +130,7 @@ class DefaultAllocSitesAwareIDEProblem }; class DefaultAllocSitesAwareIFDSProblem - : public IFDSTabulationProblem, + : public IFDSTabulationProblem, protected detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl { public: /// Constructs an IFDSTabulationProblem with the usual arguments + alias @@ -139,6 +145,10 @@ class DefaultAllocSitesAwareIFDSProblem : IFDSTabulationProblem(IRDB, std::move(EntryPoints), ZeroValue), detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl(AS) {} + void disableStrongUpdateStore() noexcept { + this->EnableStrongUpdateStore = false; + } + [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override { return getNormalFlowFunctionImpl(Curr, Succ); diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index dd8c82f76c..8e6009b4a1 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -17,6 +17,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" #include @@ -47,6 +48,10 @@ struct CFLFieldAccessPath { int32_t Offset = {0}; int32_t EmptyTombstone = 0; + [[nodiscard]] bool empty() const noexcept { + return Loads.empty() && Stores.empty() && Kills.empty() && Offset == 0; + } + [[nodiscard]] bool kills(int32_t Off) const { return Off != TopOffset && Kills.count(Off); } @@ -62,6 +67,9 @@ struct CFLFieldAccessPath { } friend size_t hash_value(const CFLFieldAccessPath &FieldString) noexcept; + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const 
CFLFieldAccessPath &FieldString); }; struct CFLFieldAccessPathDMI { @@ -103,6 +111,9 @@ struct CFLFieldSensEdgeValue { bool operator!=(const CFLFieldSensEdgeValue &Other) const noexcept { return !(*this == Other); } + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const CFLFieldSensEdgeValue &EV); }; template diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp index 20d4030794..7557efbaf0 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp @@ -2,9 +2,12 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/Casting.h" +#include + using namespace psr; using FFTemplates = FlowFunctionTemplates< @@ -41,14 +44,30 @@ auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: auto ValueAllocSites = getReachableAllocationSites(AS, Store->getValueOperand(), Store); + if (EnableStrongUpdateStore) { + + return FFTemplates::lambdaFlow([Store, Gen{std::move(Gen)}, + ValueAliases{std::move(ValueAllocSites)}]( + d_t Source) -> container_type { + if (Store->getPointerOperand() == Source || + Store->getPointerOperand()->stripPointerCastsAndAliases() == + Source) { + return {}; + } + + if (Store->getValueOperand() == Source || ValueAliases.count(Source)) { + auto Ret = Gen; + Ret.insert(Source); + return Ret; + } + + return {Source}; + }); + } + return FFTemplates::lambdaFlow([Store, Gen{std::move(Gen)}, ValueAliases{std::move(ValueAllocSites)}]( d_t Source) -> container_type { - if (Store->getPointerOperand() == Source || - Store->getPointerOperand()->stripPointerCastsAndAliases() == Source) { - return {}; - } - if (Store->getValueOperand() == Source || ValueAliases.count(Source)) { 
auto Ret = Gen; Ret.insert(Source); diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index d95adbc2ec..83b727530d 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -3,15 +3,19 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Fn.h" #include "phasar/Utils/Union.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -65,6 +69,11 @@ struct CFLFieldSensEdgeFunction { assert(DepthKLimit == Other.DepthKLimit); return Transform == Other.Transform; } + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const CFLFieldSensEdgeFunction &EF) { + return OS << "Txn[" << EF.Transform << ']'; + } }; } // namespace @@ -90,6 +99,8 @@ void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, } void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { + llvm::errs() << "[applyGepAndLoad]: " << *this << " + " << Evt.Field << "\n"; + auto Save = std::exchange(Paths, {}); for (const auto &F : Save) { @@ -125,7 +136,10 @@ void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { FF.Offset = 0; FF.Stores.pop_back(); Paths.insert(std::move(FF)); + llvm::errs() << "> pop_back\n"; } + + llvm::errs() << "=> " << *this << '\n'; } void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { @@ -155,7 +169,11 @@ void CFLFieldSensEdgeValue::applyGep(GEPEvent Evt) { 
Paths.reserve(Save.size()); for (auto F : Save) { - F.Offset = addOffsets(F.Offset, Evt.Field); + if (F.Stores.empty()) { + F.Offset = addOffsets(F.Offset, Evt.Field); + } else { + F.Stores.back() = addOffsets(F.Stores.back(), -Evt.Field); + } Paths.insert(std::move(F)); } } @@ -225,6 +243,46 @@ size_t psr::hash_value(const CFLFieldAccessPath &FieldString) noexcept { return llvm::hash_combine(HCL, HCS, HCK); } +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, + const CFLFieldAccessPath &FieldString) { + if (FieldString.empty()) { + return OS << "ε"; + } + + if (FieldString.Offset) { + if (FieldString.Offset > 0) { + OS << '+'; + } + + OS << FieldString.Offset << '.'; + } + + for (auto Ld : FieldString.Loads) { + OS << 'L' << Ld << '.'; + } + + for (auto Kl : FieldString.Kills) { + OS << 'K' << Kl << '.'; + } + + for (auto St : FieldString.Stores) { + OS << 'S' << St << '.'; + } + + return OS; +} + +llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, + const CFLFieldSensEdgeValue &EV) { + if (EV.Paths.size() == 1) { + return OS << *EV.Paths.begin(); + } + + OS << "{ "; + llvm::interleaveComma(EV.Paths, OS); + return OS << " }"; +} + auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() -> InitialSeeds { auto UserSeeds = UserProblem->initialSeeds(); @@ -233,13 +291,31 @@ auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() for (const auto &[Inst, Facts] : UserSeeds.getSeeds()) { auto &SeedsAtInst = Ret[Inst]; for (const auto &[Fact, Weight] : Facts) { - SeedsAtInst[Fact] = {}; + SeedsAtInst[Fact] = CFLFieldSensEdgeValue{{CFLFieldAccessPath{}}}; } } return {std::move(Ret)}; } +static std::pair +getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { + llvm::APInt Offset(64, 0); + int32_t OffsVal = CFLFieldAccessPath::TopOffset; + const auto *Base = V->stripAndAccumulateConstantOffsets(DL, Offset, true); + + if (llvm::isa(Base)) { + return {Base->stripPointerCastsAndAliases(), CFLFieldAccessPath::TopOffset}; + } + + auto RawOffsVal = 
Offset.getSExtValue(); + if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { + OffsVal = int32_t(RawOffsVal); + } + + return {Base->stripPointerCastsAndAliases(), OffsVal}; +} + auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { @@ -250,13 +326,19 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (const auto *Store = llvm::dyn_cast(Curr)) { const auto *PointerOp = Store->getPointerOperand(); + + // TODO;: How to deal with BasePtr? + if (CurrNode == SuccNode && (PointerOp == CurrNode || PointerOp->stripPointerCastsAndAliases() == CurrNode)) { // Kill + auto [BasePtr, Offset] = + getBaseAndOffset(PointerOp, IRDB->getModule()->getDataLayout()); + CFLFieldAccessPath FieldString{}; - FieldString.Kills.insert(0); + FieldString.Kills.insert(Offset); return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } @@ -267,8 +349,11 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( AS.isInReachableAllocationSites(PointerOp, SuccNode, true, Store)) { // Store + auto [BasePtr, Offset] = + getBaseAndOffset(PointerOp, IRDB->getModule()->getDataLayout()); + CFLFieldAccessPath FieldString{}; - FieldString.Stores.push_back(0); + FieldString.Stores.push_back(Offset); return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } @@ -283,21 +368,21 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (const auto *Load = llvm::dyn_cast(Curr)) { // Load + // auto [BasePtr, Offset] = getBaseAndOffset( + // Load->getPointerOperand(), IRDB->getModule()->getDataLayout()); + + // TODO;: How to deal with BasePtr? 
+ CFLFieldAccessPath FieldString{}; FieldString.Loads.push_back(0); + llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; + llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } if (const auto *Gep = llvm::dyn_cast(Curr)) { - llvm::APInt Offset(64, 0); - int32_t OffsVal = CFLFieldAccessPath::TopOffset; - if (Gep->accumulateConstantOffset(IRDB->getModule()->getDataLayout(), - Offset)) { - auto RawOffsVal = Offset.getSExtValue(); - if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { - OffsVal = int32_t(RawOffsVal); - } - } + auto OffsVal = + getBaseAndOffset(Gep, IRDB->getModule()->getDataLayout()).second; CFLFieldAccessPath FieldString{}; FieldString.Offset = OffsVal; @@ -362,81 +447,97 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { - if (auto DfltCompose = psr::defaultComposeOrNull(L, R)) { - return DfltCompose; - } + auto Ret = [&]() -> EdgeFunction { + if (auto DfltCompose = psr::defaultComposeOrNull(L, R)) { + return DfltCompose; + } - if (R.isa>()) { - return R; - } + if (R.isa>()) { + return R; + } - const auto *FldSensL = L.dyn_cast(); - const auto *FldSensR = R.dyn_cast(); + const auto *FldSensL = L.dyn_cast(); + const auto *FldSensR = R.dyn_cast(); - if (FldSensL && FldSensR) { + if (FldSensL && FldSensR) { - // TODO: Be smarter with copying the transforms: - auto Txn = FldSensL->Transform; - Txn.applyTransforms(FldSensR->Transform, DepthKLimit); - // TODO: k-limit the number of paths! - return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; - } + // TODO: Be smarter with copying the transforms: + auto Txn = FldSensL->Transform; + Txn.applyTransforms(FldSensR->Transform, DepthKLimit); + // TODO: k-limit the number of paths! 
+ return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + } + + llvm::report_fatal_error("[FieldSensAllocSitesAwareIFDSProblem::extend]: " + "Unexpected edge functions: " + + llvm::Twine(to_string(L)) + " EXTEND " + + llvm::Twine(to_string(R))); + }(); - llvm::report_fatal_error("[FieldSensAllocSitesAwareIFDSProblem::extend]: " - "Unexpected edge functions: " + - llvm::Twine(to_string(L)) + " EXTEND " + - llvm::Twine(to_string(R))); + // llvm::errs() << "EXTEND " << L << " X " << R << " ==> " << Ret << '\n'; + + return Ret; } auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { - if (llvm::isa>(R) || llvm::isa>(L)) { - return R; - } - if (llvm::isa>(R) || llvm::isa>(L)) { - return L; - } + auto Ret = [&]() -> EdgeFunction { + if (llvm::isa>(R) || llvm::isa>(L)) { + return R; + } + if (llvm::isa>(R) || llvm::isa>(L)) { + return L; + } - const auto *FldSensL = L.dyn_cast(); - const auto *FldSensR = R.dyn_cast(); + if (llvm::isa>(L) && llvm::isa>(R)) { + return L; + } + + const auto *FldSensL = L.dyn_cast(); + const auto *FldSensR = R.dyn_cast(); - if (FldSensL) { - if (FldSensR) { + if (FldSensL) { + if (FldSensR) { - bool LeftSmaller = - FldSensL->Transform.Paths.size() < FldSensR->Transform.Paths.size(); + bool LeftSmaller = + FldSensL->Transform.Paths.size() < FldSensR->Transform.Paths.size(); - bool Changed = false; - auto Union = setUnion(FldSensL->Transform.Paths, - FldSensR->Transform.Paths, &Changed); + bool Changed = false; + auto Union = setUnion(FldSensL->Transform.Paths, + FldSensR->Transform.Paths, &Changed); + + if (Changed) { + // TODO: k-limit the number of paths! + return CFLFieldSensEdgeFunction{{std::move(Union)}, DepthKLimit}; + } - if (Changed) { - // TODO: k-limit the number of paths! - return CFLFieldSensEdgeFunction{{std::move(Union)}, DepthKLimit}; + return LeftSmaller ? R : L; } - return LeftSmaller ? 
L : R; - } + if (R.isa>()) { + if (FldSensL->Transform.Paths.contains(CFLFieldAccessPath{})) { + return L; + } - if (R.isa>()) { - if (FldSensL->Transform.Paths.contains(CFLFieldAccessPath{})) { - return L; + auto Txn = FldSensL->Transform; + Txn.Paths.insert(CFLFieldAccessPath{}); + return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + } + } else if (FldSensR && L.isa>()) { + if (FldSensR->Transform.Paths.contains(CFLFieldAccessPath{})) { + return R; } - auto Txn = FldSensL->Transform; + auto Txn = FldSensR->Transform; Txn.Paths.insert(CFLFieldAccessPath{}); return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; } - } else if (FldSensR && L.isa>()) { - if (FldSensR->Transform.Paths.contains(CFLFieldAccessPath{})) { - return R; - } - auto Txn = FldSensR->Transform; - Txn.Paths.insert(CFLFieldAccessPath{}); - return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; - } + return AllBottom{}; + }(); + + llvm::errs() << "COMBINE " << L << " X " << R << " ==> " << Ret << '\n'; - return AllBottom{}; + return Ret; } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp new file mode 100644 index 0000000000..6445023a96 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -0,0 +1,193 @@ +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include 
"phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Instruction.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +namespace { + +void populateWithMayAliases(psr::LLVMAliasInfoRef AS, + std::set &Facts, + const llvm::Instruction *Context) { + auto Tmp = Facts; + for (const auto *Fact : Facts) { + auto Aliases = AS.getReachableAllocationSites(Fact, true, Context); + Tmp.insert(Aliases->begin(), Aliases->end()); + } + + Facts = std::move(Tmp); +} + +class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { +public: + explicit ExampleTaintAnalysis(const psr::LLVMProjectIRDB *IRDB, + psr::LLVMAliasInfoRef AS, + const psr::LLVMTaintConfig *Config, + std::vector EntryPoints) + : psr::DefaultAllocSitesAwareIFDSProblem( + IRDB, AS, std::move(EntryPoints), + psr::LLVMZeroValue::getInstance()), + Config(&psr::assertNotNull(Config)) { + this->disableStrongUpdateStore(); + } + + [[nodiscard]] psr::InitialSeeds initialSeeds() override { + + psr::InitialSeeds Seeds = Config->makeInitialSeeds(); + + psr::LLVMBasedCFG CFG; + + addSeedsForStartingPoints(EntryPoints, IRDB, CFG, Seeds, getZeroValue(), + psr::BinaryDomain::BOTTOM); + + return Seeds; + }; + + [[nodiscard]] FlowFunctionPtrType + getSummaryFlowFunction(n_t CallSite, f_t DestFun) override { + const auto *CS = llvm::cast(CallSite); + + container_type Gen; + container_type Leak; + container_type Kill; + + psr::collectGeneratedFacts(Gen, *Config, CS, DestFun); + psr::collectLeakedFacts(Leak, *Config, CS, DestFun); + psr::collectSanitizedFacts(Kill, *Config, CS, DestFun); + + if (Gen.empty() && Leak.empty() && Kill.empty()) { + return DefaultAllocSitesAwareIFDSProblem::getSummaryFlowFunction(CS, + DestFun); + } + + populateWithMayAliases(getAliasInfo(), Gen, CallSite); + populateWithMayAliases(getAliasInfo(), Leak, CallSite); + + return lambdaFlow([this, CS, 
Gen{std::move(Gen)}, Leak{std::move(Leak)}, + Kill{std::move(Kill)}](d_t Source) -> container_type { + if (isZeroValue(Source)) { + return Gen; + } + + if (Leak.count(Source)) { + Leaks[CS] = Source; + } + + if (Kill.count(Source)) { + return {}; + } + + return {Source}; + }); + } + + llvm::DenseMap Leaks{}; + +private: + const psr::LLVMTaintConfig *Config{}; +}; + +class CFLFieldSensTest : public ::testing::Test { +protected: + static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER("xtaint/"); + const std::vector EntryPoints = {"main"}; + + void run(const llvm::Twine &IRFileName, + const std::map> &GroundTruth) { + psr::LLVMProjectIRDB IRDB(IRFileName); + ASSERT_TRUE(IRDB); + + psr::LLVMAliasSet AS(&IRDB); + psr::LLVMTaintConfig TC(IRDB); + ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); + + psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem(&TaintProblem, &AS); + + psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, + nullptr, &AS); + + auto Results = psr::solveIDEProblem(FsTaintProblem, ICFG); + + Results.dumpResults(ICFG); + + std::map> ComputedLeaks; + + for (auto IIt = TaintProblem.Leaks.begin(), End = TaintProblem.Leaks.end(); + IIt != End;) { + auto It = IIt++; + const auto &[LeakInst, LeakFact] = *It; + + const auto &Res = Results.resultAt(LeakInst, LeakFact); + if (const auto *FieldStrings = Res.getValueOrNull()) { + if (llvm::all_of(FieldStrings->Paths, + [](const auto &F) { return !F.empty(); })) { + llvm::errs() << "> Erase leak at " << psr::llvmIRToString(LeakInst) + << "; because leaking fact " + << psr::llvmIRToShortString(LeakFact) + << " has non-empty field-string: " << Res << '\n'; + TaintProblem.Leaks.erase(It); + } else { + ComputedLeaks[stoi(psr::getMetaDataID(LeakInst))].insert( + psr::getMetaDataID(LeakFact)); + } + } + } + + EXPECT_EQ(GroundTruth, ComputedLeaks); + } +}; + +TEST_F(CFLFieldSensTest, Basic_01) { + std::map> Gt; + + Gt[13] = {"12"}; + + run({PathToLLFiles + "xtaint01_cpp.ll"}, 
Gt); +} + +TEST_F(CFLFieldSensTest, Basic_02) { + GTEST_SKIP() << "Need field-sensitive alias information!"; + + std::map> Gt; + + Gt[18] = {"17"}; + + run({PathToLLFiles + "xtaint02_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_03) { + std::map> Gt; + + Gt[21] = {"20"}; + + run({PathToLLFiles + "xtaint03_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_04) { + std::map> Gt; + + Gt[16] = {"15"}; + + run({PathToLLFiles + "xtaint04_cpp.ll"}, Gt); +} + +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt index dcda34ffdd..b5de60ba49 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt @@ -7,6 +7,7 @@ set(IfdsIdeSources InteractiveIDESolverTest.cpp SparseIDESolverTest.cpp IterativeIDESolverTest.cpp + CFLFieldSensTest.cpp ) foreach(TEST_SRC ${IfdsIdeSources}) From 9756c18789a6e0f65fef0f17266860ccff8644cb Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 10 Jun 2025 19:47:37 +0200 Subject: [PATCH 05/29] Add a hack to make the alias information partially field sensitive --- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 14 +++++++++++--- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 83b727530d..493ac0ba0f 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -368,13 +368,21 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (const auto *Load = llvm::dyn_cast(Curr)) { // Load - // auto [BasePtr, Offset] = getBaseAndOffset( - // Load->getPointerOperand(), 
IRDB->getModule()->getDataLayout()); + auto [BasePtr, Offset] = getBaseAndOffset( + Load->getPointerOperand(), IRDB->getModule()->getDataLayout()); + + int32_t LoadOffs = 0; + + if (BasePtr == CurrNode && Load->getPointerOperand() != CurrNode) { + // This is a hack, but we do sth similar in the IDEExtendedTaintAnalysis + // (see forEachAliasOf() Lines 144 ff) + LoadOffs = Offset; + } // TODO;: How to deal with BasePtr? CFLFieldAccessPath FieldString{}; - FieldString.Loads.push_back(0); + FieldString.Loads.push_back(LoadOffs); llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index 6445023a96..cf1c0182d3 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -160,7 +160,7 @@ TEST_F(CFLFieldSensTest, Basic_01) { } TEST_F(CFLFieldSensTest, Basic_02) { - GTEST_SKIP() << "Need field-sensitive alias information!"; + // GTEST_SKIP() << "Need field-sensitive alias information!"; std::map> Gt; From dbc413c8401ca2b64d3af7904683b85f21ff6096 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 24 Jun 2025 20:21:36 +0200 Subject: [PATCH 06/29] Change alias handling --- .../DefaultAllocSitesAwareIDEProblem.h | 5 +- .../FieldSensAllocSitesAwareIFDSProblem.h | 29 ++++ .../AbstractMemoryLocation.h | 6 +- .../AbstractMemoryLocationFactory.h | 35 +---- .../PhasarLLVM/Pointer/LLVMFieldAliasSet.h | 130 +++++++++++++++++ .../phasar/Utils/MemoryLocationAllocator.h | 82 +++++++++++ ...DefaultAllocSitesAwareIDEFlowFunctions.cpp | 136 +++++++++--------- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 97 +++++++++++-- .../AbstractMemoryLocation.cpp | 4 +- .../AbstractMemoryLocationFactory.cpp | 117 ++------------- 
lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp | 108 ++++++++++++++ lib/Utils/MemoryLocationAllocator.cpp | 98 +++++++++++++ .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 19 ++- 13 files changed, 637 insertions(+), 229 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h create mode 100644 include/phasar/Utils/MemoryLocationAllocator.h create mode 100644 lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp create mode 100644 lib/Utils/MemoryLocationAllocator.cpp diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h index e8fb77e83e..b85b00d7d8 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h @@ -12,6 +12,7 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" #include @@ -38,7 +39,7 @@ class IDEAllocSitesAwareDefaultFlowFunctionsImpl bool EnableStrongUpdateStore = true; - [[nodiscard]] constexpr LLVMAliasInfoRef getAliasInfo() const noexcept { + [[nodiscard]] constexpr const auto &getAliasInfo() const noexcept { return AS; } @@ -61,7 +62,7 @@ class IDEAllocSitesAwareDefaultFlowFunctionsImpl llvm::ArrayRef /*Callees*/); private: - LLVMAliasInfoRef AS; + LLVMBasePointerAliasSet AS; }; } // namespace detail diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 8e6009b4a1..b01a58ea34 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -13,10 +13,16 @@ #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include 
"phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/Domain/LatticeDomain.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" +#include "phasar/Utils/MemoryLocationAllocator.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" #include @@ -127,6 +133,23 @@ class FieldSensAllocSitesAwareIFDSProblem using Base = IDETabulationProblem< CFLFieldSensAnalysisDomain>; + struct CachedAccessPath final + : public llvm::TrailingObjects { + + using OffsetType = int32_t; + + constexpr CachedAccessPath(const llvm::Value *BasePtr, + uint32_t NumOffsets) noexcept + : BasePtr(BasePtr), NumOffsets(NumOffsets) {} + + const llvm::Value *BasePtr{}; + uint32_t NumOffsets{}; + + [[nodiscard]] llvm::ArrayRef offsets() const noexcept { + return {this->getTrailingObjects(), NumOffsets}; + } + }; + public: using typename Base::container_type; using typename Base::d_t; @@ -212,8 +235,14 @@ class FieldSensAllocSitesAwareIFDSProblem const EdgeFunction &R) override; private: + [[nodiscard]] const CachedAccessPath * + getAccessPath(const llvm::Value *Pointer); + LLVMAliasInfoRef AS; IFDSTabulationProblem *UserProblem{}; + MemoryLocationAllocator MemLocAlloc{}; + llvm::DenseMap MemLocCache{}; + uint8_t DepthKLimit = 5; // Original from the paper }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h index 3650360426..ceec4e404c 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h +++ 
b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h @@ -21,12 +21,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Value.h" #include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include @@ -97,8 +97,8 @@ class AbstractMemoryLocationImpl final /// /// \return The byte offset, iff all indices are constants. Otherwise /// std::nullopt - static std::optional - computeOffset(const llvm::DataLayout &DL, const llvm::GetElementPtrInst *Gep); + static std::optional computeOffset(const llvm::DataLayout &DL, + const llvm::GEPOperator *Gep); [[nodiscard]] inline bool isOverApproximation() const { return lifetime() == 0; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.h index a492bdd6a3..77f585bf41 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.h @@ -11,19 +11,15 @@ #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_EXTENDEDTAINTANALYSIS_ABSTRACTMEMORYLOCATIONFACTORY_H #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h" +#include "phasar/Utils/MemoryLocationAllocator.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/TrailingObjects.h" - -#include -#include namespace llvm { class DataLayout; -class GetElementPtrInst; +class GEPOperator; } // namespace llvm namespace psr { @@ -35,30 +31,10 @@ namespace detail { class AbstractMemoryLocationFactoryBase { private: - 
struct Allocator { - struct Block final : public llvm::TrailingObjects { - - Block *Next = nullptr; - - static Block *create(Block *Next, size_t NumPointerEntries); - static void destroy(Block *Blck, size_t NumPointerEntries); - - private: - Block(Block *Next); - }; - - Block *Root = nullptr; - void **Pos = nullptr, **End = nullptr; - size_t InitialCapacity{}; + struct Allocator : MemoryLocationAllocator { Allocator() noexcept = default; Allocator(size_t InitialCapacity); - Allocator(const Allocator &) = delete; - Allocator(Allocator &&Other) noexcept; - ~Allocator(); - - Allocator &operator=(const Allocator &) = delete; - Allocator &operator=(Allocator &&Other) noexcept; AbstractMemoryLocationImpl *create(const llvm::Value *Baseptr, size_t Lifetime, @@ -106,7 +82,7 @@ class AbstractMemoryLocationFactoryBase { llvm::ArrayRef Ind); const AbstractMemoryLocationImpl * withOffsetImpl(const AbstractMemoryLocationImpl *AML, - const llvm::GetElementPtrInst *Gep); + const llvm::GEPOperator *Gep); const AbstractMemoryLocationImpl * withOffsetsImpl(const AbstractMemoryLocationImpl *AML, @@ -178,8 +154,7 @@ class AbstractMemoryLocationFactory } [[nodiscard]] AbstractMemoryLocation - withOffset(const AbstractMemoryLocation &AML, - const llvm::GetElementPtrInst *Gep) { + withOffset(const AbstractMemoryLocation &AML, const llvm::GEPOperator *Gep) { return {withOffsetImpl(AML.operator->(), Gep)}; } diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h new file mode 100644 index 0000000000..337348a007 --- /dev/null +++ b/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h @@ -0,0 +1,130 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H +#define PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H + +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Pointer/AliasAnalysisType.h" + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" + +#include +#include +#include + +namespace psr { + +class LLVMBasePointerAliasSet { +public: + using v_t = typename LLVMAliasInfoRef::v_t; + using n_t = typename LLVMAliasInfoRef::n_t; + using AliasSetTy = LLVMAliasInfoRef::AliasSetTy; + using AliasSetPtrTy = std::unique_ptr; + + constexpr LLVMBasePointerAliasSet(LLVMAliasInfoRef AS) noexcept : AS(AS) {} + + [[nodiscard]] bool isInterProcedural() const noexcept { + return AS.isInterProcedural(); + } + + [[nodiscard]] AliasAnalysisType getAliasAnalysisType() const noexcept { + return AS.getAliasAnalysisType(); + } + + [[nodiscard]] static const llvm::Value * + getBasePointer(const llvm::Value *Pointer); + + [[nodiscard]] AliasResult alias(v_t Pointer1, v_t Pointer2, + n_t AtInstruction = {}) const { + return AS.alias(Pointer1, Pointer2, AtInstruction); + } + + [[nodiscard]] AliasSetPtrTy getAliasSet(v_t Pointer, + n_t AtInstruction = {}) const; + +private: + LLVMAliasInfoRef AS; +}; + +class LLVMFieldAliasSet { +public: + struct AccessPath { + const llvm::Value *BasePtr{}; + llvm::SmallVector FieldAccesses; + + static constexpr ptrdiff_t TopOffset = PTRDIFF_MIN; + + bool operator==(const AccessPath &Other) const noexcept { + return BasePtr == Other.BasePtr && FieldAccesses == Other.FieldAccesses; + } + bool operator!=(const AccessPath &Other) const noexcept { + return !(*this == Other); + } + }; + + using v_t = typename LLVMAliasInfoRef::v_t; + using n_t = typename LLVMAliasInfoRef::n_t; + using AliasSetTy = llvm::DenseSet; + using 
AliasSetPtrTy = std::unique_ptr; + + explicit LLVMFieldAliasSet( + LLVMAliasInfoRef AS, + std::reference_wrapper DL) noexcept + : AS(AS), DL(&DL.get()) {} + + [[nodiscard]] bool isInterProcedural() const noexcept { + return AS.isInterProcedural(); + } + + [[nodiscard]] AliasAnalysisType getAliasAnalysisType() const noexcept { + return AS.getAliasAnalysisType(); + } + + [[nodiscard]] AccessPath getAccessPath(const llvm::Value *Pointer) const; + + [[nodiscard]] AliasResult alias(v_t Pointer1, v_t Pointer2, + n_t AtInstruction = {}) const { + return AS.alias(Pointer1, Pointer2, AtInstruction); + } + + [[nodiscard]] AliasSetPtrTy getAliasSet(v_t Pointer, + n_t AtInstruction = {}) const; + +private: + LLVMAliasInfoRef AS; + const llvm::DataLayout *DL{}; +}; +} // namespace psr + +namespace llvm { +template <> struct DenseMapInfo { + using AccessPath = psr::LLVMFieldAliasSet::AccessPath; + static AccessPath getEmptyKey() { + return AccessPath{DenseMapInfo::getEmptyKey(), {}}; + } + static AccessPath getTombstoneKey() { + return AccessPath{DenseMapInfo::getTombstoneKey(), {}}; + } + static auto getHashValue(const AccessPath &AP) { + auto HC = hash_value(AP.BasePtr); + auto HC2 = + hash_combine_range(AP.FieldAccesses.begin(), AP.FieldAccesses.end()); + return hash_combine(HC, HC2); + } + static bool isEqual(const AccessPath &L, const AccessPath &R) noexcept { + return L == R; + } +}; +} // namespace llvm + +#endif // PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H diff --git a/include/phasar/Utils/MemoryLocationAllocator.h b/include/phasar/Utils/MemoryLocationAllocator.h new file mode 100644 index 0000000000..5f4cad1a9d --- /dev/null +++ b/include/phasar/Utils/MemoryLocationAllocator.h @@ -0,0 +1,82 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_UTILS_MEMORYLOCATIONALLOCATOR_H +#define PHASAR_UTILS_MEMORYLOCATIONALLOCATOR_H + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/TrailingObjects.h" + +#include + +namespace psr { +/// \brief A simple custom allocator that is used to allocate, e.g., +/// AbstractMemoryLocation for the IDEExtendedTaintAnalysis. +/// +/// It can be used to allocate trivially destructible objects of variable size, +/// e.g., using llvm::TrailingObjects, that should live as long as the +/// allocator. +class MemoryLocationAllocator { +public: + constexpr MemoryLocationAllocator() noexcept = default; + + explicit MemoryLocationAllocator(size_t InitialCapacity, + size_t DynamicBlockSize = size_t(1024) * 6 - + 8); + + constexpr MemoryLocationAllocator(MemoryLocationAllocator &&Other) noexcept + : Root(Other.Root), Pos(Other.Pos), End(Other.End) { + Other.Root = nullptr; + Other.Pos = nullptr; + Other.End = nullptr; + } + MemoryLocationAllocator &operator=(MemoryLocationAllocator &&Other) noexcept { + MemoryLocationAllocator(std::move(Other)).swap(*this); + return *this; + } + + ~MemoryLocationAllocator(); + + MemoryLocationAllocator(const MemoryLocationAllocator &) = delete; + MemoryLocationAllocator &operator=(const MemoryLocationAllocator &) = delete; + + void swap(MemoryLocationAllocator &Other) noexcept { + std::swap(Root, Other.Root); + std::swap(Pos, Other.Pos); + std::swap(End, Other.End); + } + + /// \brief Allocates a chunk of memory of at least NumBytes bytes, aligned to + /// alignof(void *). + /// + /// Fails with llvm::report_bad_alloc_error(), if memory could not be + /// allocated. 
+ [[nodiscard]] LLVM_ATTRIBUTE_RETURNS_NONNULL void *allocate(size_t NumBytes); + +private: + struct Block final : public llvm::TrailingObjects { + + Block *Next = nullptr; + + static Block *create(Block *Next, size_t NumPointerEntries); + static void destroy(Block *Blck, size_t NumPointerEntries); + + private: + constexpr Block(Block *Next) noexcept : Next(Next) {} + }; + + Block *Root = nullptr; + void **Pos = nullptr; + void **End = nullptr; + size_t InitialCapacity{}; + size_t DynamicBlockSize{}; +}; +} // namespace psr + +#endif // PHASAR_UTILS_MEMORYLOCATIONALLOCATOR_H diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp index 7557efbaf0..78d07a14fa 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp @@ -1,13 +1,14 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Casting.h" -#include - using namespace psr; using FFTemplates = FlowFunctionTemplates< @@ -16,16 +17,32 @@ using FFTemplates = FlowFunctionTemplates< container_type>; using container_type = FFTemplates::container_type; +static const llvm::Value *getBase(const llvm::Value *V, + const llvm::DataLayout &DL) { + // TODO: Optimize! 
+ + llvm::APInt Offset(64, 0); + const auto *Base = V->stripAndAccumulateConstantOffsets(DL, Offset, true); + + return Base->stripPointerCastsAndAliases(); +} + static container_type -getReachableAllocationSites(LLVMAliasInfoRef AS, const llvm::Value *Pointer, +getReachableAllocationSites(const LLVMBasePointerAliasSet &AS, + const llvm::Value *Pointer, const llvm::Instruction *Context) { if (!Pointer->getType()->isPointerTy()) { return {Pointer}; } + const auto &DL = Context->getModule()->getDataLayout(); + container_type Ret; - auto AllocSites = AS.getReachableAllocationSites(Pointer, true, Context); - Ret.insert(AllocSites->begin(), AllocSites->end()); + auto AllocSites = AS.getAliasSet(Pointer, Context); + for (const auto *Alias : *AllocSites) { + const auto *AliasBase = getBase(Alias, DL); + Ret.insert(AliasBase); + } if (Ret.empty()) { Ret.insert(Pointer); } @@ -38,58 +55,56 @@ auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: if (const auto *Store = llvm::dyn_cast(Curr)) { - container_type Gen = - getReachableAllocationSites(AS, Store->getPointerOperand(), Store); - - auto ValueAllocSites = - getReachableAllocationSites(AS, Store->getValueOperand(), Store); + const auto &DL = Store->getModule()->getDataLayout(); + const auto *PointerBase = getBase(Store->getPointerOperand(), DL); + const auto *ValueBase = getBase(Store->getValueOperand(), DL); + container_type Gen = getReachableAllocationSites(AS, PointerBase, Store); + Gen.insert(ValueBase); + // auto ValueAllocSites = + // getReachableAllocationSites(AS, Store->getValueOperand(), Store); if (EnableStrongUpdateStore) { - return FFTemplates::lambdaFlow([Store, Gen{std::move(Gen)}, - ValueAliases{std::move(ValueAllocSites)}]( - d_t Source) -> container_type { - if (Store->getPointerOperand() == Source || - Store->getPointerOperand()->stripPointerCastsAndAliases() == - Source) { - return {}; - } - - if (Store->getValueOperand() == Source || ValueAliases.count(Source)) { - auto Ret = Gen; - 
Ret.insert(Source); - return Ret; - } - - return {Source}; - }); + return FFTemplates::lambdaFlow( + [PointerBase, ValueBase, + Gen{std::move(Gen)}](d_t Source) -> container_type { + if (PointerBase == Source) { + return {}; + } + + if (ValueBase == Source) { + auto Ret = Gen; + return Ret; + } + + return {Source}; + }); } - return FFTemplates::lambdaFlow([Store, Gen{std::move(Gen)}, - ValueAliases{std::move(ValueAllocSites)}]( - d_t Source) -> container_type { - if (Store->getValueOperand() == Source || ValueAliases.count(Source)) { - auto Ret = Gen; - Ret.insert(Source); - return Ret; - } + return FFTemplates::lambdaFlow( + [Gen{std::move(Gen)}, ValueBase](d_t Source) -> container_type { + if (ValueBase == Source) { + auto Ret = Gen; + Ret.insert(Source); + return Ret; + } - return {Source}; - }); + return {Source}; + }); } if (const auto *Load = llvm::dyn_cast(Curr)) { - auto AllocSites = - getReachableAllocationSites(AS, Load->getPointerOperand(), Load); + const auto &DL = Load->getModule()->getDataLayout(); + const auto *PointerBase = getBase(Load->getPointerOperand(), DL); - return FFTemplates::lambdaFlow([Load, AllocSites{std::move(AllocSites)}]( - d_t Source) -> container_type { - if (Source == Load->getPointerOperand() || AllocSites.count(Source)) { - return {Source, Load}; - } + return FFTemplates::lambdaFlow( + [PointerBase, Load](d_t Source) -> container_type { + if (Source == PointerBase) { + return {Source, Load}; + } - return {Source}; - }); + return {Source}; + }); } return this->IDENoAliasDefaultFlowFunctionsImpl::getNormalFlowFunctionImpl( @@ -100,23 +115,14 @@ auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: getCallFlowFunctionImpl(n_t CallInst, f_t CalleeFun) -> FlowFunctionPtrType { if (const auto *CallSite = llvm::dyn_cast(CallInst)) { - return mapFactsToCallee( - CallSite, CalleeFun, [CallSite, AS = AS](d_t Arg, d_t Source) { - if (Arg == Source) { - return true; - } - - return Arg->getType()->isPointerTy() && - 
Source->getType()->isPointerTy() && - AS.isInReachableAllocationSites(Arg, Source, true, CallSite); - }); + return mapFactsToCallee(CallSite, CalleeFun); } return FFTemplates::killAllFlows(); } static container_type getReturnedAliases(const container_type &Facts, - psr::LLVMAliasInfoRef AS, + const LLVMBasePointerAliasSet &AS, const llvm::Instruction *CallSite) { container_type Ret; for (const auto *Fact : Facts) { @@ -137,24 +143,18 @@ auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl::getRetFlowFunctionImpl( Facts = getReturnedAliases(Facts, AS, Call); }; - const auto PropagateParameter = [AS = AS, ExitInst](d_t Formal, - d_t Source) { + const auto PropagateParameter = [](d_t Formal, d_t Source) { if (!Formal->getType()->isPointerTy()) { return false; } - return Formal == Source || - AS.isInReachableAllocationSites(Formal, Source, true, ExitInst); + return Formal == Source; }; - const auto PropagateRet = [AS = AS, ExitInst](d_t RetVal, d_t Source) { - if (RetVal == Source) { - return true; - } + const auto &DL = Call->getModule()->getDataLayout(); - return RetVal->getType()->isPointerTy() && - Source->getType()->isPointerTy() && - AS.isInReachableAllocationSites(RetVal, Source, true, ExitInst); + const auto PropagateRet = [&DL](d_t RetVal, d_t Source) { + return getBase(RetVal, DL) == Source; }; return mapFactsToCaller(Call, ExitInst, PropagateParameter, PropagateRet, diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 493ac0ba0f..21040cb955 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -3,6 +3,7 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include 
"phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Fn.h" #include "phasar/Utils/Union.h" @@ -18,6 +19,7 @@ #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -343,20 +345,15 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( } const auto *ValueOp = Store->getValueOperand(); - if (ValueOp == CurrNode || - AS.isInReachableAllocationSites(ValueOp, CurrNode, true, Store)) { - if (PointerOp == SuccNode || - AS.isInReachableAllocationSites(PointerOp, SuccNode, true, Store)) { - // Store - - auto [BasePtr, Offset] = - getBaseAndOffset(PointerOp, IRDB->getModule()->getDataLayout()); - - CFLFieldAccessPath FieldString{}; - FieldString.Stores.push_back(Offset); - return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, - DepthKLimit}; - } + if (ValueOp == CurrNode && CurrNode != SuccNode) { + // Store + + auto [BasePtr, Offset] = + getBaseAndOffset(PointerOp, IRDB->getModule()->getDataLayout()); + + CFLFieldAccessPath FieldString{}; + FieldString.Stores.push_back(Offset); + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } // unaffected by the store @@ -549,3 +546,75 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, return Ret; } + +static std::pair> +createAccessPath(const llvm::Value *Pointer, const llvm::DataLayout &DL) { + + std::pair> Ret; + auto &[BasePtr, Offsets] = Ret; + + BasePtr = Pointer; + Offsets.push_back(0); + + // Note: llvm::Constant includes llvm::GlobalValue + if (llvm::isa(Pointer) || + llvm::isa(Pointer) || + llvm::isa(Pointer) || + llvm::isa(Pointer)) { + // Globals, argument, function calls and allocas define themselves + return Ret; + } + + while (true) { + // TODO: Should we look into the cache within this loop? 
+ // TODO: Handle constant GEPs + + if (const auto *Load = llvm::dyn_cast(BasePtr)) { + Offsets.push_back(0); + BasePtr = Load->getPointerOperand()->stripPointerCasts(); + } else if (const auto *Gep = llvm::dyn_cast(BasePtr)) { + + auto GepOffs = detail::AbstractMemoryLocationImpl::computeOffset(DL, Gep); + if (GepOffs.has_value() && *GepOffs >= INT32_MIN && + *GepOffs <= INT32_MAX) { + Offsets.back() = addOffsets(Offsets.back(), int32_t(*GepOffs)); + } else { + Offsets.back() = CFLFieldAccessPath::TopOffset; + } + BasePtr = Gep->getPointerOperand()->stripPointerCasts(); + } else { + // TODO aggregate instructions, e.g. insertvalue, extractvalue, ... + break; + } + } + + // NOTE: Do not reverse the offsets as we do in + // AbstractMemoryLocationFactoryBase::createImpl(). + // For the CFL formulation, we need the offsets in inverse order anyway! + + return Ret; +} + +auto FieldSensAllocSitesAwareIFDSProblem::getAccessPath( + const llvm::Value *Pointer) -> const CachedAccessPath * { + auto &Ret = MemLocCache[Pointer]; + if (Ret) { + return Ret; + } + + auto [BasePtr, Offsets] = + createAccessPath(Pointer, IRDB->getModule()->getDataLayout()); + assert(Offsets.size() < UINT32_MAX); + + using OffsetType = CachedAccessPath::OffsetType; + + auto NumBytes = + CachedAccessPath::totalSizeToAlloc(Offsets.size()); + auto *RawMem = MemLocAlloc.allocate(NumBytes); + auto *AP = new (RawMem) CachedAccessPath(BasePtr, Offsets.size()); + memcpy(AP->getTrailingObjects(), Offsets.data(), + Offsets.size() * sizeof(OffsetType)); + + Ret = AP; + return AP; +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.cpp index da9898ee05..205ee403d7 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.cpp @@ -55,8 
+55,8 @@ llvm::ArrayRef AbstractMemoryLocationImpl::offsets() const { return llvm::makeArrayRef(this->getTrailingObjects(), NumOffsets); } -auto AbstractMemoryLocationImpl::computeOffset( - const llvm::DataLayout &DL, const llvm::GetElementPtrInst *Gep) +auto AbstractMemoryLocationImpl::computeOffset(const llvm::DataLayout &DL, + const llvm::GEPOperator *Gep) -> std::optional { // TODO: Use results from IDELinearConstantAnalysis here (LLVM 12 has an // overload of accumulateConstantOffset that takes an external analysis diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.cpp index 6baa829493..1f2788a9a9 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.cpp @@ -9,122 +9,28 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocationFactory.h" -#include "phasar/Utils/Logger.h" +#include "phasar/Utils/MemoryLocationAllocator.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Compiler.h" - -#include -#include +#include "llvm/IR/Operator.h" namespace psr::detail { -AbstractMemoryLocationFactoryBase::Allocator::Block::Block(Block *Next) - : Next(Next) {} - -auto AbstractMemoryLocationFactoryBase::Allocator::Block::create( - Block *Next, size_t NumPointerEntries) -> Block * { - // Allocate one more pointer to store the next-block ptr - - if (LLVM_UNLIKELY(NumPointerEntries > - std::numeric_limits::max() / sizeof(size_t) - 1)) { - - PHASAR_LOG_LEVEL(CRITICAL, "Cannot allocate " << NumPointerEntries - << " pointer entries"); - - std::terminate(); - } - - auto *Ret = reinterpret_cast(new (std::align_val_t{ - alignof(AbstractMemoryLocationImpl)}) size_t[1 + NumPointerEntries]); - - new (Ret) Block(Next); - - 
__asan_poison_memory_region(Ret->getTrailingObjects(), - NumPointerEntries * sizeof(void *)); - - return Ret; -} - -void AbstractMemoryLocationFactoryBase::Allocator::Block::destroy( - Block *Blck, [[maybe_unused]] size_t NumPointerEntries) { - __asan_unpoison_memory_region(Blck->getTrailingObjects(), - NumPointerEntries * sizeof(void *)); - ::operator delete[](Blck, - std::align_val_t{alignof(AbstractMemoryLocationImpl)}); -} - -AbstractMemoryLocationFactoryBase::Allocator::Allocator( - size_t InitialCapacity) { - if (InitialCapacity <= ExpectedNumAmLsPerBlock) { - return; - } - - const auto NumPointersPerInitialBlock = - (MinNumPointersPerAML + 3) * InitialCapacity; - Root = Block::create(nullptr, NumPointersPerInitialBlock); - Pos = Root->getTrailingObjects(); - End = Pos + NumPointersPerInitialBlock; -} - -AbstractMemoryLocationFactoryBase::Allocator::~Allocator() { - auto *Rt = Root; - auto *Blck = Rt; - while (Blck) { - auto *Nxt = Blck->Next; - Block::destroy(Blck, Blck == Rt - ? 
(MinNumPointersPerAML + 3) * InitialCapacity - : NumPointersPerBlock); - Blck = Nxt; - } - Root = nullptr; - Pos = nullptr; - End = nullptr; -} - -AbstractMemoryLocationFactoryBase::Allocator::Allocator( - Allocator &&Other) noexcept - : Root(Other.Root), Pos(Other.Pos), End(Other.End) { - Other.Root = nullptr; - Other.Pos = nullptr; - Other.End = nullptr; -} - -auto AbstractMemoryLocationFactoryBase::Allocator::operator=( - Allocator &&Other) noexcept -> Allocator & { - this->Allocator::~Allocator(); - new (this) Allocator(std::move(Other)); - return *this; -} +AbstractMemoryLocationFactoryBase::Allocator::Allocator(size_t InitialCapacity) + : MemoryLocationAllocator((MinNumPointersPerAML + 3) * InitialCapacity * + sizeof(void *)) {} AbstractMemoryLocationImpl * AbstractMemoryLocationFactoryBase::Allocator::create( const llvm::Value *Baseptr, size_t Lifetime, llvm::ArrayRef Offsets) { - // All fields inside AML have pointer size, so there is no padding at all - - auto NumPointersRequired = - AbstractMemoryLocationImpl::totalSizeToAlloc(Offsets.size()) / - sizeof(void *); - auto *Rt = Root; - auto *Curr = Pos; - - if (End - Curr < ptrdiff_t(NumPointersRequired)) { - Root = Rt = Block::create(Rt, NumPointersPerBlock); - Pos = Curr = Rt->getTrailingObjects(); - End = Curr + NumPointersPerBlock; - } - - auto *Ret = reinterpret_cast(Curr); - - Pos += NumPointersRequired; + auto NumBytesRequired = + AbstractMemoryLocationImpl::totalSizeToAlloc(Offsets.size()); - __asan_unpoison_memory_region(Ret, NumPointersRequired * sizeof(void *)); + auto *RetBytes = this->allocate(NumBytesRequired); - new (Ret) AbstractMemoryLocationImpl(Baseptr, Offsets, Lifetime); - - return Ret; + return new (RetBytes) AbstractMemoryLocationImpl(Baseptr, Offsets, Lifetime); } AbstractMemoryLocationFactoryBase::AbstractMemoryLocationFactoryBase( @@ -203,8 +109,7 @@ AbstractMemoryLocationFactoryBase::createImpl(const llvm::Value *V, Baseptr = Load->getPointerOperand(); } else if (const auto *Cast = 
llvm::dyn_cast(Baseptr)) { Baseptr = Cast->getOperand(0); - } else if (const auto *Gep = - llvm::dyn_cast(Baseptr)) { + } else if (const auto *Gep = llvm::dyn_cast(Baseptr)) { auto GepOffs = detail::AbstractMemoryLocationImpl::computeOffset(*DL, Gep); @@ -319,7 +224,7 @@ AbstractMemoryLocationFactoryBase::withIndirectionOfImpl( const AbstractMemoryLocationImpl * AbstractMemoryLocationFactoryBase::withOffsetImpl( - const AbstractMemoryLocationImpl *AML, const llvm::GetElementPtrInst *Gep) { + const AbstractMemoryLocationImpl *AML, const llvm::GEPOperator *Gep) { assert(DL); switch (AML->lifetime()) { diff --git a/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp b/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp new file mode 100644 index 0000000000..6a49391183 --- /dev/null +++ b/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp @@ -0,0 +1,108 @@ +#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h" + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" + +#include +#include + +using namespace psr; + +const llvm::Value * +LLVMBasePointerAliasSet::getBasePointer(const llvm::Value *Pointer) { + if (!Pointer || !Pointer->getType()->isPointerTy()) { + return Pointer; + } + + while (true) { + if (const auto *Load = llvm::dyn_cast(Pointer)) { + Pointer = Load->getPointerOperand()->stripPointerCastsAndAliases(); + continue; + } + + if (const auto *GEP = llvm::dyn_cast(Pointer)) { + Pointer = GEP->getPointerOperand()->stripPointerCastsAndAliases(); + continue; + } + + break; + } + + return Pointer; +} + +static constexpr ptrdiff_t TopOffset = LLVMFieldAliasSet::AccessPath::TopOffset; + +constexpr static void addOffset(ptrdiff_t &Into, ptrdiff_t Offs) noexcept { + if (Into == TopOffset) { + return; + } + + if (llvm::AddOverflow(Into, Offs, 
Into)) { + Into = TopOffset; + } +} + +auto LLVMFieldAliasSet::getAccessPath(const llvm::Value *Pointer) const + -> AccessPath { + // TODO: We may want to cache this! + // -> See AbstractMemoryLocationFactory + + AccessPath Ret{Pointer, {0}}; + if (!Pointer || !Pointer->getType()->isPointerTy()) { + return Ret; + } + + while (true) { + if (const auto *Load = llvm::dyn_cast(Pointer)) { + Pointer = Load->getPointerOperand()->stripPointerCastsAndAliases(); + Ret.FieldAccesses.push_back(0); + continue; + } + + if (const auto *GEP = llvm::dyn_cast(Pointer)) { + Pointer = GEP->getPointerOperand()->stripPointerCastsAndAliases(); + auto Offset = detail::AbstractMemoryLocationImpl::computeOffset(*DL, GEP); + if (Offset) { + addOffset(Ret.FieldAccesses.back(), *Offset); + } else { + Ret.FieldAccesses.back() = TopOffset; + } + + continue; + } + } + + Ret.BasePtr = Pointer; + return Ret; +} + +auto LLVMBasePointerAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const + -> AliasSetPtrTy { + auto Aliases = AS.getAliasSet(Pointer, AtInstruction); + + auto Ret = std::make_unique(); + for (const auto *Alias : *Aliases) { + Ret->insert(getBasePointer(Alias)); + } + + return Ret; +} + +auto LLVMFieldAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const + -> AliasSetPtrTy { + auto Aliases = AS.getAliasSet(Pointer, AtInstruction); + + auto Ret = std::make_unique(); + for (const auto *Alias : *Aliases) { + Ret->insert(getAccessPath(Alias)); + } + + return Ret; +} diff --git a/lib/Utils/MemoryLocationAllocator.cpp b/lib/Utils/MemoryLocationAllocator.cpp new file mode 100644 index 0000000000..0706f6e410 --- /dev/null +++ b/lib/Utils/MemoryLocationAllocator.cpp @@ -0,0 +1,98 @@ +#include "phasar/Utils/MemoryLocationAllocator.h" + +#include "phasar/Utils/Logger.h" + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace psr; + +MemoryLocationAllocator::Block * +MemoryLocationAllocator::Block::create(Block *Next, size_t NumPointerEntries) { 
+ // Allocate one more pointer to store the next-block ptr + + if (LLVM_UNLIKELY(NumPointerEntries > + std::numeric_limits::max() / sizeof(size_t) - 1)) { + + PHASAR_LOG_LEVEL(CRITICAL, "Cannot allocate " << NumPointerEntries + << " pointer entries"); + + llvm::report_bad_alloc_error( + "Cannot allocate memory for abstract memory locations"); + } + + static_assert(sizeof(Block) == sizeof(void *)); + static_assert(alignof(Block) == alignof(void *)); + auto *RetBytes = new void *[1 + NumPointerEntries]; + + auto *Ret = new (RetBytes) Block(Next); + + __asan_poison_memory_region(Ret->getTrailingObjects(), + NumPointerEntries * sizeof(void *)); + + return Ret; +} + +void MemoryLocationAllocator::Block::destroy( + MemoryLocationAllocator::Block *Blck, + [[maybe_unused]] size_t NumPointerEntries) { + __asan_unpoison_memory_region(Blck->getTrailingObjects(), + NumPointerEntries * sizeof(void *)); + delete[] reinterpret_cast(Blck); +} + +static constexpr size_t translateBytesToPointers(size_t Bytes) noexcept { + return (Bytes + sizeof(void *)) / sizeof(void *); +} + +MemoryLocationAllocator::MemoryLocationAllocator(size_t InitialCapacity, + size_t DynamicBlockSize) + : InitialCapacity(translateBytesToPointers(InitialCapacity)), + DynamicBlockSize(translateBytesToPointers(DynamicBlockSize)) { + assert(DynamicBlockSize >= sizeof(void *)); + if (this->InitialCapacity <= this->DynamicBlockSize) { + return; + } + + Root = Block::create(nullptr, this->InitialCapacity); + Pos = Root->getTrailingObjects(); + End = Pos + this->InitialCapacity; +} + +MemoryLocationAllocator::~MemoryLocationAllocator() { + auto *Rt = Root; + auto *Blck = Rt; + while (Blck) { + auto *Nxt = Blck->Next; + Block::destroy(Blck, Blck == Rt ? 
InitialCapacity : DynamicBlockSize); + Blck = Nxt; + } + Root = nullptr; + Pos = nullptr; + End = nullptr; +} + +[[nodiscard]] LLVM_ATTRIBUTE_RETURNS_NONNULL void * +MemoryLocationAllocator::allocate(size_t NumBytes) { + auto NumPointersRequired = translateBytesToPointers(NumBytes); + if (LLVM_UNLIKELY(NumPointersRequired == 0)) { + // Prevent aliasing issues by refusing to allocate zero bytes + NumPointersRequired = 1; + } + + auto *Rt = Root; + auto *Curr = Pos; + + if (LLVM_UNLIKELY(End - Curr < ptrdiff_t(NumPointersRequired))) { + Root = Rt = Block::create(Rt, DynamicBlockSize); + Pos = Curr = Rt->getTrailingObjects(); + End = Curr + DynamicBlockSize; + } + + auto *Ret = Curr; + Pos += NumPointersRequired; + + __asan_unpoison_memory_region(Ret, NumPointersRequired * sizeof(void *)); + + return Ret; +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index cf1c0182d3..3219554c8a 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -6,6 +6,7 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h" @@ -20,12 +21,13 @@ namespace { -void populateWithMayAliases(psr::LLVMAliasInfoRef AS, +template +void populateWithMayAliases(const AliasInfoTy &AS, std::set &Facts, const llvm::Instruction *Context) { auto Tmp = Facts; for (const auto *Fact : Facts) { - auto Aliases = AS.getReachableAllocationSites(Fact, true, Context); + auto Aliases = AS.getAliasSet(Fact, Context); Tmp.insert(Aliases->begin(), 
Aliases->end()); } @@ -111,14 +113,15 @@ class CFLFieldSensTest : public ::testing::Test { psr::LLVMProjectIRDB IRDB(IRFileName); ASSERT_TRUE(IRDB); - psr::LLVMAliasSet AS(&IRDB); + psr::LLVMAliasSet BaseAS(&IRDB); + psr::FilteredLLVMAliasSet AS(&BaseAS); psr::LLVMTaintConfig TC(IRDB); ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem(&TaintProblem, &AS); psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, - nullptr, &AS); + nullptr, &BaseAS); auto Results = psr::solveIDEProblem(FsTaintProblem, ICFG); @@ -185,6 +188,14 @@ TEST_F(CFLFieldSensTest, Basic_04) { run({PathToLLFiles + "xtaint04_cpp.ll"}, Gt); } +TEST_F(CFLFieldSensTest, Basic_06) { + std::map> Gt; + + // no leaks expected + + run({PathToLLFiles + "xtaint06_cpp.ll"}, Gt); +} + } // namespace int main(int Argc, char **Argv) { From 6e1e94a1f8b251f6279cbf9522ae58a2a0c23658 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 23 Jan 2026 16:06:15 +0100 Subject: [PATCH 07/29] Add more (passing) XTaint tests --- CMakeLists.txt | 2 +- .../DataFlow/IfdsIde/Solver/Compressor.h | 9 +- .../IfdsIde/Solver/IterativeIDESolver.h | 38 +++++---- .../FieldSensAllocSitesAwareIFDSProblem.h | 11 +++ .../FieldSensAllocSitesAwareIFDSProblem.cpp | 47 +++++++++-- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 82 ++++++++++++++++++- .../Problems/IDEExtendedTaintAnalysisTest.cpp | 2 +- 7 files changed, 160 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5370c88ee3..ddb6a5e55e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -347,7 +347,7 @@ else() endif() # Clang -option(BUILD_PHASAR_CLANG "Build the phasar_clang library (default is ON)" ON) +option(BUILD_PHASAR_CLANG "Build the phasar_clang library (default is OFF)" OFF) if(BUILD_PHASAR_CLANG) add_clang() endif() diff --git a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h index 
d0f9472d9c..cafa37fc53 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/Compressor.h @@ -23,7 +23,8 @@ template class Compressor; /// /// This specialization handles types that can be efficiently passed by value template -class Compressor>> { +class Compressor && + has_llvm_dense_map_info>> { public: void reserve(size_t Capacity) { assert(Capacity <= UINT32_MAX); @@ -70,7 +71,8 @@ class Compressor>> { /// /// This specialization handles types that cannot be efficiently passed by value template -class Compressor>> { +class Compressor || + !has_llvm_dense_map_info>> { public: void reserve(size_t Capacity) { assert(Capacity <= UINT32_MAX); @@ -132,6 +134,9 @@ class Compressor>> { assert(Elem != nullptr); if constexpr (has_llvm_dense_map_info) { return llvm::DenseMapInfo::getHashValue(*Elem); + } else if constexpr (is_llvm_hashable_v) { + using llvm::hash_value; + return hash_value(*Elem); } else { return std::hash{}(*Elem); } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h index 68ee40cb80..1f3574556a 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h @@ -474,7 +474,7 @@ class IterativeIDESolver return true; } - auto NewEF = EF.joinWith(std::move(LocalEF)); + auto NewEF = Problem.combine(EF, std::move(LocalEF)); assert(NewEF != nullptr); if (NewEF != EF) { @@ -540,7 +540,7 @@ class IterativeIDESolver return; } - auto NewEF = EF.joinWith(std::move(LocalEF)); + auto NewEF = Problem.combine(EF, std::move(LocalEF)); assert(NewEF != nullptr); if (NewEF != EF) { @@ -622,10 +622,11 @@ class IterativeIDESolver auto FactId = FactCompressor.getOrInsert(Fact); auto EF = [&] { if constexpr (ComputeValues) { - return SourceEF.composeWith(FECache.getNormalEdgeFunction( - Problem, AtInstruction, CSFact, Succ, Fact, - combineIds(AtInstructionId, 
SuccId), - combineIds(PropagatedFactId, FactId))); + return Problem.extend(SourceEF, + FECache.getNormalEdgeFunction( + Problem, AtInstruction, CSFact, Succ, + Fact, combineIds(AtInstructionId, SuccId), + combineIds(PropagatedFactId, FactId))); } else { return EdgeFunctionPtrType{}; } @@ -694,10 +695,12 @@ class IterativeIDESolver auto EF = [&] { if constexpr (ComputeValues) { - return SourceEF.composeWith(FECache.getCallToRetEdgeFunction( - Problem, AtInstruction, CSFact, RetSite, Fact, Callees /*Vec*/, - combineIds(AtInstructionId, RetSiteId), - combineIds(PropagatedFactId, FactId))); + return Problem.extend(SourceEF, + FECache.getCallToRetEdgeFunction( + Problem, AtInstruction, CSFact, RetSite, + Fact, Callees /*Vec*/, + combineIds(AtInstructionId, RetSiteId), + combineIds(PropagatedFactId, FactId))); } else { return EdgeFunctionPtrType{}; } @@ -835,10 +838,11 @@ class IterativeIDESolver auto CallEF = [&] { if constexpr (ComputeValues) { - return SourceEF.composeWith(FECache.getCallEdgeFunction( - Problem, AtInstruction, CSFact, Callee, Fact, - combineIds(AtInstructionId, CalleeId), - combineIds(CSFactId, FactId))); + return Problem.extend( + SourceEF, FECache.getCallEdgeFunction( + Problem, AtInstruction, CSFact, Callee, Fact, + combineIds(AtInstructionId, CalleeId), + combineIds(CSFactId, FactId))); } else { return EdgeFunctionPtrType{}; } @@ -900,7 +904,7 @@ class IterativeIDESolver Problem, AtInstruction, CSFact, RetSite, Fact, combineIds(AtInstructionId, RetSiteId), combineIds(CSFactId, FactId)); - return EF ? SourceEF.composeWith(std::move(EF)) : SourceEF; + return EF ? 
Problem.extend(SourceEF, std::move(EF)) : SourceEF; } else { return EdgeFunctionPtrType{}; } @@ -939,8 +943,8 @@ class IterativeIDESolver Problem, CallSite, Callee, ExitInst, SummaryFact, RetSite, RetFact, ExitId, combineIds(CSId, RSId), combineIds(SummaryFactId, RetFactId)); - return CallEF.composeWith(Summary.second) - .composeWith(std::move(RetEF)); + return Problem.extend(Problem.extend(CallEF, Summary.second), + std::move(RetEF)); } else { return EdgeFunctionPtrType{}; } diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index b01a58ea34..37cc7dff9f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -118,6 +118,10 @@ struct CFLFieldSensEdgeValue { return !(*this == Other); } + [[nodiscard]] friend auto hash_value(const CFLFieldSensEdgeValue EV) { + return llvm::hash_combine_range(EV.Paths.begin(), EV.Paths.end()); + } + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CFLFieldSensEdgeValue &EV); }; @@ -178,6 +182,13 @@ class FieldSensAllocSitesAwareIFDSProblem FieldSensAllocSitesAwareIFDSProblem(std::nullptr_t, LLVMAliasInfoRef AS) = delete; + // TODO: Provide a customization-point to provide sanitizer information to the + // edge functions! + + // TODO: Provide a customization-point to provide gen offsets to the + // edge-functions (generating from zero currently always generates at + // epsilon!) 
+ [[nodiscard]] InitialSeeds initialSeeds() override; [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 21040cb955..5d2149d5a0 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -145,6 +145,8 @@ void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { } void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { + llvm::errs() << "[applyGepAndKill]: " << *this << " + " << Evt.Field << "\n"; + auto Save = std::exchange(Paths, {}); for (const auto &F : Save) { @@ -154,13 +156,25 @@ void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { auto FF = F; FF.Kills.insert(Offs); Paths.insert(std::move(FF)); + llvm::errs() << "> add K" << Offs << '\n'; continue; } if (F.Stores.back() == Offs) { + llvm::errs() << "> Kill "; + llvm::interleave( + F.Stores, llvm::errs(), + [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); + llvm::errs() << '\n'; continue; } + llvm::errs() << "> Retain "; + llvm::interleave( + F.Stores, llvm::errs(), + [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); + llvm::errs() << '\n'; + assert(F.Stores.back() != Offs); Paths.insert(F); } @@ -331,14 +345,22 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( // TODO;: How to deal with BasePtr? 
+ const auto &DL = IRDB->getModule()->getDataLayout(); + auto [BasePtr, Offset] = getBaseAndOffset(PointerOp, DL); + + auto [BaseBasePtr, + BaseOffset] = [&]() -> std::pair { + if (BasePtr != SuccNode && llvm::isa(BasePtr)) { + return getBaseAndOffset( + llvm::cast(BasePtr)->getPointerOperand(), DL); + } + + return {nullptr, INT32_MIN}; + }(); if (CurrNode == SuccNode && - (PointerOp == CurrNode || - PointerOp->stripPointerCastsAndAliases() == CurrNode)) { + (BasePtr == CurrNode || BaseBasePtr == CurrNode)) { // Kill - auto [BasePtr, Offset] = - getBaseAndOffset(PointerOp, IRDB->getModule()->getDataLayout()); - CFLFieldAccessPath FieldString{}; FieldString.Kills.insert(Offset); return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; @@ -348,11 +370,20 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (ValueOp == CurrNode && CurrNode != SuccNode) { // Store - auto [BasePtr, Offset] = - getBaseAndOffset(PointerOp, IRDB->getModule()->getDataLayout()); - CFLFieldAccessPath FieldString{}; FieldString.Stores.push_back(Offset); + + if (BasePtr != SuccNode && llvm::isa(BasePtr)) { + // This is a hack, to be more correct wih field-insensitive alias + // information + auto [BaseBasePtr, BaseOffset] = getBaseAndOffset( + llvm::cast(BasePtr)->getPointerOperand(), DL); + if (BaseBasePtr == SuccNode) { + // push back, or push front? 
+ FieldString.Stores.insert(FieldString.Stores.begin(), BaseOffset); + } + } + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index 3219554c8a..5418b5e578 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -1,5 +1,6 @@ #include "phasar/ControlFlow/CallGraphAnalysisType.h" #include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" +#include "phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" @@ -123,9 +124,14 @@ class CFLFieldSensTest : public ::testing::Test { psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, nullptr, &BaseAS); - auto Results = psr::solveIDEProblem(FsTaintProblem, ICFG); + psr::IterativeIDESolver Solver(&FsTaintProblem, &ICFG); + Solver.solve(); + auto Results = Solver.getSolverResults(); - Results.dumpResults(ICFG); + Solver.dumpResults(); + + // auto Results = psr::solveIDEProblem(FsTaintProblem, ICFG); + // Results.dumpResults(ICFG); std::map> ComputedLeaks; @@ -196,6 +202,78 @@ TEST_F(CFLFieldSensTest, Basic_06) { run({PathToLLFiles + "xtaint06_cpp.ll"}, Gt); } +TEST_F(CFLFieldSensTest, Basic_09_1) { + std::map> Gt; + + Gt[25] = {"24"}; + + run({PathToLLFiles + "xtaint09_1_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_12) { + std::map> Gt; + + // We sanitize an alias - since we don't have must-alias relations, we cannot + // kill aliases at all + Gt[28] = {"27"}; + + run({PathToLLFiles + "xtaint12_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_13) { + GTEST_SKIP() << "Requires sanitizer-callback to edge-functions to prevent " + "{28: {27}} to be leaked!"; + std::map> Gt; + + Gt[30] = {"29"}; + + 
run({PathToLLFiles + "xtaint13_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_14) { + GTEST_SKIP() << "Requires sanitizer-callback to edge-functions to prevent " + "{31: {30}} to be leaked!"; + std::map> Gt; + + Gt[33] = {"32"}; + + run({PathToLLFiles + "xtaint14_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_16) { + std::map> Gt; + + Gt[24] = {"23"}; + + run({PathToLLFiles + "xtaint16_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_17) { + std::map> Gt; + + Gt[27] = {"26"}; + + run({PathToLLFiles + "xtaint17_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_18) { + std::map> Gt; + + // no leaks expected + + run({PathToLLFiles + "xtaint18_cpp.ll"}, Gt); +} + +TEST_F(CFLFieldSensTest, Basic_20) { + std::map> Gt; + + Gt[22] = {"14"}; + // Gt[24] = {"23"}; // no leak here, because above we define the semantics to + // exclude deep taints! + + run({PathToLLFiles + "xtaint20_cpp.ll"}, Gt); +} + } // namespace int main(int Argc, char **Argv) { diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp index 51af9f3058..7e7dd8bdef 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysisTest.cpp @@ -202,7 +202,7 @@ TEST_F(IDETaintAnalysisTest, XTaint09_1) { Gt[25] = {"24"}; - doAnalysis({PathToLLFiles + "xtaint09_1_cpp.ll"}, Gt, std::monostate{}); + doAnalysis({PathToLLFiles + "xtaint09_1_cpp.ll"}, Gt, std::monostate{}, true); } TEST_F(IDETaintAnalysisTest, XTaint09) { From 02f08ab29798cea5e4ba9c07e48b4dbfda4ce7d5 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 23 Jan 2026 16:50:36 +0100 Subject: [PATCH 08/29] Add sanitizer-customization-point + fix bug in combine() with EdgeIdentity --- .../FieldSensAllocSitesAwareIFDSProblem.h | 45 +++++++++++++--- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 47 ++++++++++------- 
.../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 51 +++++++++++++++++-- 3 files changed, 112 insertions(+), 31 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 37cc7dff9f..feac82af34 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -21,7 +21,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" @@ -131,8 +133,37 @@ struct CFLFieldSensAnalysisDomain : AnalysisDomainTy { using l_t = LatticeDomain; }; +struct FieldSensAllocSitesAwareIFDSProblemConfig + : LLVMIFDSAnalysisDomainDefault { + llvm::unique_function(n_t Curr, d_t CurrNode)> KillsAt; + // TODO: more +}; + +class FieldSensAllocSitesAwareIFDSProblemBase { +public: + [[nodiscard]] static std::pair + getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { + llvm::APInt Offset(64, 0); + int32_t OffsVal = CFLFieldAccessPath::TopOffset; + const auto *Base = V->stripAndAccumulateConstantOffsets(DL, Offset, true); + + if (llvm::isa(Base)) { + return {Base->stripPointerCastsAndAliases(), + CFLFieldAccessPath::TopOffset}; + } + + auto RawOffsVal = Offset.getSExtValue(); + if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { + OffsVal = int32_t(RawOffsVal); + } + + return {Base->stripPointerCastsAndAliases(), OffsVal}; + } +}; + class FieldSensAllocSitesAwareIFDSProblem - : public IDETabulationProblem< + : public FieldSensAllocSitesAwareIFDSProblemBase, + public IDETabulationProblem< CFLFieldSensAnalysisDomain> { using Base = IDETabulationProblem< CFLFieldSensAnalysisDomain>; @@ -167,24 +198,23 @@ class FieldSensAllocSitesAwareIFDSProblem 
using typename Base::t_t; using typename Base::v_t; - // Constructs an IDETabulationProblem with the usual arguments + alias + /// Constructs an IDETabulationProblem with the usual arguments + alias /// information. /// /// \note It is useful to use an instance of FilteredAliasSet for the alias /// information to lower suprious aliases explicit FieldSensAllocSitesAwareIFDSProblem( IFDSTabulationProblem *UserProblem, - LLVMAliasInfoRef AS) noexcept(std::is_nothrow_move_constructible_v) + LLVMAliasInfoRef AS, + FieldSensAllocSitesAwareIFDSProblemConfig Config = + {}) noexcept(std::is_nothrow_move_constructible_v) : Base(UserProblem->getProjectIRDB(), UserProblem->getEntryPoints(), UserProblem->getZeroValue()), - AS(AS), UserProblem(UserProblem) {} + AS(AS), UserProblem(UserProblem), Config(std::move(Config)) {} FieldSensAllocSitesAwareIFDSProblem(std::nullptr_t, LLVMAliasInfoRef AS) = delete; - // TODO: Provide a customization-point to provide sanitizer information to the - // edge functions! - // TODO: Provide a customization-point to provide gen offsets to the // edge-functions (generating from zero currently always generates at // epsilon!) 
@@ -251,6 +281,7 @@ class FieldSensAllocSitesAwareIFDSProblem LLVMAliasInfoRef AS; IFDSTabulationProblem *UserProblem{}; + FieldSensAllocSitesAwareIFDSProblemConfig Config{}; MemoryLocationAllocator MemLocAlloc{}; llvm::DenseMap MemLocCache{}; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 5d2149d5a0..3a2a9d3386 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -314,24 +314,6 @@ auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() return {std::move(Ret)}; } -static std::pair -getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { - llvm::APInt Offset(64, 0); - int32_t OffsVal = CFLFieldAccessPath::TopOffset; - const auto *Base = V->stripAndAccumulateConstantOffsets(DL, Offset, true); - - if (llvm::isa(Base)) { - return {Base->stripPointerCastsAndAliases(), CFLFieldAccessPath::TopOffset}; - } - - auto RawOffsVal = Offset.getSExtValue(); - if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { - OffsVal = int32_t(RawOffsVal); - } - - return {Base->stripPointerCastsAndAliases(), OffsVal}; -} - auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { @@ -457,6 +439,16 @@ auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode, llvm::ArrayRef Callees) -> EdgeFunction { + + if (CallNode == RetSiteNode && Config.KillsAt) { + if (auto KillOffs = Config.KillsAt(CallSite, CallNode)) { + // Let the summary-FF kill the fact + + // XXX: Can we somehow circumvent calling KillsAt twice? 
+ return AllTop{}; + } + } + if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Gen from zero @@ -469,6 +461,23 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { + + llvm::errs() << "[getSummaryEdgeFunction]: Curr: " << llvmIRToString(Curr) + << ":\n"; + llvm::errs() << " > CurrNode: " << llvmIRToString(CurrNode) << '\n'; + llvm::errs() << " > SuccNode: " << llvmIRToString(SuccNode) << '\n'; + + if (CurrNode == SuccNode && Config.KillsAt) { + if (auto KillOffs = Config.KillsAt(Curr, CurrNode)) { + // kill + llvm::errs() << " > request to kill " << llvmIRToString(CurrNode) + << " with offset " << *KillOffs << '\n'; + CFLFieldAccessPath FieldString{}; + FieldString.Kills.insert(*KillOffs); + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + } + } + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero @@ -560,7 +569,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, Txn.Paths.insert(CFLFieldAccessPath{}); return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; } - } else if (FldSensR && L.isa>()) { + } else if (FldSensR && L.isa>()) { if (FldSensR->Transform.Paths.contains(CFLFieldAccessPath{})) { return R; } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index 5418b5e578..ef1ec65cdd 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -20,6 +20,9 @@ #include "TestConfig.h" #include "gtest/gtest.h" +#include +#include + namespace { template @@ -60,6 +63,36 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { return Seeds; }; + [[nodiscard]] auto killsAt() const { + return [this](n_t Curr, d_t CurrNode) -> std::optional { + const auto 
*CS = llvm::dyn_cast(Curr); + if (!CS) { + return std::nullopt; + } + + const auto *DestFun = CS->getCalledFunction(); + if (!DestFun) { + return std::nullopt; + } + + container_type Kill; + psr::collectSanitizedFacts(Kill, *Config, CS, DestFun); + + const auto &DL = IRDB->getModule()->getDataLayout(); + + for (const auto *KillFact : Kill) { + auto [BasePtr, Offset] = + psr::FieldSensAllocSitesAwareIFDSProblemBase::getBaseAndOffset( + KillFact, DL); + if (BasePtr == CurrNode) { + return Offset; + } + } + + return std::nullopt; + }; + } + [[nodiscard]] FlowFunctionPtrType getSummaryFlowFunction(n_t CallSite, f_t DestFun) override { const auto *CS = llvm::cast(CallSite); @@ -119,7 +152,11 @@ class CFLFieldSensTest : public ::testing::Test { psr::LLVMTaintConfig TC(IRDB); ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); - psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem(&TaintProblem, &AS); + psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem( + &TaintProblem, &AS, + { + .KillsAt = TaintProblem.killsAt(), + }); psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, nullptr, &BaseAS); @@ -210,6 +247,14 @@ TEST_F(CFLFieldSensTest, Basic_09_1) { run({PathToLLFiles + "xtaint09_1_cpp.ll"}, Gt); } +TEST_F(CFLFieldSensTest, Basic_09) { + std::map> Gt; + + Gt[24] = {"23"}; + + run({PathToLLFiles + "xtaint09_cpp.ll"}, Gt); +} + TEST_F(CFLFieldSensTest, Basic_12) { std::map> Gt; @@ -221,8 +266,6 @@ TEST_F(CFLFieldSensTest, Basic_12) { } TEST_F(CFLFieldSensTest, Basic_13) { - GTEST_SKIP() << "Requires sanitizer-callback to edge-functions to prevent " - "{28: {27}} to be leaked!"; std::map> Gt; Gt[30] = {"29"}; @@ -231,8 +274,6 @@ TEST_F(CFLFieldSensTest, Basic_13) { } TEST_F(CFLFieldSensTest, Basic_14) { - GTEST_SKIP() << "Requires sanitizer-callback to edge-functions to prevent " - "{31: {30}} to be leaked!"; std::map> Gt; Gt[33] = {"32"}; From 5687866c930dc3e86e3561038372013b91115ae1 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel 
Date: Tue, 27 Jan 2026 15:43:53 +0100 Subject: [PATCH 09/29] Fix minor compilation issue --- include/phasar/Domain/LatticeDomain.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/phasar/Domain/LatticeDomain.h b/include/phasar/Domain/LatticeDomain.h index fac9016454..8b2be7f222 100644 --- a/include/phasar/Domain/LatticeDomain.h +++ b/include/phasar/Domain/LatticeDomain.h @@ -252,7 +252,12 @@ template struct hash> { return SIZE_MAX - 1; } assert(LD.getValueOrNull() != nullptr); - return std::hash{}(*LD.getValueOrNull()); + if constexpr (psr::is_std_hashable_v) { + return std::hash{}(*LD.getValueOrNull()); + } else { + using llvm::hash_value; + return hash_value(*LD.getValueOrNull()); + } } }; } // namespace std From 1b6b8314ae9c2320006198d475f0ef398ba9436e Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 27 Jan 2026 16:21:26 +0100 Subject: [PATCH 10/29] Out-comment unnecessary stuff --- .../FieldSensAllocSitesAwareIFDSProblem.h | 37 ++-- .../PhasarLLVM/Pointer/LLVMFieldAliasSet.h | 5 + .../FieldSensAllocSitesAwareIFDSProblem.cpp | 196 +++++++++--------- lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp | 27 +-- 4 files changed, 137 insertions(+), 128 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index feac82af34..fc68ab744a 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -47,7 +47,7 @@ struct GEPEvent { struct CFLFieldAccessPath { static constexpr int32_t TopOffset = INT32_MIN; - // TODO: compose, DenseMapInfo + // TODO: compose llvm::SmallVector Loads; llvm::SmallVector Stores; @@ -61,7 +61,7 @@ struct CFLFieldAccessPath { } [[nodiscard]] bool kills(int32_t Off) const { - return Off != TopOffset && Kills.count(Off); + return Off != TopOffset && 
Kills.contains(Off); } [[nodiscard]] bool @@ -168,22 +168,22 @@ class FieldSensAllocSitesAwareIFDSProblem using Base = IDETabulationProblem< CFLFieldSensAnalysisDomain>; - struct CachedAccessPath final - : public llvm::TrailingObjects { + // struct CachedAccessPath final + // : public llvm::TrailingObjects { - using OffsetType = int32_t; + // using OffsetType = int32_t; - constexpr CachedAccessPath(const llvm::Value *BasePtr, - uint32_t NumOffsets) noexcept - : BasePtr(BasePtr), NumOffsets(NumOffsets) {} + // constexpr CachedAccessPath(const llvm::Value *BasePtr, + // uint32_t NumOffsets) noexcept + // : BasePtr(BasePtr), NumOffsets(NumOffsets) {} - const llvm::Value *BasePtr{}; - uint32_t NumOffsets{}; + // const llvm::Value *BasePtr{}; + // uint32_t NumOffsets{}; - [[nodiscard]] llvm::ArrayRef offsets() const noexcept { - return {this->getTrailingObjects(), NumOffsets}; - } - }; + // [[nodiscard]] llvm::ArrayRef offsets() const noexcept { + // return {this->getTrailingObjects(), NumOffsets}; + // } + // }; public: using typename Base::container_type; @@ -276,14 +276,15 @@ class FieldSensAllocSitesAwareIFDSProblem const EdgeFunction &R) override; private: - [[nodiscard]] const CachedAccessPath * - getAccessPath(const llvm::Value *Pointer); + // [[nodiscard]] const CachedAccessPath * + // getAccessPath(const llvm::Value *Pointer); LLVMAliasInfoRef AS; IFDSTabulationProblem *UserProblem{}; FieldSensAllocSitesAwareIFDSProblemConfig Config{}; - MemoryLocationAllocator MemLocAlloc{}; - llvm::DenseMap MemLocCache{}; + // MemoryLocationAllocator MemLocAlloc{}; + // llvm::DenseMap + // MemLocCache{}; uint8_t DepthKLimit = 5; // Original from the paper }; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h index 337348a007..f84a23447b 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h @@ -56,6 +56,9 @@ class 
LLVMBasePointerAliasSet { LLVMAliasInfoRef AS; }; +} // namespace psr +#if 0 +namespace psr{ class LLVMFieldAliasSet { public: struct AccessPath { @@ -127,4 +130,6 @@ template <> struct DenseMapInfo { }; } // namespace llvm +#endif + #endif // PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 3a2a9d3386..ce8ceb5de6 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -2,8 +2,6 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Fn.h" #include "phasar/Utils/Union.h" @@ -16,6 +14,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -36,12 +35,12 @@ constexpr static int32_t addOffsets(int32_t L, int32_t R) noexcept { return CFLFieldAccessPath::TopOffset; } - auto Sum = int64_t(L) + int64_t(R); - if (Sum < INT32_MIN || Sum > INT32_MAX) { - Sum = CFLFieldAccessPath::TopOffset; + int32_t Sum{}; + if (llvm::AddOverflow(L, R, Sum)) { + return CFLFieldAccessPath::TopOffset; } - return int32_t(Sum); + return Sum; } struct CFLFieldSensEdgeFunction { @@ -200,7 +199,7 @@ void CFLFieldSensEdgeValue::applyStore(uint8_t DepthKLimit) { void CFLFieldSensEdgeValue::applyLoad(uint8_t DepthKLimit) { applyGepAndLoad(GEPEvent{0}, DepthKLimit); } -void CFLFieldSensEdgeValue::applyKill() { return applyGepAndKill(GEPEvent{0}); } +void CFLFieldSensEdgeValue::applyKill() { applyGepAndKill(GEPEvent{0}); 
} void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit) { @@ -228,15 +227,15 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, return; } + auto It = Txns.Paths.begin(); if (Txns.Paths.size() == 1) { - applyTransform(*Txns.Paths.begin(), DepthKLimit); + applyTransform(*It, DepthKLimit); return; } + auto End = Txns.Paths.end(); auto Ret = *this; - auto It = Txns.Paths.begin(); - auto End = Txns.Paths.end(); Ret.applyTransform(*It, DepthKLimit); for (++It; It != End; ++It) { @@ -315,7 +314,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() } auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( - n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { + n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero @@ -353,19 +352,19 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( // Store CFLFieldAccessPath FieldString{}; - FieldString.Stores.push_back(Offset); - if (BasePtr != SuccNode && llvm::isa(BasePtr)) { // This is a hack, to be more correct wih field-insensitive alias // information auto [BaseBasePtr, BaseOffset] = getBaseAndOffset( llvm::cast(BasePtr)->getPointerOperand(), DL); if (BaseBasePtr == SuccNode) { - // push back, or push front? - FieldString.Stores.insert(FieldString.Stores.begin(), BaseOffset); + // push before Offset, or after? 
+ FieldString.Stores.push_back(BaseOffset); } } + FieldString.Stores.push_back(Offset); + return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } @@ -383,11 +382,11 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( int32_t LoadOffs = 0; - if (BasePtr == CurrNode && Load->getPointerOperand() != CurrNode) { - // This is a hack, but we do sth similar in the IDEExtendedTaintAnalysis - // (see forEachAliasOf() Lines 144 ff) - LoadOffs = Offset; - } + // if (BasePtr == CurrNode && Load->getPointerOperand() != CurrNode) { + // This is a hack, but we do sth similar in the IDEExtendedTaintAnalysis + // (see forEachAliasOf() Lines 144 ff) + LoadOffs = Offset; + // } // TODO;: How to deal with BasePtr? @@ -412,7 +411,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( } auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( - n_t CallSite, d_t SrcNode, f_t DestinationFunction, d_t DestNode) + n_t /*CallSite*/, d_t SrcNode, f_t /*DestinationFunction*/, d_t DestNode) -> EdgeFunction { if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Gen from zero @@ -425,8 +424,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( } auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( - n_t CallSite, f_t CalleeFunction, n_t ExitStmt, d_t ExitNode, n_t RetSite, - d_t RetNode) -> EdgeFunction { + n_t /*CallSite*/, f_t /*CalleeFunction*/, n_t /*ExitStmt*/, d_t ExitNode, + n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Gen from zero @@ -437,8 +436,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( } auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( - n_t CallSite, d_t CallNode, n_t RetSite, d_t RetSiteNode, - llvm::ArrayRef Callees) -> EdgeFunction { + n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, + llvm::ArrayRef /*Callees*/) -> EdgeFunction { if (CallNode == RetSiteNode && Config.KillsAt) { 
if (auto KillOffs = Config.KillsAt(CallSite, CallNode)) { @@ -460,7 +459,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( } auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( - n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) -> EdgeFunction { + n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { llvm::errs() << "[getSummaryEdgeFunction]: Curr: " << llvmIRToString(Curr) << ":\n"; @@ -545,7 +544,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, if (FldSensL) { if (FldSensR) { - bool LeftSmaller = + const bool LeftSmaller = FldSensL->Transform.Paths.size() < FldSensR->Transform.Paths.size(); bool Changed = false; @@ -587,74 +586,75 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, return Ret; } -static std::pair> -createAccessPath(const llvm::Value *Pointer, const llvm::DataLayout &DL) { - - std::pair> Ret; - auto &[BasePtr, Offsets] = Ret; - - BasePtr = Pointer; - Offsets.push_back(0); - - // Note: llvm::Constant includes llvm::GlobalValue - if (llvm::isa(Pointer) || - llvm::isa(Pointer) || - llvm::isa(Pointer) || - llvm::isa(Pointer)) { - // Globals, argument, function calls and allocas define themselves - return Ret; - } - - while (true) { - // TODO: Should we look into the cache within this loop? - // TODO: Handle constant GEPs - - if (const auto *Load = llvm::dyn_cast(BasePtr)) { - Offsets.push_back(0); - BasePtr = Load->getPointerOperand()->stripPointerCasts(); - } else if (const auto *Gep = llvm::dyn_cast(BasePtr)) { - - auto GepOffs = detail::AbstractMemoryLocationImpl::computeOffset(DL, Gep); - if (GepOffs.has_value() && *GepOffs >= INT32_MIN && - *GepOffs <= INT32_MAX) { - Offsets.back() = addOffsets(Offsets.back(), int32_t(*GepOffs)); - } else { - Offsets.back() = CFLFieldAccessPath::TopOffset; - } - BasePtr = Gep->getPointerOperand()->stripPointerCasts(); - } else { - // TODO aggregate instructions, e.g. insertvalue, extractvalue, ... 
- break; - } - } - - // NOTE: Do not reverse the offsets as we do in - // AbstractMemoryLocationFactoryBase::createImpl(). - // For the CFL formulation, we need the offsets in inverse order anyway! - - return Ret; -} - -auto FieldSensAllocSitesAwareIFDSProblem::getAccessPath( - const llvm::Value *Pointer) -> const CachedAccessPath * { - auto &Ret = MemLocCache[Pointer]; - if (Ret) { - return Ret; - } - - auto [BasePtr, Offsets] = - createAccessPath(Pointer, IRDB->getModule()->getDataLayout()); - assert(Offsets.size() < UINT32_MAX); - - using OffsetType = CachedAccessPath::OffsetType; - - auto NumBytes = - CachedAccessPath::totalSizeToAlloc(Offsets.size()); - auto *RawMem = MemLocAlloc.allocate(NumBytes); - auto *AP = new (RawMem) CachedAccessPath(BasePtr, Offsets.size()); - memcpy(AP->getTrailingObjects(), Offsets.data(), - Offsets.size() * sizeof(OffsetType)); - - Ret = AP; - return AP; -} +// static std::pair> +// createAccessPath(const llvm::Value *Pointer, const llvm::DataLayout &DL) { + +// std::pair> Ret; +// auto &[BasePtr, Offsets] = Ret; + +// BasePtr = Pointer; +// Offsets.push_back(0); + +// // Note: llvm::Constant includes llvm::GlobalValue +// if (llvm::isa(Pointer) || +// llvm::isa(Pointer) || +// llvm::isa(Pointer) || +// llvm::isa(Pointer)) { +// // Globals, argument, function calls and allocas define themselves +// return Ret; +// } + +// while (true) { +// // TODO: Should we look into the cache within this loop? 
+// // TODO: Handle constant GEPs + +// if (const auto *Load = llvm::dyn_cast(BasePtr)) { +// Offsets.push_back(0); +// BasePtr = Load->getPointerOperand()->stripPointerCasts(); +// } else if (const auto *Gep = llvm::dyn_cast(BasePtr)) +// { + +// auto GepOffs = detail::AbstractMemoryLocationImpl::computeOffset(DL, +// Gep); if (GepOffs.has_value() && *GepOffs >= INT32_MIN && +// *GepOffs <= INT32_MAX) { +// Offsets.back() = addOffsets(Offsets.back(), int32_t(*GepOffs)); +// } else { +// Offsets.back() = CFLFieldAccessPath::TopOffset; +// } +// BasePtr = Gep->getPointerOperand()->stripPointerCasts(); +// } else { +// // TODO aggregate instructions, e.g. insertvalue, extractvalue, ... +// break; +// } +// } + +// // NOTE: Do not reverse the offsets as we do in +// // AbstractMemoryLocationFactoryBase::createImpl(). +// // For the CFL formulation, we need the offsets in inverse order anyway! + +// return Ret; +// } + +// auto FieldSensAllocSitesAwareIFDSProblem::getAccessPath( +// const llvm::Value *Pointer) -> const CachedAccessPath * { +// auto &Ret = MemLocCache[Pointer]; +// if (Ret) { +// return Ret; +// } + +// auto [BasePtr, Offsets] = +// createAccessPath(Pointer, IRDB->getModule()->getDataLayout()); +// assert(Offsets.size() < UINT32_MAX); + +// using OffsetType = CachedAccessPath::OffsetType; + +// auto NumBytes = +// CachedAccessPath::totalSizeToAlloc(Offsets.size()); +// auto *RawMem = MemLocAlloc.allocate(NumBytes); +// auto *AP = new (RawMem) CachedAccessPath(BasePtr, Offsets.size()); +// memcpy(AP->getTrailingObjects(), Offsets.data(), +// Offsets.size() * sizeof(OffsetType)); + +// Ret = AP; +// return AP; +// } diff --git a/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp b/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp index 6a49391183..396e16448d 100644 --- a/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp +++ b/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp @@ -1,7 +1,5 @@ #include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" -#include 
"phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/ExtendedTaintAnalysis/AbstractMemoryLocation.h" - #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -37,6 +35,19 @@ LLVMBasePointerAliasSet::getBasePointer(const llvm::Value *Pointer) { return Pointer; } +auto LLVMBasePointerAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const + -> AliasSetPtrTy { + auto Aliases = AS.getAliasSet(Pointer, AtInstruction); + + auto Ret = std::make_unique(); + for (const auto *Alias : *Aliases) { + Ret->insert(getBasePointer(Alias)); + } + + return Ret; +} + +#if 0 static constexpr ptrdiff_t TopOffset = LLVMFieldAliasSet::AccessPath::TopOffset; constexpr static void addOffset(ptrdiff_t &Into, ptrdiff_t Offs) noexcept { @@ -83,17 +94,7 @@ auto LLVMFieldAliasSet::getAccessPath(const llvm::Value *Pointer) const return Ret; } -auto LLVMBasePointerAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const - -> AliasSetPtrTy { - auto Aliases = AS.getAliasSet(Pointer, AtInstruction); - - auto Ret = std::make_unique(); - for (const auto *Alias : *Aliases) { - Ret->insert(getBasePointer(Alias)); - } - return Ret; -} auto LLVMFieldAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const -> AliasSetPtrTy { @@ -106,3 +107,5 @@ auto LLVMFieldAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const return Ret; } + +#endif From 76a347c02c6392b03e3fa9276c177d9de42c7cb3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 28 Jan 2026 15:52:07 +0100 Subject: [PATCH 11/29] Convert CFLFieldSensTest to testingSrcLocation + add logger to FieldSensAllocSitesAwareIFDSProblem --- .../FieldSensAllocSitesAwareIFDSProblem.h | 5 + include/phasar/Utils/Union.h | 2 +- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 192 +++++++++++++++--- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 143 +++++++------ 4 files changed, 252 insertions(+), 90 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h 
b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index fc68ab744a..9c5399f91b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -103,6 +103,8 @@ struct CFLFieldAccessPathDMI { struct CFLFieldSensEdgeValue { llvm::SmallDenseSet Paths; + static constexpr llvm::StringLiteral LogCategory = "CFLFieldSensEdgeValue"; + void applyStore(uint8_t DepthKLimit); void applyGepAndStore(GEPEvent Evt, uint8_t DepthKLimit); void applyLoad(uint8_t DepthKLimit); @@ -141,6 +143,9 @@ struct FieldSensAllocSitesAwareIFDSProblemConfig class FieldSensAllocSitesAwareIFDSProblemBase { public: + static constexpr llvm::StringLiteral LogCategory = + "FieldSensAllocSitesAwareIFDSProblem"; + [[nodiscard]] static std::pair getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { llvm::APInt Offset(64, 0); diff --git a/include/phasar/Utils/Union.h b/include/phasar/Utils/Union.h index 23b6aaa4f7..8689d328a7 100644 --- a/include/phasar/Utils/Union.h +++ b/include/phasar/Utils/Union.h @@ -18,7 +18,7 @@ namespace psr { template [[nodiscard]] std::decay_t setUnion(SetLT &&First, SetRT &&Second, bool *ChangedPtr = nullptr) { - bool FirstSmaller = First.size() < Second.size(); + const bool FirstSmaller = First.size() < Second.size(); auto &Smaller = FirstSmaller ? 
First : Second; bool ChangedBuf = false; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index ce8ceb5de6..57c58863d6 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -4,6 +4,7 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Fn.h" +#include "phasar/Utils/Logger.h" #include "phasar/Utils/Union.h" #include "llvm/ADT/APInt.h" @@ -21,6 +22,7 @@ #include #include #include +#include #include using namespace psr; @@ -77,6 +79,95 @@ struct CFLFieldSensEdgeFunction { } }; +[[nodiscard]] std::string storesToString(const CFLFieldAccessPath &AP) { + std::string Ret; + llvm::raw_string_ostream ROS(Ret); + + llvm::interleave( + AP.Stores, ROS, [&ROS](auto StoreOffs) { ROS << 'S' << StoreOffs; }, "."); + + return Ret; +} + +// Returns whether to retain F +[[nodiscard]] auto applyOneGepAndStore(CFLFieldAccessPath &F, GEPEvent Evt, + uint8_t DepthKLimit) { + if (F.Stores.size() == DepthKLimit) { + // TODO: Optimize: + F.Stores.erase(F.Stores.begin()); + } + F.Stores.push_back(std::exchange(F.Offset, 0) + Evt.Field); + return std::true_type{}; +} + +// Returns whether to retain F +[[nodiscard]] auto applyOneGepAndLoad(CFLFieldAccessPath &F, GEPEvent Evt, + uint8_t DepthKLimit) { + auto Offs = F.Offset + Evt.Field; + if (F.Stores.empty()) { + + if (F.kills(Offs)) { + return false; + } + + F.Offset = 0; + + // TODO: Is this application of k-limiting correct here? + // cf. 
Section 4.2.3 "K-Limiting" in the paper + if (F.Loads.size() == DepthKLimit) { + return true; + } + + F.Loads.push_back(Offs); + F.Kills.clear(); + return true; + } + + if (F.Stores.back() != Offs) { + return false; + } + + assert(F.Stores.back() == Offs); + F.Offset = 0; + F.Stores.pop_back(); + // llvm::errs() << "> pop_back\n"; + return true; +} + +[[nodiscard]] auto applyOneGepAndKill(CFLFieldAccessPath &F, GEPEvent Evt, + uint8_t /*DepthKLimit*/) { + auto Offs = F.Offset + Evt.Field; + + if (F.Stores.empty()) { + F.Kills.insert(Offs); + PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, + "> add K" << Offs); + return true; + } + + if (F.Stores.back() == Offs) { + PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, + "> Kill " << storesToString(F)); + return false; + } + + PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, + "> Retain " << storesToString(F)); + + assert(F.Stores.back() != Offs); + return true; +} + +[[nodiscard]] auto applyOneGep(CFLFieldAccessPath &F, GEPEvent Evt, + uint8_t /*DepthKLimit*/) { + if (F.Stores.empty()) { + F.Offset = addOffsets(F.Offset, Evt.Field); + } else { + F.Stores.back() = addOffsets(F.Stores.back(), -Evt.Field); + } + return std::true_type{}; +} + } // namespace void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, @@ -203,21 +294,60 @@ void CFLFieldSensEdgeValue::applyKill() { applyGepAndKill(GEPEvent{0}); } void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit) { - // TODO: Optimize! 
+ auto Save = std::exchange(Paths, {}); + Paths.reserve(Save.size()); - if (Txn.Offset) { - applyGep(GEPEvent{Txn.Offset}); - } + const auto TxnOffset = Txn.Offset; - for (auto Ld : Txn.Loads) { - applyGepAndLoad(GEPEvent{Ld}, DepthKLimit); - } - for (auto Kl : Txn.Kills) { - applyGepAndKill(GEPEvent{Kl}); - } - for (auto St : Txn.Stores) { - applyGepAndStore(GEPEvent{St}, DepthKLimit); + for (const auto &F : Save) { + auto Copy = F; + bool Retain = [&] { + if (TxnOffset) { + if (!applyOneGep(Copy, GEPEvent{TxnOffset}, DepthKLimit)) { + return false; + } + } + for (auto Ld : Txn.Loads) { + if (!applyOneGepAndLoad(Copy, GEPEvent{Ld}, DepthKLimit)) { + return false; + } + } + + for (auto Kl : Txn.Kills) { + if (!applyOneGepAndKill(Copy, GEPEvent{Kl}, DepthKLimit)) { + return false; + } + } + + for (auto St : Txn.Stores) { + if (!applyOneGepAndStore(Copy, GEPEvent{St}, DepthKLimit)) { + return false; + } + } + + return true; + }(); + + if (Retain) { + Paths.insert(std::move(Copy)); + } } + + // // TODO: Optimize! + + // if (Txn.Offset) { + // applyGep(GEPEvent{Txn.Offset}); + // } + + // for (auto Ld : Txn.Loads) { + // applyGepAndLoad(GEPEvent{Ld}, DepthKLimit); + // } + // for (auto Kl : Txn.Kills) { + // applyGepAndKill(GEPEvent{Kl}); + // } + // for (auto St : Txn.Stores) { + // applyGepAndStore(GEPEvent{St}, DepthKLimit); + // } } void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, @@ -233,6 +363,9 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, return; } + // This path should be very rare, otherwise we will for sure have a + // performance problem... 
+ auto End = Txns.Paths.end(); auto Ret = *this; @@ -392,8 +525,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( CFLFieldAccessPath FieldString{}; FieldString.Loads.push_back(LoadOffs); - llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; - llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; + // llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; + // llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; } @@ -461,16 +594,22 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { - llvm::errs() << "[getSummaryEdgeFunction]: Curr: " << llvmIRToString(Curr) - << ":\n"; - llvm::errs() << " > CurrNode: " << llvmIRToString(CurrNode) << '\n'; - llvm::errs() << " > SuccNode: " << llvmIRToString(SuccNode) << '\n'; + PHASAR_LOG_LEVEL_CAT( + DEBUG, LogCategory, + "[getSummaryEdgeFunction]: Curr: " << llvmIRToString(Curr) << ":"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " > CurrNode: " << llvmIRToString(CurrNode)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " > SuccNode: " << llvmIRToString(SuccNode)); if (CurrNode == SuccNode && Config.KillsAt) { if (auto KillOffs = Config.KillsAt(Curr, CurrNode)) { // kill - llvm::errs() << " > request to kill " << llvmIRToString(CurrNode) - << " with offset " << *KillOffs << '\n'; + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " > request to kill " << llvmIRToString(CurrNode) + << " with offset " + << *KillOffs); + CFLFieldAccessPath FieldString{}; FieldString.Kills.insert(*KillOffs); return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; @@ -483,8 +622,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; } - // TODO: Is that 
correct? -- We may need to handle field-indirections here as - // well + // TODO: Is that correct? -- We may need to handle field-indirections here + // as well return EdgeIdentity{}; } @@ -518,7 +657,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, llvm::Twine(to_string(R))); }(); - // llvm::errs() << "EXTEND " << L << " X " << R << " ==> " << Ret << '\n'; + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "EXTEND " << L << " X " << R << " ==> " << Ret); return Ret; } @@ -581,7 +721,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, return AllBottom{}; }(); - llvm::errs() << "COMBINE " << L << " X " << R << " ==> " << Ret << '\n'; + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + "COMBINE " << L << " X " << R << " ==> " << Ret); return Ret; } @@ -611,7 +752,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, // if (const auto *Load = llvm::dyn_cast(BasePtr)) { // Offsets.push_back(0); // BasePtr = Load->getPointerOperand()->stripPointerCasts(); -// } else if (const auto *Gep = llvm::dyn_cast(BasePtr)) +// } else if (const auto *Gep = +// llvm::dyn_cast(BasePtr)) // { // auto GepOffs = detail::AbstractMemoryLocationImpl::computeOffset(DL, diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index ef1ec65cdd..fd28fce35a 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/IR/Instruction.h" +#include "SrcCodeLocationEntry.h" #include "TestConfig.h" #include "gtest/gtest.h" @@ -137,15 +138,21 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { const psr::LLVMTaintConfig *Config{}; }; +using namespace psr::unittest; + class CFLFieldSensTest : public ::testing::Test { protected: static constexpr auto PathToLLFiles = PHASAR_BUILD_SUBFOLDER("xtaint/"); 
const std::vector EntryPoints = {"main"}; + using TaintSetT = std::set; + void run(const llvm::Twine &IRFileName, - const std::map> &GroundTruth) { - psr::LLVMProjectIRDB IRDB(IRFileName); - ASSERT_TRUE(IRDB); + const std::map &GroundTruth) { + auto IRDB = psr::LLVMProjectIRDB::loadOrExit(IRFileName); + + auto GroundTruthEntries = + convertTestingLocationSetMapInIR(GroundTruth, IRDB); psr::LLVMAliasSet BaseAS(&IRDB); psr::FilteredLLVMAliasSet AS(&BaseAS); @@ -165,12 +172,11 @@ class CFLFieldSensTest : public ::testing::Test { Solver.solve(); auto Results = Solver.getSolverResults(); - Solver.dumpResults(); - // auto Results = psr::solveIDEProblem(FsTaintProblem, ICFG); // Results.dumpResults(ICFG); - std::map> ComputedLeaks; + std::map> + ComputedLeaks; for (auto IIt = TaintProblem.Leaks.begin(), End = TaintProblem.Leaks.end(); IIt != End;) { @@ -187,132 +193,141 @@ class CFLFieldSensTest : public ::testing::Test { << " has non-empty field-string: " << Res << '\n'; TaintProblem.Leaks.erase(It); } else { - ComputedLeaks[stoi(psr::getMetaDataID(LeakInst))].insert( - psr::getMetaDataID(LeakFact)); + ComputedLeaks[LeakInst].insert(LeakFact); } } } - EXPECT_EQ(GroundTruth, ComputedLeaks); + EXPECT_EQ(GroundTruthEntries, ComputedLeaks); + if (HasFailure()) { + Solver.dumpResults(); + } } }; TEST_F(CFLFieldSensTest, Basic_01) { - std::map> Gt; - Gt[13] = {"12"}; + std::map GroundTruth = { + {LineColFun{8, 3, "main"}, + {LineColFunOp{8, 9, "main", llvm::Instruction::Load}}}, + }; - run({PathToLLFiles + "xtaint01_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint01_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_02) { - // GTEST_SKIP() << "Need field-sensitive alias information!"; - - std::map> Gt; - - Gt[18] = {"17"}; + std::map GroundTruth = { + {LineColFun{9, 3, "main"}, + {LineColFunOp{9, 9, "main", llvm::Instruction::Load}}}, + }; - run({PathToLLFiles + "xtaint02_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint02_cpp_dbg.ll"}, GroundTruth); } 
TEST_F(CFLFieldSensTest, Basic_03) { - std::map> Gt; - - Gt[21] = {"20"}; + std::map GroundTruth = { + {LineColFun{10, 3, "main"}, + {LineColFunOp{10, 9, "main", llvm::Instruction::Load}}}, + }; - run({PathToLLFiles + "xtaint03_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint03_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_04) { - std::map> Gt; + auto Call = LineColFun{6, 3, "_Z3barPi"}; - Gt[16] = {"15"}; + std::map GroundTruth = { + {Call, {OperandOf{0, Call}}}, + }; - run({PathToLLFiles + "xtaint04_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint04_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_06) { - std::map> Gt; - - // no leaks expected + std::map GroundTruth = { + // no leaks expected + }; - run({PathToLLFiles + "xtaint06_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint06_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_09_1) { - std::map> Gt; - - Gt[25] = {"24"}; + std::map GroundTruth = { + {LineColFun{14, 3, "main"}, {LineColFun{14, 8, "main"}}}, + }; - run({PathToLLFiles + "xtaint09_1_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint09_1_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_09) { - std::map> Gt; - - Gt[24] = {"23"}; + auto SinkCall = LineColFun{16, 3, "main"}; + std::map GroundTruth = { + {SinkCall, {OperandOf{0, SinkCall}}}, + }; - run({PathToLLFiles + "xtaint09_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint09_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_12) { - std::map> Gt; + std::map GroundTruth = { + {LineColFun{19, 3, "main"}, {LineColFun{19, 8, "main"}}}, + }; // We sanitize an alias - since we don't have must-alias relations, we cannot // kill aliases at all - Gt[28] = {"27"}; - run({PathToLLFiles + "xtaint12_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint12_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_13) { - std::map> Gt; - - Gt[30] = {"29"}; + std::map GroundTruth = { + {LineColFun{17, 3, "main"}, {LineColFun{17, 8, "main"}}}, + }; - run({PathToLLFiles + 
"xtaint13_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint13_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_14) { - std::map> Gt; - - Gt[33] = {"32"}; + std::map GroundTruth = { + {LineColFun{24, 3, "main"}, {LineColFun{24, 8, "main"}}}, + }; - run({PathToLLFiles + "xtaint14_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint14_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_16) { - std::map> Gt; - - Gt[24] = {"23"}; + std::map GroundTruth = { + {LineColFun{13, 3, "main"}, {LineColFun{13, 8, "main"}}}, + }; - run({PathToLLFiles + "xtaint16_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint16_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_17) { - std::map> Gt; - - Gt[27] = {"26"}; + std::map GroundTruth = { + {LineColFun{17, 3, "main"}, {LineColFun{17, 8, "main"}}}, + }; - run({PathToLLFiles + "xtaint17_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint17_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_18) { - std::map> Gt; - - // no leaks expected + std::map GroundTruth = { + // no leaks expected + }; - run({PathToLLFiles + "xtaint18_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint18_cpp_dbg.ll"}, GroundTruth); } TEST_F(CFLFieldSensTest, Basic_20) { - std::map> Gt; - - Gt[22] = {"14"}; + std::map GroundTruth = { + {LineColFun{12, 3, "main"}, {LineColFun{6, 7, "main"}}}, + // {LineColFun{13, 3, "main"}, {LineColFun{13, 8, "main"}}}, + }; // Gt[24] = {"23"}; // no leak here, because above we define the semantics to // exclude deep taints! 
- run({PathToLLFiles + "xtaint20_cpp.ll"}, Gt); + run({PathToLLFiles + "xtaint20_cpp_dbg.ll"}, GroundTruth); } } // namespace From 47d91a207bf20dd8e2aec78c5ccbaee21e402115 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 29 Jan 2026 14:55:48 +0100 Subject: [PATCH 12/29] Optimize CFLFieldSensEdgeValue::applyTransforms --- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 57c58863d6..7d46019e20 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -290,10 +290,23 @@ void CFLFieldSensEdgeValue::applyStore(uint8_t DepthKLimit) { void CFLFieldSensEdgeValue::applyLoad(uint8_t DepthKLimit) { applyGepAndLoad(GEPEvent{0}, DepthKLimit); } -void CFLFieldSensEdgeValue::applyKill() { applyGepAndKill(GEPEvent{0}); } +void CFLFieldSensEdgeValue::applyKill() { // + applyGepAndKill(GEPEvent{0}); +} void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit) { + if (Paths.empty() || Txn.empty()) { + // Nothing to be done here + return; + } + if (Paths.size() == 1 && Paths.begin()->empty()) { + Paths.clear(); + Paths.insert(Txn); + return; + } + // llvm::errs() << "[applyTransform]: " << *this << " X " << Txn << '\n'; + auto Save = std::exchange(Paths, {}); Paths.reserve(Save.size()); @@ -333,6 +346,8 @@ void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, } } + // llvm::errs() << "[applyTransform]: > result: " << *this << '\n'; + // // TODO: Optimize! 
// if (Txn.Offset) { @@ -358,7 +373,7 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, } auto It = Txns.Paths.begin(); - if (Txns.Paths.size() == 1) { + if (Txns.Paths.size() == 1) [[likely]] { applyTransform(*It, DepthKLimit); return; } @@ -372,9 +387,13 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, Ret.applyTransform(*It, DepthKLimit); for (++It; It != End; ++It) { - auto Tmp = *this; - Tmp.applyTransform(*It, DepthKLimit); - Ret.Paths.insert(Tmp.Paths.begin(), Tmp.Paths.end()); + if (!It->empty()) { + auto Tmp = *this; + Tmp.applyTransform(*It, DepthKLimit); + Ret.Paths.insert(Tmp.Paths.begin(), Tmp.Paths.end()); + } else { + Ret.Paths.insert(Paths.begin(), Paths.end()); + } } *this = std::move(Ret); From e0518f1483ac4f3c9ae1bab6f7e4247b42212092 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 29 Jan 2026 15:59:38 +0100 Subject: [PATCH 13/29] Some cleanup + optimize creation of CFLFieldSensEdgeFunction --- .../DefaultAllocSitesAwareIDEProblem.h | 2 +- .../FieldSensAllocSitesAwareIFDSProblem.h | 32 +--- .../Pointer/LLVMBasePointerAliasSet.h | 61 ++++++++ .../PhasarLLVM/Pointer/LLVMFieldAliasSet.h | 135 ----------------- ...DefaultAllocSitesAwareIDEFlowFunctions.cpp | 3 - .../FieldSensAllocSitesAwareIFDSProblem.cpp | 140 +++++------------- .../Pointer/LLVMBasePointerAliasSet.cpp | 45 ++++++ lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp | 111 -------------- 8 files changed, 150 insertions(+), 379 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h delete mode 100644 include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h create mode 100644 lib/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.cpp delete mode 100644 lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h index b85b00d7d8..a275a8ee81 
100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h @@ -12,7 +12,7 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultNoAliasIDEProblem.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h" #include diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 9c5399f91b..b0615768c8 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -16,15 +16,12 @@ #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" -#include "phasar/Utils/MemoryLocationAllocator.h" +#include "phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" #include @@ -47,12 +44,10 @@ struct GEPEvent { struct CFLFieldAccessPath { static constexpr int32_t TopOffset = INT32_MIN; - // TODO: compose - llvm::SmallVector Loads; llvm::SmallVector Stores; llvm::SmallDenseSet Kills; - // Add an offset for pending GEPs; INT16_MIN is Top + // Add an offset for pending GEPs; INT32_MIN is Top int32_t Offset = {0}; int32_t EmptyTombstone = 0; @@ -173,23 +168,6 @@ class FieldSensAllocSitesAwareIFDSProblem using Base = IDETabulationProblem< CFLFieldSensAnalysisDomain>; - // struct CachedAccessPath final - // : public 
llvm::TrailingObjects { - - // using OffsetType = int32_t; - - // constexpr CachedAccessPath(const llvm::Value *BasePtr, - // uint32_t NumOffsets) noexcept - // : BasePtr(BasePtr), NumOffsets(NumOffsets) {} - - // const llvm::Value *BasePtr{}; - // uint32_t NumOffsets{}; - - // [[nodiscard]] llvm::ArrayRef offsets() const noexcept { - // return {this->getTrailingObjects(), NumOffsets}; - // } - // }; - public: using typename Base::container_type; using typename Base::d_t; @@ -281,15 +259,9 @@ class FieldSensAllocSitesAwareIFDSProblem const EdgeFunction &R) override; private: - // [[nodiscard]] const CachedAccessPath * - // getAccessPath(const llvm::Value *Pointer); - LLVMAliasInfoRef AS; IFDSTabulationProblem *UserProblem{}; FieldSensAllocSitesAwareIFDSProblemConfig Config{}; - // MemoryLocationAllocator MemLocAlloc{}; - // llvm::DenseMap - // MemLocCache{}; uint8_t DepthKLimit = 5; // Original from the paper }; diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h new file mode 100644 index 0000000000..07fad9b44b --- /dev/null +++ b/include/phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * Copyright (c) 2025 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H +#define PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H + +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Pointer/AliasAnalysisType.h" + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" + +#include +#include +#include + +namespace psr { + +class LLVMBasePointerAliasSet { +public: + using v_t = typename LLVMAliasInfoRef::v_t; + using n_t = typename LLVMAliasInfoRef::n_t; + using AliasSetTy = LLVMAliasInfoRef::AliasSetTy; + using AliasSetPtrTy = std::unique_ptr; + + constexpr LLVMBasePointerAliasSet(LLVMAliasInfoRef AS) noexcept : AS(AS) {} + + [[nodiscard]] bool isInterProcedural() const noexcept { + return AS.isInterProcedural(); + } + + [[nodiscard]] AliasAnalysisType getAliasAnalysisType() const noexcept { + return AS.getAliasAnalysisType(); + } + + [[nodiscard]] static const llvm::Value * + getBasePointer(const llvm::Value *Pointer); + + [[nodiscard]] AliasResult alias(v_t Pointer1, v_t Pointer2, + n_t AtInstruction = {}) const { + return AS.alias(Pointer1, Pointer2, AtInstruction); + } + + [[nodiscard]] AliasSetPtrTy getAliasSet(v_t Pointer, + n_t AtInstruction = {}) const; + +private: + LLVMAliasInfoRef AS; +}; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h b/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h deleted file mode 100644 index f84a23447b..0000000000 --- a/include/phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h +++ /dev/null @@ -1,135 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2025 Fabian Schiebel. - * All rights reserved. 
This program and the accompanying materials are made - * available under the terms of LICENSE.txt. - * - * Contributors: - * Fabian Schiebel and others - *****************************************************************************/ - -#ifndef PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H -#define PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H - -#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/Pointer/AliasAnalysisType.h" - -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/DataLayout.h" - -#include -#include -#include - -namespace psr { - -class LLVMBasePointerAliasSet { -public: - using v_t = typename LLVMAliasInfoRef::v_t; - using n_t = typename LLVMAliasInfoRef::n_t; - using AliasSetTy = LLVMAliasInfoRef::AliasSetTy; - using AliasSetPtrTy = std::unique_ptr; - - constexpr LLVMBasePointerAliasSet(LLVMAliasInfoRef AS) noexcept : AS(AS) {} - - [[nodiscard]] bool isInterProcedural() const noexcept { - return AS.isInterProcedural(); - } - - [[nodiscard]] AliasAnalysisType getAliasAnalysisType() const noexcept { - return AS.getAliasAnalysisType(); - } - - [[nodiscard]] static const llvm::Value * - getBasePointer(const llvm::Value *Pointer); - - [[nodiscard]] AliasResult alias(v_t Pointer1, v_t Pointer2, - n_t AtInstruction = {}) const { - return AS.alias(Pointer1, Pointer2, AtInstruction); - } - - [[nodiscard]] AliasSetPtrTy getAliasSet(v_t Pointer, - n_t AtInstruction = {}) const; - -private: - LLVMAliasInfoRef AS; -}; - -} // namespace psr -#if 0 -namespace psr{ -class LLVMFieldAliasSet { -public: - struct AccessPath { - const llvm::Value *BasePtr{}; - llvm::SmallVector FieldAccesses; - - static constexpr ptrdiff_t TopOffset = PTRDIFF_MIN; - - bool operator==(const AccessPath &Other) const noexcept { - return BasePtr == Other.BasePtr && FieldAccesses == Other.FieldAccesses; - } - bool operator!=(const AccessPath &Other) const noexcept { - return !(*this == Other); - } - }; - 
- using v_t = typename LLVMAliasInfoRef::v_t; - using n_t = typename LLVMAliasInfoRef::n_t; - using AliasSetTy = llvm::DenseSet; - using AliasSetPtrTy = std::unique_ptr; - - explicit LLVMFieldAliasSet( - LLVMAliasInfoRef AS, - std::reference_wrapper DL) noexcept - : AS(AS), DL(&DL.get()) {} - - [[nodiscard]] bool isInterProcedural() const noexcept { - return AS.isInterProcedural(); - } - - [[nodiscard]] AliasAnalysisType getAliasAnalysisType() const noexcept { - return AS.getAliasAnalysisType(); - } - - [[nodiscard]] AccessPath getAccessPath(const llvm::Value *Pointer) const; - - [[nodiscard]] AliasResult alias(v_t Pointer1, v_t Pointer2, - n_t AtInstruction = {}) const { - return AS.alias(Pointer1, Pointer2, AtInstruction); - } - - [[nodiscard]] AliasSetPtrTy getAliasSet(v_t Pointer, - n_t AtInstruction = {}) const; - -private: - LLVMAliasInfoRef AS; - const llvm::DataLayout *DL{}; -}; -} // namespace psr - -namespace llvm { -template <> struct DenseMapInfo { - using AccessPath = psr::LLVMFieldAliasSet::AccessPath; - static AccessPath getEmptyKey() { - return AccessPath{DenseMapInfo::getEmptyKey(), {}}; - } - static AccessPath getTombstoneKey() { - return AccessPath{DenseMapInfo::getTombstoneKey(), {}}; - } - static auto getHashValue(const AccessPath &AP) { - auto HC = hash_value(AP.BasePtr); - auto HC2 = - hash_combine_range(AP.FieldAccesses.begin(), AP.FieldAccesses.end()); - return hash_combine(HC, HC2); - } - static bool isEqual(const AccessPath &L, const AccessPath &R) noexcept { - return L == R; - } -}; -} // namespace llvm - -#endif - -#endif // PHASAR_PHASARLLVM_POINTER_LLVMFIELDALIASSET_H diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp index 78d07a14fa..0ff5f0316a 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp @@ -1,12 
+1,9 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/Support/Casting.h" using namespace psr; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 7d46019e20..891d6bc751 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -77,6 +77,25 @@ struct CFLFieldSensEdgeFunction { const CFLFieldSensEdgeFunction &EF) { return OS << "Txn[" << EF.Transform << ']'; } + + [[nodiscard]] static auto from(CFLFieldSensEdgeValue &&Txn, + uint8_t DepthKLimit) { + return CFLFieldSensEdgeFunction{ + .Transform = std::move(Txn), + .DepthKLimit = DepthKLimit, + }; + } + + [[nodiscard]] static auto from(CFLFieldAccessPath &&Txn, + uint8_t DepthKLimit) { + // Avoid initializer-list as it prevents moving + auto Ret = CFLFieldSensEdgeFunction{ + .Transform = {}, + .DepthKLimit = DepthKLimit, + }; + Ret.Transform.Paths.insert(std::move(Txn)); + return Ret; + } }; [[nodiscard]] std::string storesToString(const CFLFieldAccessPath &AP) { @@ -470,7 +489,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); } if (const auto *Store = llvm::dyn_cast(Curr)) { @@ -496,7 +515,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( CFLFieldAccessPath FieldString{}; 
FieldString.Kills.insert(Offset); - return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(FieldString), + DepthKLimit); } const auto *ValueOp = Store->getValueOperand(); @@ -507,8 +527,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (BasePtr != SuccNode && llvm::isa(BasePtr)) { // This is a hack, to be more correct wih field-insensitive alias // information - auto [BaseBasePtr, BaseOffset] = getBaseAndOffset( - llvm::cast(BasePtr)->getPointerOperand(), DL); + if (BaseBasePtr == SuccNode) { // push before Offset, or after? FieldString.Stores.push_back(BaseOffset); @@ -517,7 +536,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( FieldString.Stores.push_back(Offset); - return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(FieldString), + DepthKLimit); } // unaffected by the store @@ -532,21 +552,14 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( auto [BasePtr, Offset] = getBaseAndOffset( Load->getPointerOperand(), IRDB->getModule()->getDataLayout()); - int32_t LoadOffs = 0; - - // if (BasePtr == CurrNode && Load->getPointerOperand() != CurrNode) { - // This is a hack, but we do sth similar in the IDEExtendedTaintAnalysis - // (see forEachAliasOf() Lines 144 ff) - LoadOffs = Offset; - // } - // TODO;: How to deal with BasePtr? 
CFLFieldAccessPath FieldString{}; - FieldString.Loads.push_back(LoadOffs); + FieldString.Loads.push_back(Offset); // llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; // llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; - return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(FieldString), + DepthKLimit); } if (const auto *Gep = llvm::dyn_cast(Curr)) { @@ -555,7 +568,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( CFLFieldAccessPath FieldString{}; FieldString.Offset = OffsVal; - return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(FieldString), + DepthKLimit); } } @@ -568,7 +582,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Gen from zero - return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); } // This is naturally identity @@ -581,7 +595,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Gen from zero - return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); } return EdgeIdentity{}; @@ -603,7 +617,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Gen from zero - return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); } // This naturally identity @@ -631,14 +645,15 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( CFLFieldAccessPath FieldString{}; FieldString.Kills.insert(*KillOffs); - return CFLFieldSensEdgeFunction{{{std::move(FieldString)}}, 
DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(FieldString), + DepthKLimit); } } if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction{{{CFLFieldAccessPath{}}}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); } // TODO: Is that correct? -- We may need to handle field-indirections here @@ -667,7 +682,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, auto Txn = FldSensL->Transform; Txn.applyTransforms(FldSensR->Transform, DepthKLimit); // TODO: k-limit the number of paths! - return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } llvm::report_fatal_error("[FieldSensAllocSitesAwareIFDSProblem::extend]: " @@ -712,7 +727,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, if (Changed) { // TODO: k-limit the number of paths! - return CFLFieldSensEdgeFunction{{std::move(Union)}, DepthKLimit}; + return CFLFieldSensEdgeFunction::from( + CFLFieldSensEdgeValue{std::move(Union)}, DepthKLimit); } return LeftSmaller ? 
R : L; @@ -725,7 +741,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, auto Txn = FldSensL->Transform; Txn.Paths.insert(CFLFieldAccessPath{}); - return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } } else if (FldSensR && L.isa>()) { if (FldSensR->Transform.Paths.contains(CFLFieldAccessPath{})) { @@ -734,7 +750,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, auto Txn = FldSensR->Transform; Txn.Paths.insert(CFLFieldAccessPath{}); - return CFLFieldSensEdgeFunction{std::move(Txn), DepthKLimit}; + return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } return AllBottom{}; @@ -745,77 +761,3 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, return Ret; } - -// static std::pair> -// createAccessPath(const llvm::Value *Pointer, const llvm::DataLayout &DL) { - -// std::pair> Ret; -// auto &[BasePtr, Offsets] = Ret; - -// BasePtr = Pointer; -// Offsets.push_back(0); - -// // Note: llvm::Constant includes llvm::GlobalValue -// if (llvm::isa(Pointer) || -// llvm::isa(Pointer) || -// llvm::isa(Pointer) || -// llvm::isa(Pointer)) { -// // Globals, argument, function calls and allocas define themselves -// return Ret; -// } - -// while (true) { -// // TODO: Should we look into the cache within this loop? 
-// // TODO: Handle constant GEPs - -// if (const auto *Load = llvm::dyn_cast(BasePtr)) { -// Offsets.push_back(0); -// BasePtr = Load->getPointerOperand()->stripPointerCasts(); -// } else if (const auto *Gep = -// llvm::dyn_cast(BasePtr)) -// { - -// auto GepOffs = detail::AbstractMemoryLocationImpl::computeOffset(DL, -// Gep); if (GepOffs.has_value() && *GepOffs >= INT32_MIN && -// *GepOffs <= INT32_MAX) { -// Offsets.back() = addOffsets(Offsets.back(), int32_t(*GepOffs)); -// } else { -// Offsets.back() = CFLFieldAccessPath::TopOffset; -// } -// BasePtr = Gep->getPointerOperand()->stripPointerCasts(); -// } else { -// // TODO aggregate instructions, e.g. insertvalue, extractvalue, ... -// break; -// } -// } - -// // NOTE: Do not reverse the offsets as we do in -// // AbstractMemoryLocationFactoryBase::createImpl(). -// // For the CFL formulation, we need the offsets in inverse order anyway! - -// return Ret; -// } - -// auto FieldSensAllocSitesAwareIFDSProblem::getAccessPath( -// const llvm::Value *Pointer) -> const CachedAccessPath * { -// auto &Ret = MemLocCache[Pointer]; -// if (Ret) { -// return Ret; -// } - -// auto [BasePtr, Offsets] = -// createAccessPath(Pointer, IRDB->getModule()->getDataLayout()); -// assert(Offsets.size() < UINT32_MAX); - -// using OffsetType = CachedAccessPath::OffsetType; - -// auto NumBytes = -// CachedAccessPath::totalSizeToAlloc(Offsets.size()); -// auto *RawMem = MemLocAlloc.allocate(NumBytes); -// auto *AP = new (RawMem) CachedAccessPath(BasePtr, Offsets.size()); -// memcpy(AP->getTrailingObjects(), Offsets.data(), -// Offsets.size() * sizeof(OffsetType)); - -// Ret = AP; -// return AP; -// } diff --git a/lib/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.cpp b/lib/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.cpp new file mode 100644 index 0000000000..44daba2153 --- /dev/null +++ b/lib/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.cpp @@ -0,0 +1,45 @@ +#include "phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h" + +#include 
"llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" + +#include + +using namespace psr; + +const llvm::Value * +LLVMBasePointerAliasSet::getBasePointer(const llvm::Value *Pointer) { + if (!Pointer || !Pointer->getType()->isPointerTy()) { + return Pointer; + } + + while (true) { + if (const auto *Load = llvm::dyn_cast(Pointer)) { + Pointer = Load->getPointerOperand()->stripPointerCastsAndAliases(); + continue; + } + + if (const auto *GEP = llvm::dyn_cast(Pointer)) { + Pointer = GEP->getPointerOperand()->stripPointerCastsAndAliases(); + continue; + } + + break; + } + + return Pointer; +} + +auto LLVMBasePointerAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const + -> AliasSetPtrTy { + auto Aliases = AS.getAliasSet(Pointer, AtInstruction); + + auto Ret = std::make_unique(); + for (const auto *Alias : *Aliases) { + Ret->insert(getBasePointer(Alias)); + } + + return Ret; +} diff --git a/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp b/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp deleted file mode 100644 index 396e16448d..0000000000 --- a/lib/PhasarLLVM/Pointer/LLVMFieldAliasSet.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "phasar/PhasarLLVM/Pointer/LLVMFieldAliasSet.h" - -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/MathExtras.h" - -#include -#include - -using namespace psr; - -const llvm::Value * -LLVMBasePointerAliasSet::getBasePointer(const llvm::Value *Pointer) { - if (!Pointer || !Pointer->getType()->isPointerTy()) { - return Pointer; - } - - while (true) { - if (const auto *Load = llvm::dyn_cast(Pointer)) { - Pointer = Load->getPointerOperand()->stripPointerCastsAndAliases(); - continue; - } - - if (const auto *GEP = llvm::dyn_cast(Pointer)) { - Pointer = GEP->getPointerOperand()->stripPointerCastsAndAliases(); - continue; - } - - break; - } - - return Pointer; -} 
- -auto LLVMBasePointerAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const - -> AliasSetPtrTy { - auto Aliases = AS.getAliasSet(Pointer, AtInstruction); - - auto Ret = std::make_unique(); - for (const auto *Alias : *Aliases) { - Ret->insert(getBasePointer(Alias)); - } - - return Ret; -} - -#if 0 -static constexpr ptrdiff_t TopOffset = LLVMFieldAliasSet::AccessPath::TopOffset; - -constexpr static void addOffset(ptrdiff_t &Into, ptrdiff_t Offs) noexcept { - if (Into == TopOffset) { - return; - } - - if (llvm::AddOverflow(Into, Offs, Into)) { - Into = TopOffset; - } -} - -auto LLVMFieldAliasSet::getAccessPath(const llvm::Value *Pointer) const - -> AccessPath { - // TODO: We may want to cache this! - // -> See AbstractMemoryLocationFactory - - AccessPath Ret{Pointer, {0}}; - if (!Pointer || !Pointer->getType()->isPointerTy()) { - return Ret; - } - - while (true) { - if (const auto *Load = llvm::dyn_cast(Pointer)) { - Pointer = Load->getPointerOperand()->stripPointerCastsAndAliases(); - Ret.FieldAccesses.push_back(0); - continue; - } - - if (const auto *GEP = llvm::dyn_cast(Pointer)) { - Pointer = GEP->getPointerOperand()->stripPointerCastsAndAliases(); - auto Offset = detail::AbstractMemoryLocationImpl::computeOffset(*DL, GEP); - if (Offset) { - addOffset(Ret.FieldAccesses.back(), *Offset); - } else { - Ret.FieldAccesses.back() = TopOffset; - } - - continue; - } - } - - Ret.BasePtr = Pointer; - return Ret; -} - - - -auto LLVMFieldAliasSet::getAliasSet(v_t Pointer, n_t AtInstruction) const - -> AliasSetPtrTy { - auto Aliases = AS.getAliasSet(Pointer, AtInstruction); - - auto Ret = std::make_unique(); - for (const auto *Alias : *Aliases) { - Ret->insert(getAccessPath(Alias)); - } - - return Ret; -} - -#endif From 2eefc33e61227ab8895f905f5081733e83608d20 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 29 Jan 2026 16:10:46 +0100 Subject: [PATCH 14/29] minor --- .../FieldSensAllocSitesAwareIFDSProblem.h | 6 +++++ .../FieldSensAllocSitesAwareIFDSProblem.cpp 
| 23 +++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index b0615768c8..0266e3e2f1 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -91,6 +91,12 @@ struct CFLFieldAccessPathDMI { } static bool isEqual(const CFLFieldAccessPath &L, const CFLFieldAccessPath &R) noexcept { + if (L.EmptyTombstone != R.EmptyTombstone) { + return false; + } + if (L.EmptyTombstone) { + return true; + } return L == R; } }; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 891d6bc751..a1a57b0793 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -96,6 +96,15 @@ struct CFLFieldSensEdgeFunction { Ret.Transform.Paths.insert(std::move(Txn)); return Ret; } + + [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit) { + auto Ret = CFLFieldSensEdgeFunction{ + .Transform = {}, + .DepthKLimit = DepthKLimit, + }; + Ret.Transform.Paths.insert(CFLFieldAccessPath{}); + return Ret; + } }; [[nodiscard]] std::string storesToString(const CFLFieldAccessPath &AP) { @@ -424,8 +433,8 @@ size_t psr::hash_value(const CFLFieldAccessPath &FieldString) noexcept { auto HCS = llvm::hash_combine_range(FieldString.Stores.begin(), FieldString.Stores.end()); // Xor does not care about the order - auto HCK = std::accumulate(FieldString.Kills.begin(), FieldString.Kills.end(), - 0, std::bit_xor<>{}); + auto HCK = std::reduce(FieldString.Kills.begin(), FieldString.Kills.end(), 0, + std::bit_xor<>{}); return llvm::hash_combine(HCL, HCS, HCK); } @@ -489,7 +498,7 @@ auto 
FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); } if (const auto *Store = llvm::dyn_cast(Curr)) { @@ -582,7 +591,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); } // This is naturally identity @@ -595,7 +604,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); } return EdgeIdentity{}; @@ -617,7 +626,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); } // This naturally identity @@ -653,7 +662,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::from(CFLFieldAccessPath{}, DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); } // TODO: Is that correct? 
-- We may need to handle field-indirections here From c984b229d40d2d6c8665991f639d95f440cd533f Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 5 Feb 2026 09:30:40 +0100 Subject: [PATCH 15/29] Some optimizations --- .../FieldSensAllocSitesAwareIFDSProblem.h | 31 +++- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 144 ++++++++++-------- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 7 +- 3 files changed, 111 insertions(+), 71 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 0266e3e2f1..edcac24409 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -12,6 +12,7 @@ #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" +#include "phasar/Domain/BinaryDomain.h" #include "phasar/Domain/LatticeDomain.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" @@ -129,6 +130,10 @@ struct CFLFieldSensEdgeValue { friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CFLFieldSensEdgeValue &EV); + + [[nodiscard]] bool isEpsilon() const { + return Paths.size() == 1 && Paths.begin()->empty(); + } }; template @@ -142,11 +147,15 @@ struct FieldSensAllocSitesAwareIFDSProblemConfig // TODO: more }; -class FieldSensAllocSitesAwareIFDSProblemBase { +class FieldSensAllocSitesAwareIFDSProblemBase + : public CFLFieldSensAnalysisDomain { public: static constexpr llvm::StringLiteral LogCategory = "FieldSensAllocSitesAwareIFDSProblem"; + [[nodiscard]] static InitialSeeds + makeInitialSeeds(const InitialSeeds &UserSeeds); + [[nodiscard]] static std::pair getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { llvm::APInt Offset(64, 0); @@ -194,21 +203,23 @@ class 
FieldSensAllocSitesAwareIFDSProblem /// information to lower suprious aliases explicit FieldSensAllocSitesAwareIFDSProblem( IFDSTabulationProblem *UserProblem, - LLVMAliasInfoRef AS, FieldSensAllocSitesAwareIFDSProblemConfig Config = {}) noexcept(std::is_nothrow_move_constructible_v) : Base(UserProblem->getProjectIRDB(), UserProblem->getEntryPoints(), UserProblem->getZeroValue()), - AS(AS), UserProblem(UserProblem), Config(std::move(Config)) {} + UserProblem(UserProblem), Config(std::move(Config)) {} - FieldSensAllocSitesAwareIFDSProblem(std::nullptr_t, - LLVMAliasInfoRef AS) = delete; + FieldSensAllocSitesAwareIFDSProblem( + std::nullptr_t, + FieldSensAllocSitesAwareIFDSProblemConfig Config = {}) = delete; // TODO: Provide a customization-point to provide gen offsets to the // edge-functions (generating from zero currently always generates at // epsilon!) - [[nodiscard]] InitialSeeds initialSeeds() override; + [[nodiscard]] InitialSeeds initialSeeds() override { + return makeInitialSeeds(UserProblem->initialSeeds()); + } [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override { @@ -239,6 +250,11 @@ class FieldSensAllocSitesAwareIFDSProblem return UserProblem->getCallToRetFlowFunction(CallSite, RetSite, Callees); } + static EdgeFunction getStoreEdgeFunction(d_t CurrNode, d_t SuccNode, + d_t PointerOp, d_t ValueOp, + uint8_t DepthKLimit, + const llvm::DataLayout &DL); + EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) override; @@ -264,8 +280,9 @@ class FieldSensAllocSitesAwareIFDSProblem EdgeFunction combine(const EdgeFunction &L, const EdgeFunction &R) override; + [[nodiscard]] const auto &base() const noexcept { return *UserProblem; } + private: - LLVMAliasInfoRef AS; IFDSTabulationProblem *UserProblem{}; FieldSensAllocSitesAwareIFDSProblemConfig Config{}; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp 
b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index a1a57b0793..1d143ce4c9 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -328,7 +328,7 @@ void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, // Nothing to be done here return; } - if (Paths.size() == 1 && Paths.begin()->empty()) { + if (isEpsilon()) { Paths.clear(); Paths.insert(Txn); return; @@ -478,9 +478,9 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, return OS << " }"; } -auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() +auto FieldSensAllocSitesAwareIFDSProblemBase::makeInitialSeeds( + const InitialSeeds &UserSeeds) -> InitialSeeds { - auto UserSeeds = UserProblem->initialSeeds(); InitialSeeds::GeneralizedSeeds Ret; for (const auto &[Inst, Facts] : UserSeeds.getSeeds()) { @@ -493,64 +493,66 @@ auto FieldSensAllocSitesAwareIFDSProblem::initialSeeds() return {std::move(Ret)}; } -auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( - n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { - if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { - // Gen from zero +auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( + d_t CurrNode, d_t SuccNode, d_t PointerOp, d_t ValueOp, uint8_t DepthKLimit, + const llvm::DataLayout &DL) -> EdgeFunction { + auto [BasePtr, Offset] = getBaseAndOffset(PointerOp, DL); - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); - } + // TODO;: How to deal with BasePtr? - if (const auto *Store = llvm::dyn_cast(Curr)) { - const auto *PointerOp = Store->getPointerOperand(); + auto [BaseBasePtr, + BaseOffset] = [&]() -> std::pair { + if (BasePtr != SuccNode && llvm::isa(BasePtr)) { + return getBaseAndOffset( + llvm::cast(BasePtr)->getPointerOperand(), DL); + } - // TODO;: How to deal with BasePtr? 
+ return {nullptr, INT32_MIN}; + }(); + if (CurrNode == SuccNode && + (BasePtr == CurrNode || BaseBasePtr == CurrNode)) { + // Kill - const auto &DL = IRDB->getModule()->getDataLayout(); - auto [BasePtr, Offset] = getBaseAndOffset(PointerOp, DL); + CFLFieldAccessPath FieldString{}; + FieldString.Kills.insert(Offset); + return CFLFieldSensEdgeFunction::from(std::move(FieldString), DepthKLimit); + } - auto [BaseBasePtr, - BaseOffset] = [&]() -> std::pair { - if (BasePtr != SuccNode && llvm::isa(BasePtr)) { - return getBaseAndOffset( - llvm::cast(BasePtr)->getPointerOperand(), DL); - } + if (ValueOp == CurrNode && CurrNode != SuccNode) { + // Store - return {nullptr, INT32_MIN}; - }(); - if (CurrNode == SuccNode && - (BasePtr == CurrNode || BaseBasePtr == CurrNode)) { - // Kill + CFLFieldAccessPath FieldString{}; + if (BasePtr != SuccNode && llvm::isa(BasePtr)) { + // This is a hack, to be more correct with field-insensitive alias + // information - CFLFieldAccessPath FieldString{}; - FieldString.Kills.insert(Offset); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), - DepthKLimit); + if (BaseBasePtr == SuccNode) { + // push before Offset, or after? + FieldString.Stores.push_back(BaseOffset); + } } - const auto *ValueOp = Store->getValueOperand(); - if (ValueOp == CurrNode && CurrNode != SuccNode) { - // Store + FieldString.Stores.push_back(Offset); - CFLFieldAccessPath FieldString{}; - if (BasePtr != SuccNode && llvm::isa(BasePtr)) { - // This is a hack, to be more correct wih field-insensitive alias - // information + return CFLFieldSensEdgeFunction::from(std::move(FieldString), DepthKLimit); + } - if (BaseBasePtr == SuccNode) { - // push before Offset, or after? 
- FieldString.Stores.push_back(BaseOffset); - } - } + // unaffected by the store + return EdgeIdentity{}; +} - FieldString.Stores.push_back(Offset); +auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( + n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { + // Gen from zero - return CFLFieldSensEdgeFunction::from(std::move(FieldString), - DepthKLimit); - } + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); + } - // unaffected by the store - return EdgeIdentity{}; + if (const auto *Store = llvm::dyn_cast(Curr)) { + return getStoreEdgeFunction(CurrNode, SuccNode, Store->getPointerOperand(), + Store->getValueOperand(), DepthKLimit, + IRDB->getModule()->getDataLayout()); } if (Curr == SuccNode) { @@ -686,10 +688,22 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, const auto *FldSensR = R.dyn_cast(); if (FldSensL && FldSensR) { + if (FldSensR->Transform.isEpsilon()) { + // llvm::errs() << "[EXTEND]: identity transformation!\n"; + return L; + } + + if (FldSensL->Transform.Paths.empty()) { + llvm::errs() << "[EXTEND]: Empty prefix!\n"; + return L; + } - // TODO: Be smarter with copying the transforms: auto Txn = FldSensL->Transform; Txn.applyTransforms(FldSensR->Transform, DepthKLimit); + if (Txn.Paths.empty()) { + // llvm::errs() << "[EXTEND]: kill flow\n"; + return allTopFunction(); + } // TODO: k-limit the number of paths! return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } @@ -726,18 +740,28 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, if (FldSensL) { if (FldSensR) { - - const bool LeftSmaller = - FldSensL->Transform.Paths.size() < FldSensR->Transform.Paths.size(); - - bool Changed = false; - auto Union = setUnion(FldSensL->Transform.Paths, - FldSensR->Transform.Paths, &Changed); - - if (Changed) { - // TODO: k-limit the number of paths! 
- return CFLFieldSensEdgeFunction::from( - CFLFieldSensEdgeValue{std::move(Union)}, DepthKLimit); + const auto &LPaths = FldSensL->Transform.Paths; + const auto &RPaths = FldSensR->Transform.Paths; + const auto LeftSz = LPaths.size(); + const auto RightSz = RPaths.size(); + const auto LeftSmaller = LeftSz < RightSz; + + if (LeftSz && RightSz) { + const auto &Larger = LeftSmaller ? RPaths : LPaths; + const auto &Smaller = LeftSmaller ? LPaths : RPaths; + + auto It = Smaller.begin(); + const auto End = Smaller.end(); + for (; It != End; ++It) { + if (!Larger.contains(*It)) { + auto Union = LeftSmaller ? RPaths : LPaths; + Union.insert(It, End); + + // TODO: k-limit the number of paths! + return CFLFieldSensEdgeFunction::from( + CFLFieldSensEdgeValue{std::move(Union)}, DepthKLimit); + } + } } return LeftSmaller ? R : L; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index fd28fce35a..84d3174ce2 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -160,10 +160,9 @@ class CFLFieldSensTest : public ::testing::Test { ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem( - &TaintProblem, &AS, - { - .KillsAt = TaintProblem.killsAt(), - }); + &TaintProblem, { + .KillsAt = TaintProblem.killsAt(), + }); psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, nullptr, &BaseAS); From 586cda4d202faf14d6f26ea5d553e7df46c36a04 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 5 Feb 2026 14:16:33 +0100 Subject: [PATCH 16/29] Add corner case tests --- test/llvm_test_code/xtaint/CMakeLists.txt | 10 +++++--- test/llvm_test_code/xtaint/xtaint22.cpp | 11 +++++++++ test/llvm_test_code/xtaint/xtaint23.cpp | 19 +++++++++++++++ .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 24 ++++++++++++++++--- 4 files changed, 58 insertions(+), 6 
deletions(-) create mode 100644 test/llvm_test_code/xtaint/xtaint22.cpp create mode 100644 test/llvm_test_code/xtaint/xtaint23.cpp diff --git a/test/llvm_test_code/xtaint/CMakeLists.txt b/test/llvm_test_code/xtaint/CMakeLists.txt index c3f8041798..c5ff825eb3 100644 --- a/test/llvm_test_code/xtaint/CMakeLists.txt +++ b/test/llvm_test_code/xtaint/CMakeLists.txt @@ -1,14 +1,16 @@ set(XTAINT_DBG_SOURCES xtaint01.cpp xtaint01_json.cpp + # xtaint01_json.cpp xtaint02.cpp xtaint03.cpp xtaint04.cpp xtaint05.cpp xtaint06.cpp -# xtaint07.cpp -# xtaint08.cpp + + # xtaint07.cpp + # xtaint08.cpp xtaint09.cpp xtaint09_1.cpp xtaint10.cpp @@ -23,8 +25,10 @@ set(XTAINT_DBG_SOURCES xtaint19.cpp xtaint20.cpp xtaint21.cpp + xtaint22.cpp + xtaint23.cpp ) foreach(TEST_SRC ${XTAINT_DBG_SOURCES}) - generate_ll_file(FILE ${TEST_SRC} DEBUG) + generate_ll_file(FILE ${TEST_SRC} DEBUG) endforeach(TEST_SRC) diff --git a/test/llvm_test_code/xtaint/xtaint22.cpp b/test/llvm_test_code/xtaint/xtaint22.cpp new file mode 100644 index 0000000000..0afcfef30f --- /dev/null +++ b/test/llvm_test_code/xtaint/xtaint22.cpp @@ -0,0 +1,11 @@ + +void print([[clang::annotate("psr.sink")]] int) {} + +int main([[clang::annotate("psr.source")]] int argc, char *argv[]) { + int arr[10]{}; + arr[5] = argc; + + for (int *it = arr, *end = arr + 10; it != end; ++it) { + print(*it); + } +} diff --git a/test/llvm_test_code/xtaint/xtaint23.cpp b/test/llvm_test_code/xtaint/xtaint23.cpp new file mode 100644 index 0000000000..2042db9f24 --- /dev/null +++ b/test/llvm_test_code/xtaint/xtaint23.cpp @@ -0,0 +1,19 @@ + +void print([[clang::annotate("psr.sink")]] int) {} + +struct iterator { + int *it{}; + + void next() { // + ++it; + } +}; + +int main([[clang::annotate("psr.source")]] int argc, char *argv[]) { + int arr[10]{}; + arr[5] = argc; + + for (iterator it = {arr}, end = {arr + 10}; it.it != end.it; it.next()) { + print(*it.it); + } +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp 
b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index 84d3174ce2..865b74b105 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -198,9 +198,9 @@ class CFLFieldSensTest : public ::testing::Test { } EXPECT_EQ(GroundTruthEntries, ComputedLeaks); - if (HasFailure()) { - Solver.dumpResults(); - } + // if (HasFailure()) { + Solver.dumpResults(); + // } } }; @@ -329,6 +329,24 @@ TEST_F(CFLFieldSensTest, Basic_20) { run({PathToLLFiles + "xtaint20_cpp_dbg.ll"}, GroundTruth); } +TEST_F(CFLFieldSensTest, Basic_22) { + std::map GroundTruth = { + // TODO + }; + + run({PathToLLFiles + "xtaint22_cpp_dbg.ll"}, GroundTruth); + FAIL() << "Not Implemented yet"; +} + +TEST_F(CFLFieldSensTest, Basic_23) { + std::map GroundTruth = { + // TODO + }; + + run({PathToLLFiles + "xtaint23_cpp_dbg.ll"}, GroundTruth); + FAIL() << "Not Implemented yet"; +} + } // namespace int main(int Argc, char **Argv) { From a81dfac9c4245b80e3b50e1943ffdae691db8047 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 9 Feb 2026 19:42:57 +0100 Subject: [PATCH 17/29] Fix edge-cases in CFL-Fieldsens analysis + make corresponding tests (xtaint 22,23) pass --- .../DataFlow/IfdsIde/EdgeFunctionUtils.h | 12 ++ ...DefaultAllocSitesAwareIDEFlowFunctions.cpp | 7 +- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 113 ++++++++++++++---- test/llvm_test_code/xtaint/xtaint22.cpp | 2 +- test/llvm_test_code/xtaint/xtaint23.cpp | 2 +- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 20 ++-- .../IfdsIde/EdgeFunctionComposerTest.cpp | 2 +- 7 files changed, 126 insertions(+), 32 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h index 4a28b5694c..b479a667d5 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h @@ -39,6 +39,10 @@ template struct EdgeIdentity final { [[nodiscard]] static 
EdgeFunction join(EdgeFunctionRef This, const EdgeFunction &OtherFunction); + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, EdgeIdentity) { + return OS << "EdgeIdentity"; + } }; template struct ConstantEdgeFunction { @@ -151,6 +155,10 @@ template struct AllBottom final { { return LHS.BottomValue == RHS.BottomValue; } + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, AllBottom) { + return OS << "AllBottom"; + } }; template struct AllTop final { @@ -189,6 +197,10 @@ template struct AllTop final { { return LHS.TopValue == RHS.TopValue; } + + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, AllTop) { + return OS << "AllTop"; + } }; template diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp index 0ff5f0316a..2350524002 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEFlowFunctions.cpp @@ -54,8 +54,13 @@ auto detail::IDEAllocSitesAwareDefaultFlowFunctionsImpl:: const auto &DL = Store->getModule()->getDataLayout(); const auto *PointerBase = getBase(Store->getPointerOperand(), DL); - const auto *ValueBase = getBase(Store->getValueOperand(), DL); + const auto *ValueBase = Store->getValueOperand(); container_type Gen = getReachableAllocationSites(AS, PointerBase, Store); + + // llvm::errs() << "At store " << llvmIRToString(Curr) + // << ": ReachableAllocationSites: " << PrettyPrinter{Gen} + // << '\n'; + Gen.insert(ValueBase); // auto ValueAllocSites = // getReachableAllocationSites(AS, Store->getValueOperand(), Store); diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 1d143ce4c9..1cd17da692 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ 
b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -2,14 +2,17 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/Domain/LatticeDomain.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Fn.h" #include "phasar/Utils/Logger.h" -#include "phasar/Utils/Union.h" +#include "phasar/Utils/Printer.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" @@ -151,11 +154,13 @@ struct CFLFieldSensEdgeFunction { return true; } - if (F.Stores.back() != Offs) { + if (F.Stores.back() != Offs && + F.Stores.back() != CFLFieldAccessPath::TopOffset) { return false; } - assert(F.Stores.back() == Offs); + assert(F.Stores.back() == Offs || + F.Stores.back() == CFLFieldAccessPath::TopOffset); F.Offset = 0; F.Stores.pop_back(); // llvm::errs() << "> pop_back\n"; @@ -164,7 +169,11 @@ struct CFLFieldSensEdgeFunction { [[nodiscard]] auto applyOneGepAndKill(CFLFieldAccessPath &F, GEPEvent Evt, uint8_t /*DepthKLimit*/) { - auto Offs = F.Offset + Evt.Field; + auto Offs = addOffsets(F.Offset, Evt.Field); + if (Offs == CFLFieldAccessPath::TopOffset) { + // We cannot kill Top + return true; + } if (F.Stores.empty()) { F.Kills.insert(Offs); @@ -210,7 +219,7 @@ void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, // TODO: Optimize: F.Stores.erase(F.Stores.begin()); } - F.Stores.push_back(std::exchange(F.Offset, 0) + Evt.Field); + F.Stores.push_back(addOffsets(std::exchange(F.Offset, 0), Evt.Field)); Paths.insert(std::move(F)); } @@ -224,7 +233,7 @@ void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { auto Save = std::exchange(Paths, {}); for (const auto &F : Save) { - auto Offs = F.Offset + Evt.Field; + auto Offs = addOffsets(F.Offset, 
Evt.Field); if (F.Stores.empty()) { if (F.kills(Offs)) { @@ -247,7 +256,8 @@ void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { continue; } - if (F.Stores.back() != Offs) { + if (F.Stores.back() != Offs && + F.Stores.back() != CFLFieldAccessPath::TopOffset) { continue; } @@ -268,7 +278,7 @@ void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { auto Save = std::exchange(Paths, {}); for (const auto &F : Save) { - auto Offs = F.Offset + Evt.Field; + auto Offs = addOffsets(F.Offset, Evt.Field); if (F.Stores.empty()) { auto FF = F; @@ -543,6 +553,13 @@ auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getNormalEdgeFunction]:"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(Curr)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " CurrNode: " << DToString(CurrNode)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " SuccNode: " << DToString(SuccNode)); + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero @@ -588,8 +605,15 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( } auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( - n_t /*CallSite*/, d_t SrcNode, f_t /*DestinationFunction*/, d_t DestNode) + n_t CallSite, d_t SrcNode, f_t /*DestinationFunction*/, d_t DestNode) -> EdgeFunction { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getCallEdgeFunction]"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(CallSite)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " CurrNode: " << DToString(SrcNode)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " SuccNode: " << DToString(DestNode)); + if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Gen from zero @@ -601,8 +625,15 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( } auto 
FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( - n_t /*CallSite*/, f_t /*CalleeFunction*/, n_t /*ExitStmt*/, d_t ExitNode, + n_t /*CallSite*/, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getReturnEdgeFunction]"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(ExitStmt)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " CurrNode: " << DToString(ExitNode)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " SuccNode: " << DToString(RetNode)); + if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Gen from zero @@ -616,6 +647,13 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, llvm::ArrayRef /*Callees*/) -> EdgeFunction { + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getCallToRetEdgeFunction]"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(CallSite)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " CurrNode: " << DToString(CallNode)); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, + " SuccNode: " << DToString(RetSiteNode)); + if (CallNode == RetSiteNode && Config.KillsAt) { if (auto KillOffs = Config.KillsAt(CallSite, CallNode)) { // Let the summary-FF kill the fact @@ -638,13 +676,12 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { - PHASAR_LOG_LEVEL_CAT( - DEBUG, LogCategory, - "[getSummaryEdgeFunction]: Curr: " << llvmIRToString(Curr) << ":"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getSummaryEdgeFunction]"); + PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(Curr)); PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - " > CurrNode: " << llvmIRToString(CurrNode)); + " CurrNode: " << DToString(CurrNode)); PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, - " > SuccNode: " << 
llvmIRToString(SuccNode)); + " SuccNode: " << DToString(SuccNode)); if (CurrNode == SuccNode && Config.KillsAt) { if (auto KillOffs = Config.KillsAt(Curr, CurrNode)) { @@ -672,6 +709,31 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( return EdgeIdentity{}; } +void klimitPaths(auto &Paths) { + + llvm::SmallDenseMap, + 2, CFLFieldAccessPathDMI> + ToInsert; + for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { + auto It = IIt++; + if (!It->Stores.empty()) { + CFLFieldAccessPath Approx = *It; + Approx.Stores.back() = CFLFieldAccessPath::TopOffset; + ToInsert[std::move(Approx)].push_back(*It); + Paths.erase(It); + } + } + for (auto &&[Approx, OrigPaths] : ToInsert) { + if (OrigPaths.size() > 2) { + Paths.insert(Approx); + } else { + Paths.insert(OrigPaths.begin(), OrigPaths.end()); + } + } +} + +static constexpr ptrdiff_t BreadthKLimit = 5; + auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { @@ -694,17 +756,20 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, } if (FldSensL->Transform.Paths.empty()) { - llvm::errs() << "[EXTEND]: Empty prefix!\n"; + // llvm::errs() << "[EXTEND]: Empty prefix!\n"; return L; } auto Txn = FldSensL->Transform; Txn.applyTransforms(FldSensR->Transform, DepthKLimit); - if (Txn.Paths.empty()) { - // llvm::errs() << "[EXTEND]: kill flow\n"; - return allTopFunction(); + // if (Txn.Paths.empty()) { + // // llvm::errs() << "[EXTEND]: kill flow\n"; + // return allTopFunction(); + // } + + if (Txn.Paths.size() > BreadthKLimit) { + klimitPaths(Txn.Paths); } - // TODO: k-limit the number of paths! 
return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } @@ -714,8 +779,10 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, llvm::Twine(to_string(R))); }(); + // if (!L.isa>() && !R.isa>()) { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "EXTEND " << L << " X " << R << " ==> " << Ret); + // } return Ret; } @@ -752,11 +819,17 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, auto It = Smaller.begin(); const auto End = Smaller.end(); + for (; It != End; ++It) { if (!Larger.contains(*It)) { auto Union = LeftSmaller ? RPaths : LPaths; + Union.insert(It, End); + if (Union.size() > BreadthKLimit) { + klimitPaths(Union); + } + // TODO: k-limit the number of paths! return CFLFieldSensEdgeFunction::from( CFLFieldSensEdgeValue{std::move(Union)}, DepthKLimit); diff --git a/test/llvm_test_code/xtaint/xtaint22.cpp b/test/llvm_test_code/xtaint/xtaint22.cpp index 0afcfef30f..80f4e64721 100644 --- a/test/llvm_test_code/xtaint/xtaint22.cpp +++ b/test/llvm_test_code/xtaint/xtaint22.cpp @@ -3,7 +3,7 @@ void print([[clang::annotate("psr.sink")]] int) {} int main([[clang::annotate("psr.source")]] int argc, char *argv[]) { int arr[10]{}; - arr[5] = argc; + arr[4] = argc; for (int *it = arr, *end = arr + 10; it != end; ++it) { print(*it); diff --git a/test/llvm_test_code/xtaint/xtaint23.cpp b/test/llvm_test_code/xtaint/xtaint23.cpp index 2042db9f24..62e632b5b9 100644 --- a/test/llvm_test_code/xtaint/xtaint23.cpp +++ b/test/llvm_test_code/xtaint/xtaint23.cpp @@ -11,7 +11,7 @@ struct iterator { int main([[clang::annotate("psr.source")]] int argc, char *argv[]) { int arr[10]{}; - arr[5] = argc; + arr[4] = argc; for (iterator it = {arr}, end = {arr + 10}; it.it != end.it; it.next()) { print(*it.it); diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index 865b74b105..c5e13d346f 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ 
b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -12,6 +12,7 @@ #include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" #include "phasar/PhasarLLVM/TaintConfig/TaintConfigUtilities.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Logger.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" @@ -148,7 +149,8 @@ class CFLFieldSensTest : public ::testing::Test { using TaintSetT = std::set; void run(const llvm::Twine &IRFileName, - const std::map &GroundTruth) { + const std::map &GroundTruth, + bool ShouldDumpResults = false) { auto IRDB = psr::LLVMProjectIRDB::loadOrExit(IRFileName); auto GroundTruthEntries = @@ -198,9 +200,9 @@ class CFLFieldSensTest : public ::testing::Test { } EXPECT_EQ(GroundTruthEntries, ComputedLeaks); - // if (HasFailure()) { - Solver.dumpResults(); - // } + if (ShouldDumpResults || HasFailure()) { + Solver.dumpResults(); + } } }; @@ -331,20 +333,22 @@ TEST_F(CFLFieldSensTest, Basic_20) { TEST_F(CFLFieldSensTest, Basic_22) { std::map GroundTruth = { - // TODO + {LineColFun{9, 5, "main"}, {LineColFun{9, 11, "main"}}}, }; + // psr::Logger::initializeStderrLogger( + // psr::SeverityLevel::DEBUG, + // psr::FieldSensAllocSitesAwareIFDSProblem::LogCategory.str()); + run({PathToLLFiles + "xtaint22_cpp_dbg.ll"}, GroundTruth); - FAIL() << "Not Implemented yet"; } TEST_F(CFLFieldSensTest, Basic_23) { std::map GroundTruth = { - // TODO + {LineColFun{17, 5, "main"}, {LineColFun{17, 11, "main"}}}, }; run({PathToLLFiles + "xtaint23_cpp_dbg.ll"}, GroundTruth); - FAIL() << "Not Implemented yet"; } } // namespace diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/EdgeFunctionComposerTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/EdgeFunctionComposerTest.cpp index 61e3950a8a..c97cb3dfb6 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/EdgeFunctionComposerTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/EdgeFunctionComposerTest.cpp @@ -69,7 +69,7 @@ TEST(EdgeFunctionComposerTest, HandleEFIDs) { 
EdgeFunction EFC2 = MyEFC{EF2, EdgeIdentity{}}; llvm::outs() << "My EFC: " << EFC1 << " " << EFC2 << '\n'; EXPECT_EQ("EFComposer[AddTwoEF_1, AddTwoEF_2]", to_string(EFC1)); - EXPECT_EQ("EFComposer[AddTwoEF_2, psr::EdgeIdentity]", to_string(EFC2)); + EXPECT_EQ("EFComposer[AddTwoEF_2, EdgeIdentity]", to_string(EFC2)); // Reset ID's for next test CurrAddTwoEfId = 0; } From 2131919e55aee18574c636202c9060f5e7fe9d56 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 16 Feb 2026 14:44:02 +0100 Subject: [PATCH 18/29] Small tweak in CFLFieldSensTest + minor --- .../DataFlow/IfdsIde/EdgeFunctionUtils.h | 29 +++++++++++++++---- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 23 ++++----------- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 13 ++++----- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h index b479a667d5..30ce3c9614 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h @@ -204,7 +204,7 @@ template struct AllTop final { }; template -EdgeFunction +inline EdgeFunction defaultComposeOrNull(EdgeFunctionRef This, const EdgeFunction &SecondFunction) noexcept { if (llvm::isa>(SecondFunction)) { @@ -217,7 +217,7 @@ defaultComposeOrNull(EdgeFunctionRef This, } template -EdgeFunction +inline EdgeFunction defaultComposeOrNull(const EdgeFunction &This, const EdgeFunction &SecondFunction) noexcept { if (llvm::isa>(SecondFunction)) { @@ -400,8 +400,8 @@ template struct JoinEdgeFunction { /// Joining with EdgeIdentity will overapproximate to (AllBottom if N==0, else /// JoinEdgeFunction). 
template -EdgeFunction defaultJoinOrNull(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { +inline EdgeFunction defaultJoinOrNull(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) { if (llvm::isa>(OtherFunction)) { return OtherFunction; } @@ -419,8 +419,8 @@ EdgeFunction defaultJoinOrNull(EdgeFunctionRef This, } template -EdgeFunction defaultJoinOrNull(const EdgeFunction &This, - const EdgeFunction &OtherFunction) { +inline EdgeFunction defaultJoinOrNull(const EdgeFunction &This, + const EdgeFunction &OtherFunction) { if (llvm::isa>(OtherFunction) || llvm::isa>(This)) { return OtherFunction; } @@ -438,6 +438,23 @@ EdgeFunction defaultJoinOrNull(const EdgeFunction &This, return nullptr; } +/// Similar to defaultJoinOrNull(), but does not handle This==OtherFunction and +/// EdgeIdentity. +template +inline EdgeFunction +defaultJoinOrNullNoId(const EdgeFunction &This, + const EdgeFunction &OtherFunction) { + if (llvm::isa>(OtherFunction) || llvm::isa>(This)) { + return OtherFunction; + } + if (llvm::isa>(OtherFunction) || llvm::isa>(This) || + OtherFunction.referenceEquals(This)) { + return This; + } + + return nullptr; +} + template EdgeFunction EdgeIdentity::join(EdgeFunctionRef This, const EdgeFunction &OtherFunction) { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 1cd17da692..59356a2c3b 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -709,7 +709,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( return EdgeIdentity{}; } -void klimitPaths(auto &Paths) { +static void klimitPaths(auto &Paths) { llvm::SmallDenseMap, 2, CFLFieldAccessPathDMI> @@ -742,10 +742,6 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, return DfltCompose; } - if (R.isa>()) { - return R; - } - const 
auto *FldSensL = L.dyn_cast(); const auto *FldSensR = R.dyn_cast(); @@ -791,15 +787,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, const EdgeFunction &R) -> EdgeFunction { auto Ret = [&]() -> EdgeFunction { - if (llvm::isa>(R) || llvm::isa>(L)) { - return R; - } - if (llvm::isa>(R) || llvm::isa>(L)) { - return L; - } - - if (llvm::isa>(L) && llvm::isa>(R)) { - return L; + if (auto Dflt = defaultJoinOrNullNoId(L, R)) { + return Dflt; } const auto *FldSensL = L.dyn_cast(); @@ -822,15 +811,13 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, for (; It != End; ++It) { if (!Larger.contains(*It)) { - auto Union = LeftSmaller ? RPaths : LPaths; - + auto Union = Larger; Union.insert(It, End); if (Union.size() > BreadthKLimit) { klimitPaths(Union); } - // TODO: k-limit the number of paths! return CFLFieldSensEdgeFunction::from( CFLFieldSensEdgeValue{std::move(Union)}, DepthKLimit); } @@ -859,6 +846,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } + llvm::errs() << "COMBINE " << L << " X " << R << " ==> AllBottom\n"; + return AllBottom{}; }(); diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index c5e13d346f..d4ef6cc9bd 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -186,17 +186,16 @@ class CFLFieldSensTest : public ::testing::Test { const auto &Res = Results.resultAt(LeakInst, LeakFact); if (const auto *FieldStrings = Res.getValueOrNull()) { - if (llvm::all_of(FieldStrings->Paths, - [](const auto &F) { return !F.empty(); })) { + if (FieldStrings->Paths.empty()) { llvm::errs() << "> Erase leak at " << psr::llvmIRToString(LeakInst) << "; because leaking fact " << psr::llvmIRToShortString(LeakFact) - << " has non-empty field-string: " << Res << '\n'; 
+ << " has empty set of access-paths: " << Res << '\n'; TaintProblem.Leaks.erase(It); - } else { - ComputedLeaks[LeakInst].insert(LeakFact); + continue; } } + ComputedLeaks[LeakInst].insert(LeakFact); } EXPECT_EQ(GroundTruthEntries, ComputedLeaks); @@ -323,10 +322,8 @@ TEST_F(CFLFieldSensTest, Basic_18) { TEST_F(CFLFieldSensTest, Basic_20) { std::map GroundTruth = { {LineColFun{12, 3, "main"}, {LineColFun{6, 7, "main"}}}, - // {LineColFun{13, 3, "main"}, {LineColFun{13, 8, "main"}}}, + {LineColFun{13, 3, "main"}, {LineColFun{13, 8, "main"}}}, }; - // Gt[24] = {"23"}; // no leak here, because above we define the semantics to - // exclude deep taints! run({PathToLLFiles + "xtaint20_cpp_dbg.ll"}, GroundTruth); } From da536951ede236d0767de992d331376d372520cc Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 19 Feb 2026 11:40:14 +0100 Subject: [PATCH 19/29] Add structural sharing for Store- and Load sequences --- .../FieldSensAllocSitesAwareIFDSProblem.h | 130 ++++- include/phasar/Utils/Compressor.h | 18 +- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 467 +++++++++++------- 3 files changed, 405 insertions(+), 210 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index edcac24409..9137a760fe 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -18,6 +18,9 @@ #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h" +#include "phasar/Utils/Compressor.h" +#include "phasar/Utils/Logger.h" +#include "phasar/Utils/TypedVector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FunctionExtras.h" @@ -26,12 +29,13 @@ #include "llvm/Support/raw_ostream.h" #include +#include namespace psr { 
/// \file Implements field-sensitivity after the paper "Boosting the performance /// of alias-aware IFDS analysis with CFL-based environment transformers" by Li -/// et al. +/// et al. struct StoreEvent {}; struct LoadEvent {}; @@ -42,18 +46,103 @@ struct GEPEvent { int32_t Field; }; +enum class CFLFieldStringNodeId : uint32_t { + None = 0, +}; + +[[nodiscard]] inline llvm::hash_code hash_value(CFLFieldStringNodeId NId) { + return llvm::hash_value(std::underlying_type_t(NId)); +} + +struct CFLFieldStringNode { + CFLFieldStringNodeId Next{}; + int32_t Offset{}; + + [[nodiscard]] constexpr bool + operator==(const CFLFieldStringNode &) const noexcept = default; +}; + +} // namespace psr + +namespace llvm { +template <> struct DenseMapInfo { + static constexpr psr::CFLFieldStringNode getEmptyKey() noexcept { + return {psr::CFLFieldStringNodeId(UINT32_MAX), 0}; + } + static constexpr psr::CFLFieldStringNode getTombstoneKey() noexcept { + return {psr::CFLFieldStringNodeId(UINT32_MAX - 1), 0}; + } + static constexpr bool isEqual(psr::CFLFieldStringNode L, + psr::CFLFieldStringNode R) noexcept { + return L == R; + } + static llvm::hash_code getHashValue(psr::CFLFieldStringNode Nod) { + return llvm::hash_combine(Nod.Next, Nod.Offset); + } +}; +} // namespace llvm + +namespace psr { + +class CFLFieldStringManager { +public: + CFLFieldStringManager() { + // Sentinel + NodeCompressor.insertDummy( + CFLFieldStringNode{CFLFieldStringNodeId::None, 0}); + Depth.push_back(0); + } + + [[nodiscard]] CFLFieldStringNodeId intern(CFLFieldStringNode Nod) { + auto [Id, Inserted] = NodeCompressor.insert(Nod); + + if (Inserted) { + Depth.push_back(Depth[Nod.Next] + 1); + } + + return Id; + } + + [[nodiscard]] CFLFieldStringNodeId prepend(int32_t Head, + CFLFieldStringNodeId Tail) { + auto Ret = intern(CFLFieldStringNode{.Next = Tail, .Offset = Head}); + PHASAR_LOG_LEVEL(DEBUG, "[prepend]: " << Head << " :: #" << uint32_t(Tail) + << " = #" << uint32_t(Ret)); + return Ret; + } + + 
[[nodiscard]] CFLFieldStringNode operator[](CFLFieldStringNodeId NId) const { + return NodeCompressor[NId]; + } + + [[nodiscard]] llvm::SmallVector + getFullFieldString(CFLFieldStringNodeId NId) const; + + [[nodiscard]] CFLFieldStringNodeId + fromFullFieldString(llvm::ArrayRef FieldString); + + [[nodiscard]] uint32_t depth(CFLFieldStringNodeId NId) const { + return Depth[NId]; + } + +private: + Compressor NodeCompressor{}; + TypedVector Depth{}; +}; + struct CFLFieldAccessPath { static constexpr int32_t TopOffset = INT32_MIN; - llvm::SmallVector Loads; - llvm::SmallVector Stores; + CFLFieldStringNodeId Loads; + CFLFieldStringNodeId Stores; llvm::SmallDenseSet Kills; // Add an offset for pending GEPs; INT32_MIN is Top int32_t Offset = {0}; int32_t EmptyTombstone = 0; [[nodiscard]] bool empty() const noexcept { - return Loads.empty() && Stores.empty() && Kills.empty() && Offset == 0; + return Loads == CFLFieldStringNodeId::None && + Stores == CFLFieldStringNodeId::None && Kills.empty() && Offset == 0; } [[nodiscard]] bool kills(int32_t Off) const { @@ -74,6 +163,8 @@ struct CFLFieldAccessPath { friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const CFLFieldAccessPath &FieldString); + + void print(llvm::raw_ostream &OS, const CFLFieldStringManager &Mgr) const; }; struct CFLFieldAccessPathDMI { @@ -103,21 +194,24 @@ struct CFLFieldAccessPathDMI { }; struct CFLFieldSensEdgeValue { + [[clang::require_explicit_initialization]] CFLFieldStringManager *Mgr{}; llvm::SmallDenseSet Paths; static constexpr llvm::StringLiteral LogCategory = "CFLFieldSensEdgeValue"; - void applyStore(uint8_t DepthKLimit); - void applyGepAndStore(GEPEvent Evt, uint8_t DepthKLimit); - void applyLoad(uint8_t DepthKLimit); - void applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit); - void applyKill(); - void applyGepAndKill(GEPEvent Evt); - void applyGep(GEPEvent Evt); + // void applyStore(uint8_t DepthKLimit); + // void applyGepAndStore(GEPEvent Evt, uint8_t DepthKLimit); + // void 
applyLoad(uint8_t DepthKLimit); + // void applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit); + // void applyKill(); + // void applyGepAndKill(GEPEvent Evt); + // void applyGep(GEPEvent Evt); void applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit); void applyTransforms(const CFLFieldSensEdgeValue &Txns, uint8_t DepthKLimit); bool operator==(const CFLFieldSensEdgeValue &Other) const noexcept { + assert(Mgr == Other.Mgr); + assert(Mgr != nullptr); return Paths == Other.Paths; } bool operator!=(const CFLFieldSensEdgeValue &Other) const noexcept { @@ -154,7 +248,8 @@ class FieldSensAllocSitesAwareIFDSProblemBase "FieldSensAllocSitesAwareIFDSProblem"; [[nodiscard]] static InitialSeeds - makeInitialSeeds(const InitialSeeds &UserSeeds); + makeInitialSeeds(const InitialSeeds &UserSeeds, + CFLFieldStringManager &Mgr); [[nodiscard]] static std::pair getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { @@ -218,7 +313,7 @@ class FieldSensAllocSitesAwareIFDSProblem // epsilon!) 
[[nodiscard]] InitialSeeds initialSeeds() override { - return makeInitialSeeds(UserProblem->initialSeeds()); + return makeInitialSeeds(UserProblem->initialSeeds(), Mgr); } [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, @@ -250,10 +345,10 @@ class FieldSensAllocSitesAwareIFDSProblem return UserProblem->getCallToRetFlowFunction(CallSite, RetSite, Callees); } - static EdgeFunction getStoreEdgeFunction(d_t CurrNode, d_t SuccNode, - d_t PointerOp, d_t ValueOp, - uint8_t DepthKLimit, - const llvm::DataLayout &DL); + EdgeFunction getStoreEdgeFunction(d_t CurrNode, d_t SuccNode, + d_t PointerOp, d_t ValueOp, + uint8_t DepthKLimit, + const llvm::DataLayout &DL); EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, d_t SuccNode) override; @@ -284,6 +379,7 @@ class FieldSensAllocSitesAwareIFDSProblem private: IFDSTabulationProblem *UserProblem{}; + CFLFieldStringManager Mgr{}; FieldSensAllocSitesAwareIFDSProblemConfig Config{}; uint8_t DepthKLimit = 5; // Original from the paper diff --git a/include/phasar/Utils/Compressor.h b/include/phasar/Utils/Compressor.h index bc7bde4530..7bc0cb9457 100644 --- a/include/phasar/Utils/Compressor.h +++ b/include/phasar/Utils/Compressor.h @@ -11,12 +11,14 @@ #define PHASAR_UTILS_COMPRESSOR_H #include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Macros.h" #include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/TypedVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include #include #include #include @@ -41,7 +43,7 @@ class Compressor { } IdT getOrInsert(T Elem) { - auto [It, Inserted] = ToInt.try_emplace(Elem, IdT(ToInt.size())); + auto [It, Inserted] = ToInt.try_emplace(Elem, IdT(FromInt.size())); if (Inserted) { FromInt.push_back(Elem); } @@ -49,13 +51,19 @@ class Compressor { } std::pair insert(T Elem) { - auto [It, Inserted] = ToInt.try_emplace(Elem, IdT(ToInt.size())); + auto [It, Inserted] = ToInt.try_emplace(Elem, IdT(FromInt.size())); if (Inserted) { 
FromInt.push_back(Elem); } return {It->second, Inserted}; } + IdT insertDummy(T Elem) { + auto Ret = IdT(FromInt.size()); + FromInt.push_back(Elem); + return Ret; + } + [[nodiscard]] std::optional getOrNull(T Elem) const { if (auto It = ToInt.find(Elem); It != ToInt.end()) { @@ -159,6 +167,12 @@ class Compressor { return {Ret, true}; } + IdT insertDummy(std::convertible_to auto &&Elem) { + auto Ret = Id(FromInt.size()); + FromInt.emplace_back(PSR_FWD(Elem)); + return Ret; + } + /// Returns the index of the given element in the compressors storage. If /// the element isn't present, std::nullopt will be returned [[nodiscard]] std::optional getOrNull(const T &Elem) const { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index 59356a2c3b..f6da0508f8 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -30,6 +31,27 @@ using namespace psr; +llvm::SmallVector +CFLFieldStringManager::getFullFieldString(CFLFieldStringNodeId NId) const { + llvm::SmallVector Ret; + while (NId != CFLFieldStringNodeId::None) { + auto Nod = NodeCompressor[NId]; + Ret.push_back(Nod.Offset); + NId = Nod.Next; + } + std::ranges::reverse(Ret); + return Ret; +} + +CFLFieldStringNodeId CFLFieldStringManager::fromFullFieldString( + llvm::ArrayRef FieldString) { + CFLFieldStringNodeId Ret = CFLFieldStringNodeId::None; + for (const auto &Offset : FieldString) { + Ret = prepend(Offset, Ret); + } + return Ret; +} + namespace { using l_t = LatticeDomain; @@ -50,7 +72,7 @@ constexpr static int32_t addOffsets(int32_t L, int32_t R) noexcept { struct CFLFieldSensEdgeFunction { using l_t = LatticeDomain; - CFLFieldSensEdgeValue Transform{}; + 
[[clang::require_explicit_initialization]] CFLFieldSensEdgeValue Transform; uint8_t DepthKLimit{}; [[nodiscard]] l_t computeTarget(l_t Source) const { @@ -90,19 +112,21 @@ struct CFLFieldSensEdgeFunction { } [[nodiscard]] static auto from(CFLFieldAccessPath &&Txn, + CFLFieldStringManager &Mgr, uint8_t DepthKLimit) { // Avoid initializer-list as it prevents moving auto Ret = CFLFieldSensEdgeFunction{ - .Transform = {}, + .Transform = {&Mgr, {}}, .DepthKLimit = DepthKLimit, }; Ret.Transform.Paths.insert(std::move(Txn)); return Ret; } - [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit) { + [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit, + CFLFieldStringManager &Mgr) { auto Ret = CFLFieldSensEdgeFunction{ - .Transform = {}, + .Transform = {&Mgr, {}}, .DepthKLimit = DepthKLimit, }; Ret.Transform.Paths.insert(CFLFieldAccessPath{}); @@ -110,32 +134,38 @@ struct CFLFieldSensEdgeFunction { } }; -[[nodiscard]] std::string storesToString(const CFLFieldAccessPath &AP) { +[[nodiscard]] std::string storesToString(const CFLFieldAccessPath &AP, + const CFLFieldStringManager &Mgr) { std::string Ret; llvm::raw_string_ostream ROS(Ret); llvm::interleave( - AP.Stores, ROS, [&ROS](auto StoreOffs) { ROS << 'S' << StoreOffs; }, "."); + Mgr.getFullFieldString(AP.Stores), ROS, + [&ROS](auto StoreOffs) { ROS << 'S' << StoreOffs; }, "."); return Ret; } // Returns whether to retain F -[[nodiscard]] auto applyOneGepAndStore(CFLFieldAccessPath &F, GEPEvent Evt, +[[nodiscard]] auto applyOneGepAndStore(CFLFieldStringManager &Mgr, + CFLFieldAccessPath &F, GEPEvent Evt, uint8_t DepthKLimit) { - if (F.Stores.size() == DepthKLimit) { + if (Mgr.depth(F.Stores) == DepthKLimit) { // TODO: Optimize: - F.Stores.erase(F.Stores.begin()); + auto Full = Mgr.getFullFieldString(F.Stores); + Full.erase(Full.begin()); + F.Stores = Mgr.fromFullFieldString(Full); } - F.Stores.push_back(std::exchange(F.Offset, 0) + Evt.Field); + F.Stores = Mgr.prepend(std::exchange(F.Offset, 0) + Evt.Field, 
F.Stores); return std::true_type{}; } // Returns whether to retain F -[[nodiscard]] auto applyOneGepAndLoad(CFLFieldAccessPath &F, GEPEvent Evt, +[[nodiscard]] auto applyOneGepAndLoad(CFLFieldStringManager &Mgr, + CFLFieldAccessPath &F, GEPEvent Evt, uint8_t DepthKLimit) { auto Offs = F.Offset + Evt.Field; - if (F.Stores.empty()) { + if (F.Stores == psr::CFLFieldStringNodeId::None) { if (F.kills(Offs)) { return false; @@ -145,29 +175,32 @@ struct CFLFieldSensEdgeFunction { // TODO: Is this application of k-limiting correct here? // cf. Section 4.2.3 "K-Limiting" in the paper - if (F.Loads.size() == DepthKLimit) { + if (Mgr.depth(F.Loads) == DepthKLimit) { return true; } - F.Loads.push_back(Offs); + F.Loads = Mgr.prepend(Offs, F.Loads); F.Kills.clear(); return true; } - if (F.Stores.back() != Offs && - F.Stores.back() != CFLFieldAccessPath::TopOffset) { + auto StoresHead = Mgr[F.Stores]; + + if (StoresHead.Offset != Offs && + StoresHead.Offset != CFLFieldAccessPath::TopOffset) { return false; } - assert(F.Stores.back() == Offs || - F.Stores.back() == CFLFieldAccessPath::TopOffset); + assert(StoresHead.Offset == Offs || + StoresHead.Offset == CFLFieldAccessPath::TopOffset); F.Offset = 0; - F.Stores.pop_back(); + F.Stores = StoresHead.Next; // llvm::errs() << "> pop_back\n"; return true; } -[[nodiscard]] auto applyOneGepAndKill(CFLFieldAccessPath &F, GEPEvent Evt, +[[nodiscard]] auto applyOneGepAndKill(CFLFieldStringManager &Mgr, + CFLFieldAccessPath &F, GEPEvent Evt, uint8_t /*DepthKLimit*/) { auto Offs = addOffsets(F.Offset, Evt.Field); if (Offs == CFLFieldAccessPath::TopOffset) { @@ -175,165 +208,177 @@ struct CFLFieldSensEdgeFunction { return true; } - if (F.Stores.empty()) { + if (F.Stores == psr::CFLFieldStringNodeId::None) { F.Kills.insert(Offs); PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, "> add K" << Offs); return true; } - if (F.Stores.back() == Offs) { + auto StoresHead = Mgr[F.Stores]; + + if (StoresHead.Offset == Offs) { 
PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, - "> Kill " << storesToString(F)); + "> Kill " << storesToString(F, Mgr)); return false; } PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, - "> Retain " << storesToString(F)); + "> Retain " << storesToString(F, Mgr)); - assert(F.Stores.back() != Offs); + assert(StoresHead.Offset != Offs); return true; } -[[nodiscard]] auto applyOneGep(CFLFieldAccessPath &F, GEPEvent Evt, +[[nodiscard]] auto applyOneGep(CFLFieldStringManager &Mgr, + CFLFieldAccessPath &F, GEPEvent Evt, uint8_t /*DepthKLimit*/) { - if (F.Stores.empty()) { + if (F.Stores == psr::CFLFieldStringNodeId::None) { F.Offset = addOffsets(F.Offset, Evt.Field); } else { - F.Stores.back() = addOffsets(F.Stores.back(), -Evt.Field); + auto StoresHead = Mgr[F.Stores]; + F.Stores = + Mgr.prepend(addOffsets(StoresHead.Offset, -Evt.Field), StoresHead.Next); } return std::true_type{}; } } // namespace -void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, - uint8_t DepthKLimit) { - auto Save = std::exchange(Paths, {}); - Paths.reserve(Save.size()); - - for (auto F : Save) { - // TODO: Check, whether we can safely exchange Offset with 0 here! - - if (F.Stores.size() == DepthKLimit) { - // TODO: Optimize: - F.Stores.erase(F.Stores.begin()); - } - F.Stores.push_back(addOffsets(std::exchange(F.Offset, 0), Evt.Field)); - Paths.insert(std::move(F)); - } - - // TODO: What if Paths is empty? Or can't that happen? - // --> Does not happen, as long as the fact is not killed in all paths -} - -void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit) { - llvm::errs() << "[applyGepAndLoad]: " << *this << " + " << Evt.Field << "\n"; - - auto Save = std::exchange(Paths, {}); - - for (const auto &F : Save) { - auto Offs = addOffsets(F.Offset, Evt.Field); - if (F.Stores.empty()) { - - if (F.kills(Offs)) { - continue; - } - auto FF = F; - FF.Offset = 0; - - // TODO: Is this application of k-limiting correct here? - // cf. 
Section 4.2.3 "K-Limiting" in the paper - if (F.Loads.size() == DepthKLimit) { - Paths.insert(std::move(FF)); - continue; - } - - FF.Loads.push_back(Offs); - FF.Kills.clear(); - Paths.insert(std::move(FF)); - - continue; - } - - if (F.Stores.back() != Offs && - F.Stores.back() != CFLFieldAccessPath::TopOffset) { - continue; - } - - assert(F.Stores.back() == Offs); - auto FF = F; - FF.Offset = 0; - FF.Stores.pop_back(); - Paths.insert(std::move(FF)); - llvm::errs() << "> pop_back\n"; - } - - llvm::errs() << "=> " << *this << '\n'; -} - -void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { - llvm::errs() << "[applyGepAndKill]: " << *this << " + " << Evt.Field << "\n"; - - auto Save = std::exchange(Paths, {}); - - for (const auto &F : Save) { - auto Offs = addOffsets(F.Offset, Evt.Field); - - if (F.Stores.empty()) { - auto FF = F; - FF.Kills.insert(Offs); - Paths.insert(std::move(FF)); - llvm::errs() << "> add K" << Offs << '\n'; - continue; - } - - if (F.Stores.back() == Offs) { - llvm::errs() << "> Kill "; - llvm::interleave( - F.Stores, llvm::errs(), - [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); - llvm::errs() << '\n'; - continue; - } - - llvm::errs() << "> Retain "; - llvm::interleave( - F.Stores, llvm::errs(), - [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); - llvm::errs() << '\n'; - - assert(F.Stores.back() != Offs); - Paths.insert(F); - } -} - -void CFLFieldSensEdgeValue::applyGep(GEPEvent Evt) { - auto Save = std::exchange(Paths, {}); - Paths.reserve(Save.size()); - - for (auto F : Save) { - if (F.Stores.empty()) { - F.Offset = addOffsets(F.Offset, Evt.Field); - } else { - F.Stores.back() = addOffsets(F.Stores.back(), -Evt.Field); - } - Paths.insert(std::move(F)); - } -} - -void CFLFieldSensEdgeValue::applyStore(uint8_t DepthKLimit) { - applyGepAndStore(GEPEvent{0}, DepthKLimit); -} -void CFLFieldSensEdgeValue::applyLoad(uint8_t DepthKLimit) { - applyGepAndLoad(GEPEvent{0}, DepthKLimit); -} -void 
CFLFieldSensEdgeValue::applyKill() { // - applyGepAndKill(GEPEvent{0}); -} +// void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, +// uint8_t DepthKLimit) { +// auto Save = std::exchange(Paths, {}); +// Paths.reserve(Save.size()); + +// for (auto F : Save) { +// // TODO: Check, whether we can safely exchange Offset with 0 here! + +// if (F.Stores.size() == DepthKLimit) { +// // TODO: Optimize: +// F.Stores.erase(F.Stores.begin()); +// } +// F.Stores.push_back(addOffsets(std::exchange(F.Offset, 0), Evt.Field)); +// Paths.insert(std::move(F)); +// } + +// // TODO: What if Paths is empty? Or can't that happen? +// // --> Does not happen, as long as the fact is not killed in all paths +// } + +// void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t +// DepthKLimit) { +// llvm::errs() << "[applyGepAndLoad]: " << *this << " + " << Evt.Field << +// "\n"; + +// auto Save = std::exchange(Paths, {}); + +// for (const auto &F : Save) { +// auto Offs = addOffsets(F.Offset, Evt.Field); +// if (F.Stores.empty()) { + +// if (F.kills(Offs)) { +// continue; +// } +// auto FF = F; +// FF.Offset = 0; + +// // TODO: Is this application of k-limiting correct here? +// // cf. 
Section 4.2.3 "K-Limiting" in the paper +// if (F.Loads.size() == DepthKLimit) { +// Paths.insert(std::move(FF)); +// continue; +// } + +// FF.Loads.push_back(Offs); +// FF.Kills.clear(); +// Paths.insert(std::move(FF)); + +// continue; +// } + +// if (F.Stores.back() != Offs && +// F.Stores.back() != CFLFieldAccessPath::TopOffset) { +// continue; +// } + +// assert(F.Stores.back() == Offs); +// auto FF = F; +// FF.Offset = 0; +// FF.Stores.pop_back(); +// Paths.insert(std::move(FF)); +// llvm::errs() << "> pop_back\n"; +// } + +// llvm::errs() << "=> " << *this << '\n'; +// } + +// void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { +// llvm::errs() << "[applyGepAndKill]: " << *this << " + " << Evt.Field << +// "\n"; + +// auto Save = std::exchange(Paths, {}); + +// for (const auto &F : Save) { +// auto Offs = addOffsets(F.Offset, Evt.Field); + +// if (F.Stores.empty()) { +// auto FF = F; +// FF.Kills.insert(Offs); +// Paths.insert(std::move(FF)); +// llvm::errs() << "> add K" << Offs << '\n'; +// continue; +// } + +// if (F.Stores.back() == Offs) { +// llvm::errs() << "> Kill "; +// llvm::interleave( +// F.Stores, llvm::errs(), +// [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); +// llvm::errs() << '\n'; +// continue; +// } + +// llvm::errs() << "> Retain "; +// llvm::interleave( +// F.Stores, llvm::errs(), +// [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); +// llvm::errs() << '\n'; + +// assert(F.Stores.back() != Offs); +// Paths.insert(F); +// } +// } + +// void CFLFieldSensEdgeValue::applyGep(GEPEvent Evt) { +// auto Save = std::exchange(Paths, {}); +// Paths.reserve(Save.size()); + +// for (auto F : Save) { +// if (F.Stores.empty()) { +// F.Offset = addOffsets(F.Offset, Evt.Field); +// } else { +// F.Stores.back() = addOffsets(F.Stores.back(), -Evt.Field); +// } +// Paths.insert(std::move(F)); +// } +// } + +// void CFLFieldSensEdgeValue::applyStore(uint8_t DepthKLimit) { +// applyGepAndStore(GEPEvent{0}, DepthKLimit); 
+// } +// void CFLFieldSensEdgeValue::applyLoad(uint8_t DepthKLimit) { +// applyGepAndLoad(GEPEvent{0}, DepthKLimit); +// } +// void CFLFieldSensEdgeValue::applyKill() { // +// applyGepAndKill(GEPEvent{0}); +// } void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit) { + if (Mgr == nullptr) [[unlikely]] { + llvm::report_fatal_error("Mgr is nullptr!"); + } + if (Paths.empty() || Txn.empty()) { // Nothing to be done here return; @@ -354,24 +399,25 @@ void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, auto Copy = F; bool Retain = [&] { if (TxnOffset) { - if (!applyOneGep(Copy, GEPEvent{TxnOffset}, DepthKLimit)) { + if (!applyOneGep(*Mgr, Copy, GEPEvent{TxnOffset}, DepthKLimit)) { return false; } } - for (auto Ld : Txn.Loads) { - if (!applyOneGepAndLoad(Copy, GEPEvent{Ld}, DepthKLimit)) { + + for (auto Ld : Mgr->getFullFieldString(Txn.Loads)) { + if (!applyOneGepAndLoad(*Mgr, Copy, GEPEvent{Ld}, DepthKLimit)) { return false; } } for (auto Kl : Txn.Kills) { - if (!applyOneGepAndKill(Copy, GEPEvent{Kl}, DepthKLimit)) { + if (!applyOneGepAndKill(*Mgr, Copy, GEPEvent{Kl}, DepthKLimit)) { return false; } } - for (auto St : Txn.Stores) { - if (!applyOneGepAndStore(Copy, GEPEvent{St}, DepthKLimit)) { + for (auto St : Mgr->getFullFieldString(Txn.Stores)) { + if (!applyOneGepAndStore(*Mgr, Copy, GEPEvent{St}, DepthKLimit)) { return false; } } @@ -438,14 +484,10 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, } size_t psr::hash_value(const CFLFieldAccessPath &FieldString) noexcept { - auto HCL = llvm::hash_combine_range(FieldString.Loads.begin(), - FieldString.Loads.end()); - auto HCS = llvm::hash_combine_range(FieldString.Stores.begin(), - FieldString.Stores.end()); // Xor does not care about the order auto HCK = std::reduce(FieldString.Kills.begin(), FieldString.Kills.end(), 0, std::bit_xor<>{}); - return llvm::hash_combine(HCL, HCS, HCK); + return 
llvm::hash_combine(FieldString.Loads, FieldString.Stores, HCK); } llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, @@ -462,41 +504,80 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, OS << FieldString.Offset << '.'; } - for (auto Ld : FieldString.Loads) { - OS << 'L' << Ld << '.'; + // for (auto Ld : FieldString.Loads) { + // OS << 'L' << Ld << '.'; + // } + if (FieldString.Loads != CFLFieldStringNodeId::None) { + OS << "L#" << uint32_t(FieldString.Loads) << '.'; } for (auto Kl : FieldString.Kills) { OS << 'K' << Kl << '.'; } - for (auto St : FieldString.Stores) { - OS << 'S' << St << '.'; + // for (auto St : FieldString.Stores) { + // OS << 'S' << St << '.'; + // } + + if (FieldString.Loads != CFLFieldStringNodeId::None) { + OS << "S#" << uint32_t(FieldString.Loads) << '.'; } return OS; } +void CFLFieldAccessPath::print(llvm::raw_ostream &OS, + const CFLFieldStringManager &Mgr) const { + if (empty()) { + OS << "ε"; + return; + } + + if (Offset != 0) { + if (Offset > 0) { + OS << '+'; + } + + OS << Offset << '.'; + } + + for (auto Ld : Mgr.getFullFieldString(Loads)) { + OS << 'L' << Ld << '.'; + } + + for (auto Kl : Kills) { + OS << 'K' << Kl << '.'; + } + + for (auto St : Mgr.getFullFieldString(Stores)) { + OS << 'S' << St << '.'; + } +} + llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, const CFLFieldSensEdgeValue &EV) { + assert(EV.Mgr != nullptr); if (EV.Paths.size() == 1) { - return OS << *EV.Paths.begin(); + EV.Paths.begin()->print(OS, *EV.Mgr); + return OS; } OS << "{ "; - llvm::interleaveComma(EV.Paths, OS); + llvm::interleaveComma(EV.Paths, OS, [&](const auto &FieldString) { + FieldString.print(OS, *EV.Mgr); + }); return OS << " }"; } auto FieldSensAllocSitesAwareIFDSProblemBase::makeInitialSeeds( - const InitialSeeds &UserSeeds) - -> InitialSeeds { + const InitialSeeds &UserSeeds, + CFLFieldStringManager &Mgr) -> InitialSeeds { InitialSeeds::GeneralizedSeeds Ret; for (const auto &[Inst, Facts] : UserSeeds.getSeeds()) { auto 
&SeedsAtInst = Ret[Inst]; for (const auto &[Fact, Weight] : Facts) { - SeedsAtInst[Fact] = CFLFieldSensEdgeValue{{CFLFieldAccessPath{}}}; + SeedsAtInst[Fact] = CFLFieldSensEdgeValue{&Mgr, {CFLFieldAccessPath{}}}; } } @@ -525,7 +606,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( CFLFieldAccessPath FieldString{}; FieldString.Kills.insert(Offset); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), DepthKLimit); + return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, + DepthKLimit); } if (ValueOp == CurrNode && CurrNode != SuccNode) { @@ -538,13 +620,14 @@ auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( if (BaseBasePtr == SuccNode) { // push before Offset, or after? - FieldString.Stores.push_back(BaseOffset); + FieldString.Stores = Mgr.prepend(BaseOffset, FieldString.Stores); } } - FieldString.Stores.push_back(Offset); + FieldString.Stores = Mgr.prepend(Offset, FieldString.Stores); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), DepthKLimit); + return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, + DepthKLimit); } // unaffected by the store @@ -563,7 +646,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); } if (const auto *Store = llvm::dyn_cast(Curr)) { @@ -583,10 +666,10 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( // TODO;: How to deal with BasePtr? 
CFLFieldAccessPath FieldString{}; - FieldString.Loads.push_back(Offset); + FieldString.Loads = Mgr.prepend(Offset, FieldString.Loads); // llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; // llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; - return CFLFieldSensEdgeFunction::from(std::move(FieldString), + return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); } @@ -596,7 +679,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( CFLFieldAccessPath FieldString{}; FieldString.Offset = OffsVal; - return CFLFieldSensEdgeFunction::from(std::move(FieldString), + return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); } } @@ -617,7 +700,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); } // This is naturally identity @@ -637,7 +720,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); } return EdgeIdentity{}; @@ -666,7 +749,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); } // This naturally identity @@ -693,7 +776,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( CFLFieldAccessPath FieldString{}; FieldString.Kills.insert(*KillOffs); - return CFLFieldSensEdgeFunction::from(std::move(FieldString), + return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); } } @@ -701,7 +784,7 @@ auto 
FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Gen from zero - return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit); + return CFLFieldSensEdgeFunction::fromEpsilon(DepthKLimit, Mgr); } // TODO: Is that correct? -- We may need to handle field-indirections here @@ -709,16 +792,18 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( return EdgeIdentity{}; } -static void klimitPaths(auto &Paths) { +static void klimitPaths(auto &Paths, CFLFieldStringManager &Mgr) { llvm::SmallDenseMap, 2, CFLFieldAccessPathDMI> ToInsert; for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { auto It = IIt++; - if (!It->Stores.empty()) { + if (It->Stores != CFLFieldStringNodeId::None) { CFLFieldAccessPath Approx = *It; - Approx.Stores.back() = CFLFieldAccessPath::TopOffset; + auto StoresHead = Mgr[Approx.Stores]; + Approx.Stores = + Mgr.prepend(CFLFieldAccessPath::TopOffset, StoresHead.Next); ToInsert[std::move(Approx)].push_back(*It); Paths.erase(It); } @@ -764,7 +849,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, // } if (Txn.Paths.size() > BreadthKLimit) { - klimitPaths(Txn.Paths); + klimitPaths(Txn.Paths, Mgr); } return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } @@ -815,11 +900,11 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, Union.insert(It, End); if (Union.size() > BreadthKLimit) { - klimitPaths(Union); + klimitPaths(Union, Mgr); } return CFLFieldSensEdgeFunction::from( - CFLFieldSensEdgeValue{std::move(Union)}, DepthKLimit); + CFLFieldSensEdgeValue{&Mgr, std::move(Union)}, DepthKLimit); } } } From 5dd298d43c67b570c246d0ac4466b8a9566945bd Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 19 Feb 2026 14:45:23 +0100 Subject: [PATCH 20/29] Some cleanup + small enhancement to LatticeDomain-join --- include/phasar/Domain/LatticeDomain.h | 8 + .../FieldSensAllocSitesAwareIFDSProblem.h | 83 +++--- 
include/phasar/Utils/TypeTraits.h | 20 +- .../FieldSensAllocSitesAwareIFDSProblem.cpp | 237 ++++-------------- 4 files changed, 117 insertions(+), 231 deletions(-) diff --git a/include/phasar/Domain/LatticeDomain.h b/include/phasar/Domain/LatticeDomain.h index 8b2be7f222..d9d9c779cd 100644 --- a/include/phasar/Domain/LatticeDomain.h +++ b/include/phasar/Domain/LatticeDomain.h @@ -222,6 +222,14 @@ template struct JoinLatticeTraits> { return LHS; } + if constexpr (has_adl_join) { + if (auto LhsPtr = LHS.getValueOrNull()) { + if (auto RhsPtr = RHS.getValueOrNull()) { + return psr::adl_join(*LhsPtr, *RhsPtr); + } + } + } + return Bottom{}; } }; diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 9137a760fe..8bb85a6ca1 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -14,17 +14,18 @@ #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/Domain/BinaryDomain.h" #include "phasar/Domain/LatticeDomain.h" -#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" -#include "phasar/PhasarLLVM/Pointer/LLVMBasePointerAliasSet.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/Logger.h" #include "phasar/Utils/TypedVector.h" +#include "phasar/Utils/Utilities.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/Operator.h" #include "llvm/Support/raw_ostream.h" @@ -37,15 +38,6 @@ namespace psr { /// of alias-aware IFDS analysis with CFL-based environment transformers" by Li /// et al. 
-struct StoreEvent {}; -struct LoadEvent {}; - -struct KillEvent {}; - -struct GEPEvent { - int32_t Field; -}; - enum class CFLFieldStringNodeId : uint32_t { None = 0, }; @@ -60,6 +52,11 @@ struct CFLFieldStringNode { [[nodiscard]] constexpr bool operator==(const CFLFieldStringNode &) const noexcept = default; + + friend llvm::hash_code hash_value(CFLFieldStringNode Nod) { + return llvm::DenseMapInfo>::getHashValue( + {uint32_t(Nod.Next), Nod.Offset}); + } }; } // namespace psr @@ -67,17 +64,18 @@ struct CFLFieldStringNode { namespace llvm { template <> struct DenseMapInfo { static constexpr psr::CFLFieldStringNode getEmptyKey() noexcept { - return {psr::CFLFieldStringNodeId(UINT32_MAX), 0}; + return {.Next = psr::CFLFieldStringNodeId(UINT32_MAX), .Offset = INT32_MAX}; } static constexpr psr::CFLFieldStringNode getTombstoneKey() noexcept { - return {psr::CFLFieldStringNodeId(UINT32_MAX - 1), 0}; + return {.Next = psr::CFLFieldStringNodeId(UINT32_MAX - 1), + .Offset = INT32_MAX}; } static constexpr bool isEqual(psr::CFLFieldStringNode L, psr::CFLFieldStringNode R) noexcept { return L == R; } - static llvm::hash_code getHashValue(psr::CFLFieldStringNode Nod) { - return llvm::hash_combine(Nod.Next, Nod.Offset); + static auto getHashValue(psr::CFLFieldStringNode Nod) { + return hash_value(Nod); } }; } // namespace llvm @@ -86,12 +84,7 @@ namespace psr { class CFLFieldStringManager { public: - CFLFieldStringManager() { - // Sentinel - NodeCompressor.insertDummy( - CFLFieldStringNode{CFLFieldStringNodeId::None, 0}); - Depth.push_back(0); - } + CFLFieldStringManager(); [[nodiscard]] CFLFieldStringNodeId intern(CFLFieldStringNode Nod) { auto [Id, Inserted] = NodeCompressor.insert(Nod); @@ -133,9 +126,9 @@ class CFLFieldStringManager { struct CFLFieldAccessPath { static constexpr int32_t TopOffset = INT32_MIN; - CFLFieldStringNodeId Loads; - CFLFieldStringNodeId Stores; - llvm::SmallDenseSet Kills; + CFLFieldStringNodeId Loads{}; + CFLFieldStringNodeId Stores{}; + 
llvm::SmallDenseSet Kills{}; // Add an offset for pending GEPs; INT32_MIN is Top int32_t Offset = {0}; int32_t EmptyTombstone = 0; @@ -149,7 +142,7 @@ struct CFLFieldAccessPath { return Off != TopOffset && Kills.contains(Off); } - [[nodiscard]] bool + [[nodiscard]] constexpr bool operator==(const CFLFieldAccessPath &Other) const noexcept { return EmptyTombstone == Other.EmptyTombstone && Loads == Other.Loads && Stores == Other.Stores && Kills == Other.Kills; @@ -199,14 +192,6 @@ struct CFLFieldSensEdgeValue { static constexpr llvm::StringLiteral LogCategory = "CFLFieldSensEdgeValue"; - // void applyStore(uint8_t DepthKLimit); - // void applyGepAndStore(GEPEvent Evt, uint8_t DepthKLimit); - // void applyLoad(uint8_t DepthKLimit); - // void applyGepAndLoad(GEPEvent Evt, uint8_t DepthKLimit); - // void applyKill(); - // void applyGepAndKill(GEPEvent Evt); - // void applyGep(GEPEvent Evt); - void applyTransform(const CFLFieldAccessPath &Txn, uint8_t DepthKLimit); void applyTransforms(const CFLFieldSensEdgeValue &Txns, uint8_t DepthKLimit); bool operator==(const CFLFieldSensEdgeValue &Other) const noexcept { @@ -228,6 +213,26 @@ struct CFLFieldSensEdgeValue { [[nodiscard]] bool isEpsilon() const { return Paths.size() == 1 && Paths.begin()->empty(); } + + [[nodiscard]] static CFLFieldSensEdgeValue + epsilon(CFLFieldStringManager *Mgr) { + CFLFieldSensEdgeValue Ret{.Mgr = &assertNotNull(Mgr), .Paths = {}}; + Ret.Paths.insert({}); // Not using initializer_list to prevent copying + return Ret; + } + + [[nodiscard]] friend auto join(const CFLFieldSensEdgeValue &L, + const CFLFieldSensEdgeValue &R) { + assert(L.Mgr == R.Mgr); + assert(L.Mgr != nullptr); + const bool LeftSmaller = L.Paths.size() < R.Paths.size(); + auto Ret = LeftSmaller ? R : L; + const auto &Smaller = LeftSmaller ? 
L : R; + Ret.Paths.insert(Smaller.Paths.begin(), Smaller.Paths.end()); + // XXX: k-limit num-paths: This may not be necessary, as join() is only + // called from IDE-Phase-II + return Ret; + } }; template @@ -238,7 +243,7 @@ struct CFLFieldSensAnalysisDomain : AnalysisDomainTy { struct FieldSensAllocSitesAwareIFDSProblemConfig : LLVMIFDSAnalysisDomainDefault { llvm::unique_function(n_t Curr, d_t CurrNode)> KillsAt; - // TODO: more + // XXX: more }; class FieldSensAllocSitesAwareIFDSProblemBase @@ -291,16 +296,14 @@ class FieldSensAllocSitesAwareIFDSProblem using typename Base::t_t; using typename Base::v_t; - /// Constructs an IDETabulationProblem with the usual arguments + alias - /// information. - /// - /// \note It is useful to use an instance of FilteredAliasSet for the alias - /// information to lower suprious aliases + /// Constructs an IDETabulationProblem with the usual arguments, forwarded + /// from UserProblem explicit FieldSensAllocSitesAwareIFDSProblem( IFDSTabulationProblem *UserProblem, FieldSensAllocSitesAwareIFDSProblemConfig Config = {}) noexcept(std::is_nothrow_move_constructible_v) - : Base(UserProblem->getProjectIRDB(), UserProblem->getEntryPoints(), + : Base(assertNotNull(UserProblem).getProjectIRDB(), + assertNotNull(UserProblem).getEntryPoints(), UserProblem->getZeroValue()), UserProblem(UserProblem), Config(std::move(Config)) {} diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index ffff49c225..ca8590a28d 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -119,6 +119,12 @@ concept has_adl_to_string_v = requires(const T &Val) { { to_string(Val) } -> std::convertible_to; }; +template +concept has_adl_join = requires(const T &Val) { + // TODO: Add psr::join-variant, once we have a fallback! 
+ { join(Val, Val) } -> std::convertible_to; +}; + template concept has_erase_iterator_v = requires( T &Val, typename std::remove_cvref_t::iterator It) { Val.erase(It); }; @@ -204,10 +210,9 @@ template struct [[deprecated("getAsJson should not be used anymore. Use printAsJson " "instead")]] has_getAsJson : std::false_type {}; // NOLINT template -struct [[deprecated( - "getAsJson should not be used anymore. Use printAsJson " - "instead")]] has_getAsJson() - .getAsJson())>> +struct [[deprecated("getAsJson should not be used anymore. Use printAsJson " + "instead")]] +has_getAsJson().getAsJson())>> : std::true_type {}; // NOLINT struct TrueFn { @@ -252,6 +257,13 @@ template return to_string(Val); } +template +[[nodiscard]] decltype(auto) adl_join(const T &L, + const std::type_identity_t &R) { + // using psr::join; // TODO: Enable, once we have a generic psr::join! + return join(L, R); +} + struct IdentityFn { template decltype(auto) operator()(T &&Val) const noexcept { return std::forward(Val); diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index f6da0508f8..bddba00e0d 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -3,6 +3,7 @@ #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/Domain/LatticeDomain.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Fn.h" #include "phasar/Utils/Logger.h" @@ -31,6 +32,13 @@ using namespace psr; +CFLFieldStringManager::CFLFieldStringManager() { + // Sentinel + NodeCompressor.insertDummy( + CFLFieldStringNode{.Next = CFLFieldStringNodeId::None, .Offset = 0}); + Depth.push_back(0); +} + llvm::SmallVector CFLFieldStringManager::getFullFieldString(CFLFieldStringNodeId 
NId) const { llvm::SmallVector Ret; @@ -73,7 +81,7 @@ constexpr static int32_t addOffsets(int32_t L, int32_t R) noexcept { struct CFLFieldSensEdgeFunction { using l_t = LatticeDomain; [[clang::require_explicit_initialization]] CFLFieldSensEdgeValue Transform; - uint8_t DepthKLimit{}; + [[clang::require_explicit_initialization]] uint8_t DepthKLimit{}; [[nodiscard]] l_t computeTarget(l_t Source) const { Source.onValue(fn<&CFLFieldSensEdgeValue::applyTransforms>, Transform, @@ -116,7 +124,7 @@ struct CFLFieldSensEdgeFunction { uint8_t DepthKLimit) { // Avoid initializer-list as it prevents moving auto Ret = CFLFieldSensEdgeFunction{ - .Transform = {&Mgr, {}}, + .Transform = {.Mgr = &Mgr, .Paths = {}}, .DepthKLimit = DepthKLimit, }; Ret.Transform.Paths.insert(std::move(Txn)); @@ -125,12 +133,10 @@ struct CFLFieldSensEdgeFunction { [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit, CFLFieldStringManager &Mgr) { - auto Ret = CFLFieldSensEdgeFunction{ - .Transform = {&Mgr, {}}, + return CFLFieldSensEdgeFunction{ + .Transform = CFLFieldSensEdgeValue::epsilon(&Mgr), .DepthKLimit = DepthKLimit, }; - Ret.Transform.Paths.insert(CFLFieldAccessPath{}); - return Ret; } }; @@ -148,7 +154,7 @@ struct CFLFieldSensEdgeFunction { // Returns whether to retain F [[nodiscard]] auto applyOneGepAndStore(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, GEPEvent Evt, + CFLFieldAccessPath &F, int32_t Field, uint8_t DepthKLimit) { if (Mgr.depth(F.Stores) == DepthKLimit) { // TODO: Optimize: @@ -156,15 +162,15 @@ struct CFLFieldSensEdgeFunction { Full.erase(Full.begin()); F.Stores = Mgr.fromFullFieldString(Full); } - F.Stores = Mgr.prepend(std::exchange(F.Offset, 0) + Evt.Field, F.Stores); + F.Stores = Mgr.prepend(std::exchange(F.Offset, 0) + Field, F.Stores); return std::true_type{}; } // Returns whether to retain F [[nodiscard]] auto applyOneGepAndLoad(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, GEPEvent Evt, + CFLFieldAccessPath &F, int32_t Field, uint8_t DepthKLimit) { 
- auto Offs = F.Offset + Evt.Field; + auto Offs = F.Offset + Field; if (F.Stores == psr::CFLFieldStringNodeId::None) { if (F.kills(Offs)) { @@ -200,9 +206,9 @@ struct CFLFieldSensEdgeFunction { } [[nodiscard]] auto applyOneGepAndKill(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, GEPEvent Evt, + CFLFieldAccessPath &F, int32_t Field, uint8_t /*DepthKLimit*/) { - auto Offs = addOffsets(F.Offset, Evt.Field); + auto Offs = addOffsets(F.Offset, Field); if (Offs == CFLFieldAccessPath::TopOffset) { // We cannot kill Top return true; @@ -231,193 +237,62 @@ struct CFLFieldSensEdgeFunction { } [[nodiscard]] auto applyOneGep(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, GEPEvent Evt, + CFLFieldAccessPath &F, int32_t Field, uint8_t /*DepthKLimit*/) { if (F.Stores == psr::CFLFieldStringNodeId::None) { - F.Offset = addOffsets(F.Offset, Evt.Field); + F.Offset = addOffsets(F.Offset, Field); } else { auto StoresHead = Mgr[F.Stores]; F.Stores = - Mgr.prepend(addOffsets(StoresHead.Offset, -Evt.Field), StoresHead.Next); + Mgr.prepend(addOffsets(StoresHead.Offset, -Field), StoresHead.Next); } return std::true_type{}; } -} // namespace +void applyTransform(CFLFieldSensEdgeValue &EV, const CFLFieldAccessPath &Txn, + uint8_t DepthKLimit) { -// void CFLFieldSensEdgeValue::applyGepAndStore(GEPEvent Evt, -// uint8_t DepthKLimit) { -// auto Save = std::exchange(Paths, {}); -// Paths.reserve(Save.size()); - -// for (auto F : Save) { -// // TODO: Check, whether we can safely exchange Offset with 0 here! - -// if (F.Stores.size() == DepthKLimit) { -// // TODO: Optimize: -// F.Stores.erase(F.Stores.begin()); -// } -// F.Stores.push_back(addOffsets(std::exchange(F.Offset, 0), Evt.Field)); -// Paths.insert(std::move(F)); -// } - -// // TODO: What if Paths is empty? Or can't that happen? 
-// // --> Does not happen, as long as the fact is not killed in all paths -// } - -// void CFLFieldSensEdgeValue::applyGepAndLoad(GEPEvent Evt, uint8_t -// DepthKLimit) { -// llvm::errs() << "[applyGepAndLoad]: " << *this << " + " << Evt.Field << -// "\n"; - -// auto Save = std::exchange(Paths, {}); - -// for (const auto &F : Save) { -// auto Offs = addOffsets(F.Offset, Evt.Field); -// if (F.Stores.empty()) { - -// if (F.kills(Offs)) { -// continue; -// } -// auto FF = F; -// FF.Offset = 0; - -// // TODO: Is this application of k-limiting correct here? -// // cf. Section 4.2.3 "K-Limiting" in the paper -// if (F.Loads.size() == DepthKLimit) { -// Paths.insert(std::move(FF)); -// continue; -// } - -// FF.Loads.push_back(Offs); -// FF.Kills.clear(); -// Paths.insert(std::move(FF)); - -// continue; -// } - -// if (F.Stores.back() != Offs && -// F.Stores.back() != CFLFieldAccessPath::TopOffset) { -// continue; -// } - -// assert(F.Stores.back() == Offs); -// auto FF = F; -// FF.Offset = 0; -// FF.Stores.pop_back(); -// Paths.insert(std::move(FF)); -// llvm::errs() << "> pop_back\n"; -// } - -// llvm::errs() << "=> " << *this << '\n'; -// } - -// void CFLFieldSensEdgeValue::applyGepAndKill(GEPEvent Evt) { -// llvm::errs() << "[applyGepAndKill]: " << *this << " + " << Evt.Field << -// "\n"; - -// auto Save = std::exchange(Paths, {}); - -// for (const auto &F : Save) { -// auto Offs = addOffsets(F.Offset, Evt.Field); - -// if (F.Stores.empty()) { -// auto FF = F; -// FF.Kills.insert(Offs); -// Paths.insert(std::move(FF)); -// llvm::errs() << "> add K" << Offs << '\n'; -// continue; -// } - -// if (F.Stores.back() == Offs) { -// llvm::errs() << "> Kill "; -// llvm::interleave( -// F.Stores, llvm::errs(), -// [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); -// llvm::errs() << '\n'; -// continue; -// } - -// llvm::errs() << "> Retain "; -// llvm::interleave( -// F.Stores, llvm::errs(), -// [](auto StoreOffs) { llvm::errs() << 'S' << StoreOffs; }, "."); -// 
llvm::errs() << '\n'; - -// assert(F.Stores.back() != Offs); -// Paths.insert(F); -// } -// } - -// void CFLFieldSensEdgeValue::applyGep(GEPEvent Evt) { -// auto Save = std::exchange(Paths, {}); -// Paths.reserve(Save.size()); - -// for (auto F : Save) { -// if (F.Stores.empty()) { -// F.Offset = addOffsets(F.Offset, Evt.Field); -// } else { -// F.Stores.back() = addOffsets(F.Stores.back(), -Evt.Field); -// } -// Paths.insert(std::move(F)); -// } -// } - -// void CFLFieldSensEdgeValue::applyStore(uint8_t DepthKLimit) { -// applyGepAndStore(GEPEvent{0}, DepthKLimit); -// } -// void CFLFieldSensEdgeValue::applyLoad(uint8_t DepthKLimit) { -// applyGepAndLoad(GEPEvent{0}, DepthKLimit); -// } -// void CFLFieldSensEdgeValue::applyKill() { // -// applyGepAndKill(GEPEvent{0}); -// } - -void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, - uint8_t DepthKLimit) { - if (Mgr == nullptr) [[unlikely]] { - llvm::report_fatal_error("Mgr is nullptr!"); - } - - if (Paths.empty() || Txn.empty()) { + if (EV.Paths.empty() || Txn.empty()) { // Nothing to be done here return; } - if (isEpsilon()) { - Paths.clear(); - Paths.insert(Txn); + if (EV.isEpsilon()) { + EV.Paths.clear(); + EV.Paths.insert(Txn); return; } - // llvm::errs() << "[applyTransform]: " << *this << " X " << Txn << '\n'; - auto Save = std::exchange(Paths, {}); - Paths.reserve(Save.size()); + auto Save = std::exchange(EV.Paths, {}); + EV.Paths.reserve(Save.size()); const auto TxnOffset = Txn.Offset; + /// XXX: Should we save getFullFieldString(Txn.Loads) and + /// getFullFieldString(Txn.Stores)? Would it be faster? 
+ for (const auto &F : Save) { auto Copy = F; bool Retain = [&] { if (TxnOffset) { - if (!applyOneGep(*Mgr, Copy, GEPEvent{TxnOffset}, DepthKLimit)) { + if (!applyOneGep(*EV.Mgr, Copy, TxnOffset, DepthKLimit)) { return false; } } - for (auto Ld : Mgr->getFullFieldString(Txn.Loads)) { - if (!applyOneGepAndLoad(*Mgr, Copy, GEPEvent{Ld}, DepthKLimit)) { + for (auto Ld : EV.Mgr->getFullFieldString(Txn.Loads)) { + if (!applyOneGepAndLoad(*EV.Mgr, Copy, Ld, DepthKLimit)) { return false; } } for (auto Kl : Txn.Kills) { - if (!applyOneGepAndKill(*Mgr, Copy, GEPEvent{Kl}, DepthKLimit)) { + if (!applyOneGepAndKill(*EV.Mgr, Copy, Kl, DepthKLimit)) { return false; } } - for (auto St : Mgr->getFullFieldString(Txn.Stores)) { - if (!applyOneGepAndStore(*Mgr, Copy, GEPEvent{St}, DepthKLimit)) { + for (auto St : EV.Mgr->getFullFieldString(Txn.Stores)) { + if (!applyOneGepAndStore(*EV.Mgr, Copy, St, DepthKLimit)) { return false; } } @@ -426,31 +301,18 @@ void CFLFieldSensEdgeValue::applyTransform(const CFLFieldAccessPath &Txn, }(); if (Retain) { - Paths.insert(std::move(Copy)); + EV.Paths.insert(std::move(Copy)); } } - - // llvm::errs() << "[applyTransform]: > result: " << *this << '\n'; - - // // TODO: Optimize! 
- - // if (Txn.Offset) { - // applyGep(GEPEvent{Txn.Offset}); - // } - - // for (auto Ld : Txn.Loads) { - // applyGepAndLoad(GEPEvent{Ld}, DepthKLimit); - // } - // for (auto Kl : Txn.Kills) { - // applyGepAndKill(GEPEvent{Kl}); - // } - // for (auto St : Txn.Stores) { - // applyGepAndStore(GEPEvent{St}, DepthKLimit); - // } } +} // namespace void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, uint8_t DepthKLimit) { + if (Mgr == nullptr) [[unlikely]] { + llvm::report_fatal_error("Mgr is nullptr!"); + } + if (Txns.Paths.empty()) { Paths.clear(); return; @@ -458,7 +320,7 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, auto It = Txns.Paths.begin(); if (Txns.Paths.size() == 1) [[likely]] { - applyTransform(*It, DepthKLimit); + applyTransform(*this, *It, DepthKLimit); return; } @@ -468,12 +330,12 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, auto End = Txns.Paths.end(); auto Ret = *this; - Ret.applyTransform(*It, DepthKLimit); + applyTransform(Ret, *It, DepthKLimit); for (++It; It != End; ++It) { if (!It->empty()) { auto Tmp = *this; - Tmp.applyTransform(*It, DepthKLimit); + applyTransform(Tmp, *It, DepthKLimit); Ret.Paths.insert(Tmp.Paths.begin(), Tmp.Paths.end()); } else { Ret.Paths.insert(Paths.begin(), Paths.end()); @@ -577,7 +439,7 @@ auto FieldSensAllocSitesAwareIFDSProblemBase::makeInitialSeeds( for (const auto &[Inst, Facts] : UserSeeds.getSeeds()) { auto &SeedsAtInst = Ret[Inst]; for (const auto &[Fact, Weight] : Facts) { - SeedsAtInst[Fact] = CFLFieldSensEdgeValue{&Mgr, {CFLFieldAccessPath{}}}; + SeedsAtInst.try_emplace(Fact, CFLFieldSensEdgeValue::epsilon(&Mgr)); } } @@ -904,7 +766,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, } return CFLFieldSensEdgeFunction::from( - CFLFieldSensEdgeValue{&Mgr, std::move(Union)}, DepthKLimit); + CFLFieldSensEdgeValue{.Mgr = &Mgr, .Paths = std::move(Union)}, + DepthKLimit); } } } From 
2f79629966ef4a96ea997dd238c20acf707f4f8d Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 19 Feb 2026 16:59:41 +0100 Subject: [PATCH 21/29] Add some comment --- .../IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 8bb85a6ca1..499f3f4816 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -276,6 +276,16 @@ class FieldSensAllocSitesAwareIFDSProblemBase } }; +/// An IFDS-Problem adaptor that makes any field-insensitive IFDS analysis +/// field-sensitive. Just wrap your IFDS problem with +/// FieldSensAllocSitesAwareIFDSProblem and use the IterativeIDESolver instead +/// of the IFDSSolver. +/// +/// The only thing to change in your usual IFDS problem is not to kill data-flow +/// facts when only parts of the fields should be killed. This is now handled by +/// the FieldSensAllocSitesAwareIFDSProblem. For that, provide a +/// FieldSensAllocSitesAwareIFDSProblemConfig with a proper KillsAt +/// implementation. 
class FieldSensAllocSitesAwareIFDSProblem : public FieldSensAllocSitesAwareIFDSProblemBase, public IDETabulationProblem< From 7ef50d94021faa4c19aca5353ed4a61a779c7da0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 23 Feb 2026 17:05:40 +0100 Subject: [PATCH 22/29] Fix errors after merge --- .../DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h | 2 +- .../DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h index a275a8ee81..bd0e9bb04e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h @@ -140,7 +140,7 @@ class DefaultAllocSitesAwareIFDSProblem /// \note It is useful to use an instance of FilteredAliasSet for the alias /// information to lower suprious aliases explicit DefaultAllocSitesAwareIFDSProblem( - const ProjectIRDBBase *IRDB, LLVMAliasInfoRef AS, + const db_t *IRDB, LLVMAliasInfoRef AS, std::vector EntryPoints, d_t ZeroValue) noexcept(std::is_nothrow_move_constructible_v) : IFDSTabulationProblem(IRDB, std::move(EntryPoints), ZeroValue), diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 499f3f4816..646fc6c306 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -14,6 +14,8 @@ #include "phasar/DataFlow/IfdsIde/IFDSTabulationProblem.h" #include "phasar/Domain/BinaryDomain.h" #include "phasar/Domain/LatticeDomain.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" #include 
"phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" From 55a996e646fc1f23246c2dc1b773fe500ad80a50 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 23 Feb 2026 17:05:50 +0100 Subject: [PATCH 23/29] minor --- .../IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp index bddba00e0d..f03282e1c3 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp @@ -366,9 +366,6 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, OS << FieldString.Offset << '.'; } - // for (auto Ld : FieldString.Loads) { - // OS << 'L' << Ld << '.'; - // } if (FieldString.Loads != CFLFieldStringNodeId::None) { OS << "L#" << uint32_t(FieldString.Loads) << '.'; } @@ -377,10 +374,6 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, OS << 'K' << Kl << '.'; } - // for (auto St : FieldString.Stores) { - // OS << 'S' << St << '.'; - // } - if (FieldString.Loads != CFLFieldStringNodeId::None) { OS << "S#" << uint32_t(FieldString.Loads) << '.'; } @@ -694,21 +687,15 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, if (FldSensL && FldSensR) { if (FldSensR->Transform.isEpsilon()) { - // llvm::errs() << "[EXTEND]: identity transformation!\n"; return L; } if (FldSensL->Transform.Paths.empty()) { - // llvm::errs() << "[EXTEND]: Empty prefix!\n"; return L; } auto Txn = FldSensL->Transform; Txn.applyTransforms(FldSensR->Transform, DepthKLimit); - // if (Txn.Paths.empty()) { - // // llvm::errs() << "[EXTEND]: kill flow\n"; - // return allTopFunction(); - // } if (Txn.Paths.size() > BreadthKLimit) { klimitPaths(Txn.Paths, Mgr); From 8407c00702657f987c31b62f7db882a85273c02e 
Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 24 Feb 2026 15:50:12 +0100 Subject: [PATCH 24/29] Integrate CFL-fieldsens analysis into phasar-cli for IFDSTaintAnalysis --- .../DataFlow/IfdsIde/IDETabulationProblem.h | 14 +++ .../FieldSensAllocSitesAwareIFDSProblem.h | 43 ++++++- .../IfdsIde/Problems/IFDSTaintAnalysis.h | 12 +- .../PhasarLLVM/Utils/DataFlowAnalysisType.def | 1 + include/phasar/Utils/TypeTraits.h | 8 ++ .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 52 ++++++++- .../Controller/AnalysisController.cpp | 4 + .../Controller/AnalysisControllerInternal.h | 1 + .../AnalysisControllerXIFDSCFLEnvTaint.cpp | 107 ++++++++++++++++++ .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 7 +- 10 files changed, 235 insertions(+), 14 deletions(-) create mode 100644 tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp diff --git a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h index 7cab00da3d..80f80f2e6d 100644 --- a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h @@ -34,6 +34,7 @@ #include #include #include +#include namespace psr { @@ -122,6 +123,19 @@ class IDETabulationProblem : public FlowFunctions, } } + [[nodiscard]] constexpr AnalysisPrinterBase & + printer() noexcept { + assert(Printer != nullptr); + return *Printer; + } + + [[nodiscard]] constexpr MaybeUniquePtr> + consumePrinter() noexcept { + assert(Printer != nullptr); + return std::exchange(Printer, + NullAnalysisPrinter::getInstance()); + } + /// Checks if the given data-flow fact is the special tautological lambda (or /// zero) fact. 
[[nodiscard]] virtual bool isZeroValue(d_t FlowFact) const noexcept { diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h index 646fc6c306..509c4cf5ae 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h @@ -20,6 +20,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/raw_ostream.h" +#include #include #include @@ -295,6 +297,30 @@ class FieldSensAllocSitesAwareIFDSProblem using Base = IDETabulationProblem< CFLFieldSensAnalysisDomain>; + template + static decltype(FieldSensAllocSitesAwareIFDSProblemConfig::KillsAt) + deriveKillsAt(ConcreteProblemT *UserProblem) { + assert(UserProblem != nullptr); + if constexpr (requires() { + { + UserProblem->killsAt() + } -> psr::invocable_r, n_t, d_t>; + }) { + return UserProblem->killsAt(); + } else if constexpr (requires() { + { + UserProblem->killsAt() + } -> std::invocable; + }) { + // Intentionally leaving an unused variable, so that the compiler emits a + // warning here + auto KillsAtHasWrongReturnType = UserProblem->killsAt(); + return nullptr; + } else { + return nullptr; + } + } + public: using typename Base::container_type; using typename Base::d_t; @@ -312,16 +338,27 @@ class FieldSensAllocSitesAwareIFDSProblem /// from UserProblem explicit FieldSensAllocSitesAwareIFDSProblem( IFDSTabulationProblem *UserProblem, - FieldSensAllocSitesAwareIFDSProblemConfig Config = - {}) noexcept(std::is_nothrow_move_constructible_v) + FieldSensAllocSitesAwareIFDSProblemConfig + Config) noexcept(std::is_nothrow_move_constructible_v) 
: Base(assertNotNull(UserProblem).getProjectIRDB(), assertNotNull(UserProblem).getEntryPoints(), UserProblem->getZeroValue()), UserProblem(UserProblem), Config(std::move(Config)) {} + explicit FieldSensAllocSitesAwareIFDSProblem( + proper_subclass_of< + IFDSTabulationProblem> auto + *UserProblem) + : FieldSensAllocSitesAwareIFDSProblem( + UserProblem, FieldSensAllocSitesAwareIFDSProblemConfig{ + .KillsAt = deriveKillsAt(UserProblem), + }) {} + FieldSensAllocSitesAwareIFDSProblem( std::nullptr_t, - FieldSensAllocSitesAwareIFDSProblemConfig Config = {}) = delete; + FieldSensAllocSitesAwareIFDSProblemConfig Config) = delete; + + FieldSensAllocSitesAwareIFDSProblem(std::nullptr_t) = delete; // TODO: Provide a customization-point to provide gen offsets to the // edge-functions (generating from zero currently always generates at diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h index 27def5e527..4199dc4ee4 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h @@ -41,6 +41,12 @@ class LLVMTaintConfig; */ class IFDSTaintAnalysis : public IFDSTabulationProblem { + struct KillsAtFn { + const IFDSTaintAnalysis *Self{}; + + [[nodiscard]] std::optional operator()(n_t Curr, + d_t CurrNode) const; + }; public: // Setup the configuration type @@ -58,7 +64,8 @@ class IFDSTaintAnalysis IFDSTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, const LLVMTaintConfig *Config, std::vector EntryPoints = {"main"}, - bool TaintMainArgs = true); + bool TaintMainArgs = true, + bool EnableStrongUpdateStore = true); FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override; @@ -86,10 +93,13 @@ class IFDSTaintAnalysis [[nodiscard]] bool isInteresting(const llvm::Instruction *Inst) const noexcept; + [[nodiscard]] KillsAtFn killsAt() const { return {.Self = 
this}; } + private: const LLVMTaintConfig *Config{}; LLVMAliasInfoRef PT{}; bool TaintMainArgs{}; + bool EnableStrongUpdateStore{}; library_summary::LLVMFunctionDataFlowFacts Llvmfdff; bool isSourceCall(const llvm::CallBase *CB, diff --git a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def index 5c11905fc1..e8d1a71ae2 100644 --- a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def +++ b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def @@ -14,6 +14,7 @@ DATA_FLOW_ANALYSIS_TYPES(IFDSUninitializedVariables, "ifds-uninit", "Find usages of uninitialized variables.") DATA_FLOW_ANALYSIS_TYPES(IFDSConstAnalysis, "ifds-const", "Find variables that are actually mutated through the program") DATA_FLOW_ANALYSIS_TYPES(IFDSTaintAnalysis, "ifds-taint", "Simple, alias-aware taint-analysis. Use with --analysis-config") +DATA_FLOW_ANALYSIS_TYPES(IFDSCFLEnvTaintAnalysis, "ifds-fieldsens-taint", "Same base analysis as ifds-taint, but uses CFL-Environment-Transformers to achieve field sensitivity. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(SparseIFDSTaintAnalysis, "sparse-ifds-taint", "Simple, alias-aware taint-analysis utilizing SparseIFDS. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IDEExtendedTaintAnalysis, "ide-xtaint", "More advanced alias-aware taint analysis that provides limited field-sensitivity. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IFDSTypeAnalysis, "ifds-type", "Simple type analysis") diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 2837852a82..84ad77858b 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -270,6 +270,14 @@ struct IdentityFn { } }; +template +concept invocable_r = requires(T Val, P... Params) { + { std::invoke(PSR_FWD(Val), PSR_FWD(Params)...) 
} -> std::convertible_to; +}; + +template +concept proper_subclass_of = std::derived_from && !std::same_as; + // NOLINTEND(readability-identifier-naming) } // namespace psr diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index 28b8b65f0a..b754aa40f9 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -13,6 +13,7 @@ #include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h" @@ -45,9 +46,11 @@ IFDSTaintAnalysis::IFDSTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, const LLVMTaintConfig *Config, std::vector EntryPoints, - bool TaintMainArgs) + bool TaintMainArgs, + bool EnableStrongUpdateStore) : IFDSTabulationProblem(IRDB, std::move(EntryPoints), createZeroValue()), Config(Config), PT(PT), TaintMainArgs(TaintMainArgs), + EnableStrongUpdateStore(EnableStrongUpdateStore), Llvmfdff(library_summary::readFromFDFF(getLibCSummary(), *IRDB)) { assert(Config != nullptr); assert(PT); @@ -280,11 +283,23 @@ auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr, Gen.insert(Store->getValueOperand()); } + if (EnableStrongUpdateStore) { + return lambdaFlow( + [Store, Gen{std::move(Gen)}](d_t Source) -> container_type { + if (Store->getPointerOperand() == Source) { + return {}; + } + if (Store->getValueOperand() == Source) { + return Gen; + } + + return {Source}; + }); + } + + // Only weak update on store return lambdaFlow( [Store, Gen{std::move(Gen)}](d_t Source) -> container_type { - if (Store->getPointerOperand() == 
Source) { - return {}; - } if (Store->getValueOperand() == Source) { return Gen; } @@ -538,4 +553,33 @@ bool IFDSTaintAnalysis::isInteresting( return Config->mayLeakValuesAt(Inst, nullptr); } +std::optional +IFDSTaintAnalysis::KillsAtFn::operator()(n_t Curr, d_t CurrNode) const { + const auto *CS = llvm::dyn_cast(Curr); + if (!CS) { + return std::nullopt; + } + + const auto *DestFun = CS->getCalledFunction(); + if (!DestFun) { + return std::nullopt; + } + + container_type Kill; + psr::collectSanitizedFacts(Kill, *Self->Config, CS, DestFun); + + const auto &DL = Self->IRDB->getModule()->getDataLayout(); + + for (const auto *KillFact : Kill) { + auto [BasePtr, Offset] = + psr::FieldSensAllocSitesAwareIFDSProblemBase::getBaseAndOffset(KillFact, + DL); + if (BasePtr == CurrNode) { + return Offset; + } + } + + return std::nullopt; +} + } // namespace psr diff --git a/tools/phasar-cli/Controller/AnalysisController.cpp b/tools/phasar-cli/Controller/AnalysisController.cpp index 27fb9d2f9c..544142e8a6 100644 --- a/tools/phasar-cli/Controller/AnalysisController.cpp +++ b/tools/phasar-cli/Controller/AnalysisController.cpp @@ -11,6 +11,7 @@ #include "phasar/PhasarLLVM/Passes/GeneralStatisticsAnalysis.h" #include "phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" #include "phasar/Utils/NlohmannLogging.h" #include "AnalysisControllerInternal.h" @@ -127,6 +128,9 @@ static void executeWholeProgram(AnalysisController &Data) { case DataFlowAnalysisType::IFDSTaintAnalysis: executeIFDSTaint(Data); continue; + case DataFlowAnalysisType::IFDSCFLEnvTaintAnalysis: + executeIFDSCFLEnvTaint(Data); + continue; case DataFlowAnalysisType::SparseIFDSTaintAnalysis: executeSparseIFDSTaint(Data); continue; diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternal.h b/tools/phasar-cli/Controller/AnalysisControllerInternal.h index 8576c51438..42547dbff5 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternal.h +++ 
b/tools/phasar-cli/Controller/AnalysisControllerInternal.h @@ -32,6 +32,7 @@ namespace psr::controller { LLVM_LIBRARY_VISIBILITY void executeIFDSUninitVar(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSConst(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSTaint(AnalysisController &Data); +LLVM_LIBRARY_VISIBILITY void executeIFDSCFLEnvTaint(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSType(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIFDSSolverTest(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void diff --git a/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp new file mode 100644 index 0000000000..cd55027717 --- /dev/null +++ b/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * Copyright (c) 2026 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" +#include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" +#include "phasar/Utils/IO.h" + +#include "llvm/Support/raw_ostream.h" + +#include "AnalysisController.h" +#include "AnalysisControllerEmitterOptions.h" +#include "AnalysisControllerInternalIDE.h" + +using namespace psr; + +void controller::executeIFDSCFLEnvTaint(AnalysisController &Data) { + auto Config = makeTaintConfig(Data); + + auto UserProblem = createAnalysisProblem( + *Data.HA, &Config, Data.EntryPoints, /*TaintMainArgs*/ false, + /*EnableStrongUpdateStore*/ false); + auto Printer = UserProblem.consumePrinter(); + auto FieldSensProblem = FieldSensAllocSitesAwareIFDSProblem(&UserProblem); + + IterativeIDESolver Solver(&FieldSensProblem, &Data.HA->getICFG()); + + SimpleTimer MeasureTime; + + auto Results = Solver.solve(); + + if (Data.EmitterOptions & + AnalysisControllerEmitterOptions::EmitStatisticsAsText) { + + llvm::outs() << "Elapsed: " << MeasureTime.elapsed() << '\n'; + } + + // emitRequestedDataFlowResults(Data, Solver); + + // TODO: Once we have properly migrated IterativeIDESolver into phasar-cli, we + // should use emitRequestedDataFlowResults here, instead of hand-rolling the + // output! 
+ + const auto WithOutStream = [&Data, + HasResultsDir = !Data.ResultDirectory.empty()]( + const llvm::Twine &FileName, auto Handler) { + if (HasResultsDir) { + if (auto OFS = openFileStream(Data.ResultDirectory.string() + FileName)) { + Handler(*OFS); + } + } else { + Handler(llvm::outs()); + } + }; + + using enum AnalysisControllerEmitterOptions; + + auto EmitterOptions = Data.EmitterOptions; + + if (EmitterOptions & EmitTextReport) { + EmitterOptions &= ~EmitTextReport; + WithOutStream("/psr-report.txt", [&](llvm::raw_ostream &OS) { + Printer->onInitialize(); + bool HasResults = false; + for (const auto &[Inst, Facts] : UserProblem.Leaks) { + for (const auto &Fact : Facts) { + const auto &Fields = Results.resultAt(Inst, Fact); + + if (const auto *FieldStrings = Fields.getValueOrNull()) { + if (FieldStrings->Paths.empty()) { + // filter-out leak + continue; + } + } + + HasResults = true; + Printer->onResult(Inst, Fact, + DataFlowAnalysisType::IFDSCFLEnvTaintAnalysis); + } + } + Printer->onFinalize(OS); + if (!HasResults) { + OS << "No leaks found!\n"; + } + }); + } + + if (EmitterOptions & EmitRawResults) { + EmitterOptions &= ~EmitRawResults; + WithOutStream("/psr-raw-results.txt", [&](llvm::raw_ostream &OS) { + OS << Config << '\n'; + Solver.dumpResults(OS); + }); + } + + if (EmitterOptions != AnalysisControllerEmitterOptions{}) { + llvm::errs() << "Some emit-*** options may be ignored, because they have " + "not been implemented yet for ifds-fieldsens-taint\n"; + } +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index d4ef6cc9bd..1af4d8b663 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -1,5 +1,4 @@ #include "phasar/ControlFlow/CallGraphAnalysisType.h" -#include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" #include "phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h" #include 
"phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" @@ -14,7 +13,6 @@ #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/Logger.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Instruction.h" @@ -161,10 +159,7 @@ class CFLFieldSensTest : public ::testing::Test { psr::LLVMTaintConfig TC(IRDB); ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); - psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem( - &TaintProblem, { - .KillsAt = TaintProblem.killsAt(), - }); + psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem(&TaintProblem); psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, nullptr, &BaseAS); From 14de5b81a2d293395693328c7ef14441caf8ddbc Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 24 Feb 2026 16:17:42 +0100 Subject: [PATCH 25/29] Rename cfl fieldsens analysis problem + introduce own namespace for it --- ...FDSProblem.h => CFLFieldSensIFDSProblem.h} | 230 +++++++++--------- ...roblem.cpp => CFLFieldSensIFDSProblem.cpp} | 203 ++++++++-------- .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 6 +- .../AnalysisControllerXIFDSCFLEnvTaint.cpp | 4 +- .../DataFlow/IfdsIde/CFLFieldSensTest.cpp | 11 +- 5 files changed, 223 insertions(+), 231 deletions(-) rename include/phasar/PhasarLLVM/DataFlow/IfdsIde/{FieldSensAllocSitesAwareIFDSProblem.h => CFLFieldSensIFDSProblem.h} (62%) rename lib/PhasarLLVM/DataFlow/IfdsIde/{FieldSensAllocSitesAwareIFDSProblem.cpp => CFLFieldSensIFDSProblem.cpp} (74%) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h similarity index 62% rename from include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h rename to include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h index 509c4cf5ae..7522f91e62 100644 --- 
a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h @@ -36,61 +36,63 @@ #include #include -namespace psr { +namespace psr::cfl_fieldsens { /// \file Implements field-sensitivity after the paper "Boosting the performance /// of alias-aware IFDS analysis with CFL-based environment transformers" by Li /// et al. -enum class CFLFieldStringNodeId : uint32_t { +// NOLINTNEXTLINE(performance-enum-size) +enum class FieldStringNodeId : uint32_t { None = 0, }; -[[nodiscard]] inline llvm::hash_code hash_value(CFLFieldStringNodeId NId) { - return llvm::hash_value(std::underlying_type_t(NId)); +[[nodiscard]] inline llvm::hash_code hash_value(FieldStringNodeId NId) { + return llvm::hash_value(std::underlying_type_t(NId)); } -struct CFLFieldStringNode { - CFLFieldStringNodeId Next{}; +struct FieldStringNode { + FieldStringNodeId Next{}; int32_t Offset{}; [[nodiscard]] constexpr bool - operator==(const CFLFieldStringNode &) const noexcept = default; + operator==(const FieldStringNode &) const noexcept = default; - friend llvm::hash_code hash_value(CFLFieldStringNode Nod) { + friend llvm::hash_code hash_value(FieldStringNode Nod) { return llvm::DenseMapInfo>::getHashValue( {uint32_t(Nod.Next), Nod.Offset}); } }; -} // namespace psr +} // namespace psr::cfl_fieldsens namespace llvm { -template <> struct DenseMapInfo { - static constexpr psr::CFLFieldStringNode getEmptyKey() noexcept { - return {.Next = psr::CFLFieldStringNodeId(UINT32_MAX), .Offset = INT32_MAX}; +template <> struct DenseMapInfo { + using FieldStringNode = psr::cfl_fieldsens::FieldStringNode; + using FieldStringNodeId = psr::cfl_fieldsens::FieldStringNodeId; + + static constexpr FieldStringNode getEmptyKey() noexcept { + return {.Next = FieldStringNodeId(UINT32_MAX), .Offset = INT32_MAX}; } - static constexpr psr::CFLFieldStringNode getTombstoneKey() noexcept { - return {.Next = 
psr::CFLFieldStringNodeId(UINT32_MAX - 1), - .Offset = INT32_MAX}; + static constexpr FieldStringNode getTombstoneKey() noexcept { + return {.Next = FieldStringNodeId(UINT32_MAX - 1), .Offset = INT32_MAX}; } - static constexpr bool isEqual(psr::CFLFieldStringNode L, - psr::CFLFieldStringNode R) noexcept { + static constexpr bool isEqual(FieldStringNode L, FieldStringNode R) noexcept { return L == R; } - static auto getHashValue(psr::CFLFieldStringNode Nod) { - return hash_value(Nod); - } + static auto getHashValue(FieldStringNode Nod) { return hash_value(Nod); } }; } // namespace llvm namespace psr { -class CFLFieldStringManager { +namespace cfl_fieldsens { + +class FieldStringManager { public: - CFLFieldStringManager(); + FieldStringManager(); - [[nodiscard]] CFLFieldStringNodeId intern(CFLFieldStringNode Nod) { + [[nodiscard]] FieldStringNodeId intern(FieldStringNode Nod) { auto [Id, Inserted] = NodeCompressor.insert(Nod); if (Inserted) { @@ -100,46 +102,46 @@ class CFLFieldStringManager { return Id; } - [[nodiscard]] CFLFieldStringNodeId prepend(int32_t Head, - CFLFieldStringNodeId Tail) { - auto Ret = intern(CFLFieldStringNode{.Next = Tail, .Offset = Head}); + [[nodiscard]] FieldStringNodeId prepend(int32_t Head, + FieldStringNodeId Tail) { + auto Ret = intern(FieldStringNode{.Next = Tail, .Offset = Head}); PHASAR_LOG_LEVEL(DEBUG, "[prepend]: " << Head << " :: #" << uint32_t(Tail) << " = #" << uint32_t(Ret)); return Ret; } - [[nodiscard]] CFLFieldStringNode operator[](CFLFieldStringNodeId NId) const { + [[nodiscard]] FieldStringNode operator[](FieldStringNodeId NId) const { return NodeCompressor[NId]; } [[nodiscard]] llvm::SmallVector - getFullFieldString(CFLFieldStringNodeId NId) const; + getFullFieldString(FieldStringNodeId NId) const; - [[nodiscard]] CFLFieldStringNodeId + [[nodiscard]] FieldStringNodeId fromFullFieldString(llvm::ArrayRef FieldString); - [[nodiscard]] uint32_t depth(CFLFieldStringNodeId NId) const { + [[nodiscard]] uint32_t 
depth(FieldStringNodeId NId) const { return Depth[NId]; } private: - Compressor NodeCompressor{}; - TypedVector Depth{}; + Compressor NodeCompressor{}; + TypedVector Depth{}; }; -struct CFLFieldAccessPath { +struct AccessPath { static constexpr int32_t TopOffset = INT32_MIN; - CFLFieldStringNodeId Loads{}; - CFLFieldStringNodeId Stores{}; + FieldStringNodeId Loads{}; + FieldStringNodeId Stores{}; llvm::SmallDenseSet Kills{}; // Add an offset for pending GEPs; INT32_MIN is Top int32_t Offset = {0}; int32_t EmptyTombstone = 0; [[nodiscard]] bool empty() const noexcept { - return Loads == CFLFieldStringNodeId::None && - Stores == CFLFieldStringNodeId::None && Kills.empty() && Offset == 0; + return Loads == FieldStringNodeId::None && + Stores == FieldStringNodeId::None && Kills.empty() && Offset == 0; } [[nodiscard]] bool kills(int32_t Off) const { @@ -147,39 +149,38 @@ struct CFLFieldAccessPath { } [[nodiscard]] constexpr bool - operator==(const CFLFieldAccessPath &Other) const noexcept { + operator==(const AccessPath &Other) const noexcept { return EmptyTombstone == Other.EmptyTombstone && Loads == Other.Loads && Stores == Other.Stores && Kills == Other.Kills; } - bool operator!=(const CFLFieldAccessPath &Other) const noexcept { + bool operator!=(const AccessPath &Other) const noexcept { return !(*this == Other); } - friend size_t hash_value(const CFLFieldAccessPath &FieldString) noexcept; + friend size_t hash_value(const AccessPath &FieldString) noexcept; friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const CFLFieldAccessPath &FieldString); + const AccessPath &FieldString); - void print(llvm::raw_ostream &OS, const CFLFieldStringManager &Mgr) const; + void print(llvm::raw_ostream &OS, const FieldStringManager &Mgr) const; }; -struct CFLFieldAccessPathDMI { - static CFLFieldAccessPath getEmptyKey() { - CFLFieldAccessPath Ret{}; +struct AccessPathDMI { + static AccessPath getEmptyKey() { + AccessPath Ret{}; Ret.EmptyTombstone = 1; return Ret; } - static 
CFLFieldAccessPath getTombstoneKey() { - CFLFieldAccessPath Ret{}; + static AccessPath getTombstoneKey() { + AccessPath Ret{}; Ret.EmptyTombstone = 2; return Ret; } - static auto getHashValue(const CFLFieldAccessPath &FieldString) noexcept { + static auto getHashValue(const AccessPath &FieldString) noexcept { return hash_value(FieldString); } - static bool isEqual(const CFLFieldAccessPath &L, - const CFLFieldAccessPath &R) noexcept { + static bool isEqual(const AccessPath &L, const AccessPath &R) noexcept { if (L.EmptyTombstone != R.EmptyTombstone) { return false; } @@ -190,43 +191,43 @@ struct CFLFieldAccessPathDMI { } }; -struct CFLFieldSensEdgeValue { - [[clang::require_explicit_initialization]] CFLFieldStringManager *Mgr{}; - llvm::SmallDenseSet Paths; +struct IFDSEdgeValue { + [[clang::require_explicit_initialization]] FieldStringManager *Mgr{}; + llvm::SmallDenseSet Paths; - static constexpr llvm::StringLiteral LogCategory = "CFLFieldSensEdgeValue"; + static constexpr llvm::StringLiteral LogCategory = "IFDSEdgeValue"; - void applyTransforms(const CFLFieldSensEdgeValue &Txns, uint8_t DepthKLimit); + void applyTransforms(const IFDSEdgeValue &Txns, uint8_t DepthKLimit); - bool operator==(const CFLFieldSensEdgeValue &Other) const noexcept { + bool operator==(const IFDSEdgeValue &Other) const noexcept { assert(Mgr == Other.Mgr); assert(Mgr != nullptr); return Paths == Other.Paths; } - bool operator!=(const CFLFieldSensEdgeValue &Other) const noexcept { + bool operator!=(const IFDSEdgeValue &Other) const noexcept { return !(*this == Other); } - [[nodiscard]] friend auto hash_value(const CFLFieldSensEdgeValue EV) { + [[nodiscard]] friend auto hash_value(const IFDSEdgeValue EV) { return llvm::hash_combine_range(EV.Paths.begin(), EV.Paths.end()); } friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const CFLFieldSensEdgeValue &EV); + const IFDSEdgeValue &EV); [[nodiscard]] bool isEpsilon() const { return Paths.size() == 1 && Paths.begin()->empty(); } - 
[[nodiscard]] static CFLFieldSensEdgeValue - epsilon(CFLFieldStringManager *Mgr) { - CFLFieldSensEdgeValue Ret{.Mgr = &assertNotNull(Mgr), .Paths = {}}; + [[nodiscard]] static IFDSEdgeValue epsilon(FieldStringManager *Mgr) { + IFDSEdgeValue Ret{.Mgr = &assertNotNull(Mgr), .Paths = {}}; Ret.Paths.insert({}); // Not using initializer_list to prevent copying return Ret; } - [[nodiscard]] friend auto join(const CFLFieldSensEdgeValue &L, - const CFLFieldSensEdgeValue &R) { + // To be picked up via ADL by psr::join(LatticeDomain, LatticeDomain) + [[nodiscard]] friend auto join(const IFDSEdgeValue &L, + const IFDSEdgeValue &R) { assert(L.Mgr == R.Mgr); assert(L.Mgr != nullptr); const bool LeftSmaller = L.Paths.size() < R.Paths.size(); @@ -239,46 +240,40 @@ struct CFLFieldSensEdgeValue { } }; -template -struct CFLFieldSensAnalysisDomain : AnalysisDomainTy { - using l_t = LatticeDomain; +struct IFDSDomain : LLVMIFDSAnalysisDomainDefault { + using l_t = LatticeDomain; }; -struct FieldSensAllocSitesAwareIFDSProblemConfig - : LLVMIFDSAnalysisDomainDefault { +struct IFDSProblemConfig : LLVMIFDSAnalysisDomainDefault { llvm::unique_function(n_t Curr, d_t CurrNode)> KillsAt; // XXX: more }; -class FieldSensAllocSitesAwareIFDSProblemBase - : public CFLFieldSensAnalysisDomain { -public: - static constexpr llvm::StringLiteral LogCategory = - "FieldSensAllocSitesAwareIFDSProblem"; - - [[nodiscard]] static InitialSeeds - makeInitialSeeds(const InitialSeeds &UserSeeds, - CFLFieldStringManager &Mgr); - - [[nodiscard]] static std::pair - getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { - llvm::APInt Offset(64, 0); - int32_t OffsVal = CFLFieldAccessPath::TopOffset; - const auto *Base = V->stripAndAccumulateConstantOffsets(DL, Offset, true); - - if (llvm::isa(Base)) { - return {Base->stripPointerCastsAndAliases(), - CFLFieldAccessPath::TopOffset}; - } +[[nodiscard]] InitialSeeds +makeInitialSeeds(const InitialSeeds &UserSeeds, + FieldStringManager &Mgr); - auto 
RawOffsVal = Offset.getSExtValue(); - if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { - OffsVal = int32_t(RawOffsVal); - } +[[nodiscard]] inline std::pair +getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { + llvm::APInt Offset(64, 0); + int32_t OffsVal = AccessPath::TopOffset; + const auto *Base = V->stripAndAccumulateConstantOffsets(DL, Offset, true); - return {Base->stripPointerCastsAndAliases(), OffsVal}; + if (llvm::isa(Base)) { + return {Base->stripPointerCastsAndAliases(), AccessPath::TopOffset}; } -}; + + auto RawOffsVal = Offset.getSExtValue(); + if (RawOffsVal <= INT32_MAX && RawOffsVal >= INT32_MIN) { + OffsVal = int32_t(RawOffsVal); + } + + return {Base->stripPointerCastsAndAliases(), OffsVal}; +} + +} // namespace cfl_fieldsens /// An IFDS-Problem adaptor that makes any field-insensitive IFDS analysis /// field-sensitive. Just wrap your IFDS problem with @@ -290,16 +285,12 @@ class FieldSensAllocSitesAwareIFDSProblemBase /// the FieldSensAllocSitesAwareIFDSProblem. For that, provide a /// FieldSensAllocSitesAwareIFDSProblemConfig with a proper KillsAt /// implementation. 
-class FieldSensAllocSitesAwareIFDSProblem - : public FieldSensAllocSitesAwareIFDSProblemBase, - public IDETabulationProblem< - CFLFieldSensAnalysisDomain> { - using Base = IDETabulationProblem< - CFLFieldSensAnalysisDomain>; - - template - static decltype(FieldSensAllocSitesAwareIFDSProblemConfig::KillsAt) - deriveKillsAt(ConcreteProblemT *UserProblem) { +class CFLFieldSensIFDSProblem + : public IDETabulationProblem { + using Base = IDETabulationProblem; + + static decltype(cfl_fieldsens::IFDSProblemConfig::KillsAt) + deriveKillsAt(auto *UserProblem) { assert(UserProblem != nullptr); if constexpr (requires() { { @@ -334,38 +325,39 @@ class FieldSensAllocSitesAwareIFDSProblem using typename Base::t_t; using typename Base::v_t; + static constexpr llvm::StringLiteral LogCategory = "CFLFieldSensIFDSProblem"; + /// Constructs an IDETabulationProblem with the usual arguments, forwarded /// from UserProblem - explicit FieldSensAllocSitesAwareIFDSProblem( + explicit CFLFieldSensIFDSProblem( IFDSTabulationProblem *UserProblem, - FieldSensAllocSitesAwareIFDSProblemConfig + cfl_fieldsens::IFDSProblemConfig Config) noexcept(std::is_nothrow_move_constructible_v) : Base(assertNotNull(UserProblem).getProjectIRDB(), assertNotNull(UserProblem).getEntryPoints(), UserProblem->getZeroValue()), UserProblem(UserProblem), Config(std::move(Config)) {} - explicit FieldSensAllocSitesAwareIFDSProblem( + explicit CFLFieldSensIFDSProblem( proper_subclass_of< IFDSTabulationProblem> auto *UserProblem) - : FieldSensAllocSitesAwareIFDSProblem( - UserProblem, FieldSensAllocSitesAwareIFDSProblemConfig{ - .KillsAt = deriveKillsAt(UserProblem), - }) {} + : CFLFieldSensIFDSProblem(UserProblem, + cfl_fieldsens::IFDSProblemConfig{ + .KillsAt = deriveKillsAt(UserProblem), + }) {} - FieldSensAllocSitesAwareIFDSProblem( - std::nullptr_t, - FieldSensAllocSitesAwareIFDSProblemConfig Config) = delete; + CFLFieldSensIFDSProblem(std::nullptr_t, + cfl_fieldsens::IFDSProblemConfig Config) = delete; - 
FieldSensAllocSitesAwareIFDSProblem(std::nullptr_t) = delete; + CFLFieldSensIFDSProblem(std::nullptr_t) = delete; // TODO: Provide a customization-point to provide gen offsets to the // edge-functions (generating from zero currently always generates at // epsilon!) [[nodiscard]] InitialSeeds initialSeeds() override { - return makeInitialSeeds(UserProblem->initialSeeds(), Mgr); + return cfl_fieldsens::makeInitialSeeds(UserProblem->initialSeeds(), Mgr); } [[nodiscard]] FlowFunctionPtrType getNormalFlowFunction(n_t Curr, @@ -431,8 +423,8 @@ class FieldSensAllocSitesAwareIFDSProblem private: IFDSTabulationProblem *UserProblem{}; - CFLFieldStringManager Mgr{}; - FieldSensAllocSitesAwareIFDSProblemConfig Config{}; + cfl_fieldsens::FieldStringManager Mgr{}; + cfl_fieldsens::IFDSProblemConfig Config{}; uint8_t DepthKLimit = 5; // Original from the paper }; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp similarity index 74% rename from lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp rename to lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp index f03282e1c3..448ebebfb1 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp @@ -1,4 +1,4 @@ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h" #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" @@ -31,18 +31,19 @@ #include using namespace psr; +using namespace psr::cfl_fieldsens; -CFLFieldStringManager::CFLFieldStringManager() { +FieldStringManager::FieldStringManager() { // Sentinel NodeCompressor.insertDummy( - CFLFieldStringNode{.Next = CFLFieldStringNodeId::None, .Offset = 0}); + FieldStringNode{.Next = FieldStringNodeId::None, .Offset = 
0}); Depth.push_back(0); } llvm::SmallVector -CFLFieldStringManager::getFullFieldString(CFLFieldStringNodeId NId) const { +FieldStringManager::getFullFieldString(FieldStringNodeId NId) const { llvm::SmallVector Ret; - while (NId != CFLFieldStringNodeId::None) { + while (NId != FieldStringNodeId::None) { auto Nod = NodeCompressor[NId]; Ret.push_back(Nod.Offset); NId = Nod.Next; @@ -51,9 +52,9 @@ CFLFieldStringManager::getFullFieldString(CFLFieldStringNodeId NId) const { return Ret; } -CFLFieldStringNodeId CFLFieldStringManager::fromFullFieldString( - llvm::ArrayRef FieldString) { - CFLFieldStringNodeId Ret = CFLFieldStringNodeId::None; +FieldStringNodeId +FieldStringManager::fromFullFieldString(llvm::ArrayRef FieldString) { + FieldStringNodeId Ret = FieldStringNodeId::None; for (const auto &Offset : FieldString) { Ret = prepend(Offset, Ret); } @@ -62,30 +63,28 @@ CFLFieldStringNodeId CFLFieldStringManager::fromFullFieldString( namespace { -using l_t = LatticeDomain; +using l_t = LatticeDomain; constexpr static int32_t addOffsets(int32_t L, int32_t R) noexcept { - if (L == CFLFieldAccessPath::TopOffset || - R == CFLFieldAccessPath::TopOffset) { - return CFLFieldAccessPath::TopOffset; + if (L == AccessPath::TopOffset || R == AccessPath::TopOffset) { + return AccessPath::TopOffset; } int32_t Sum{}; if (llvm::AddOverflow(L, R, Sum)) { - return CFLFieldAccessPath::TopOffset; + return AccessPath::TopOffset; } return Sum; } struct CFLFieldSensEdgeFunction { - using l_t = LatticeDomain; - [[clang::require_explicit_initialization]] CFLFieldSensEdgeValue Transform; + using l_t = LatticeDomain; + [[clang::require_explicit_initialization]] IFDSEdgeValue Transform; [[clang::require_explicit_initialization]] uint8_t DepthKLimit{}; [[nodiscard]] l_t computeTarget(l_t Source) const { - Source.onValue(fn<&CFLFieldSensEdgeValue::applyTransforms>, Transform, - DepthKLimit); + Source.onValue(fn<&IFDSEdgeValue::applyTransforms>, Transform, DepthKLimit); return Source; } @@ -111,18 
+110,16 @@ struct CFLFieldSensEdgeFunction { return OS << "Txn[" << EF.Transform << ']'; } - [[nodiscard]] static auto from(CFLFieldSensEdgeValue &&Txn, - uint8_t DepthKLimit) { + [[nodiscard]] static auto from(IFDSEdgeValue &&Txn, uint8_t DepthKLimit) { return CFLFieldSensEdgeFunction{ .Transform = std::move(Txn), .DepthKLimit = DepthKLimit, }; } - [[nodiscard]] static auto from(CFLFieldAccessPath &&Txn, - CFLFieldStringManager &Mgr, + [[nodiscard]] static auto from(AccessPath &&Txn, FieldStringManager &Mgr, uint8_t DepthKLimit) { - // Avoid initializer-list as it prevents moving + // Avoid initializer_list as it prevents moving auto Ret = CFLFieldSensEdgeFunction{ .Transform = {.Mgr = &Mgr, .Paths = {}}, .DepthKLimit = DepthKLimit, @@ -132,16 +129,16 @@ struct CFLFieldSensEdgeFunction { } [[nodiscard]] static auto fromEpsilon(uint8_t DepthKLimit, - CFLFieldStringManager &Mgr) { + FieldStringManager &Mgr) { return CFLFieldSensEdgeFunction{ - .Transform = CFLFieldSensEdgeValue::epsilon(&Mgr), + .Transform = IFDSEdgeValue::epsilon(&Mgr), .DepthKLimit = DepthKLimit, }; } }; -[[nodiscard]] std::string storesToString(const CFLFieldAccessPath &AP, - const CFLFieldStringManager &Mgr) { +[[nodiscard]] std::string storesToString(const AccessPath &AP, + const FieldStringManager &Mgr) { std::string Ret; llvm::raw_string_ostream ROS(Ret); @@ -153,9 +150,8 @@ struct CFLFieldSensEdgeFunction { } // Returns whether to retain F -[[nodiscard]] auto applyOneGepAndStore(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, int32_t Field, - uint8_t DepthKLimit) { +[[nodiscard]] auto applyOneGepAndStore(FieldStringManager &Mgr, AccessPath &F, + int32_t Field, uint8_t DepthKLimit) { if (Mgr.depth(F.Stores) == DepthKLimit) { // TODO: Optimize: auto Full = Mgr.getFullFieldString(F.Stores); @@ -167,11 +163,10 @@ struct CFLFieldSensEdgeFunction { } // Returns whether to retain F -[[nodiscard]] auto applyOneGepAndLoad(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, int32_t Field, - 
uint8_t DepthKLimit) { +[[nodiscard]] auto applyOneGepAndLoad(FieldStringManager &Mgr, AccessPath &F, + int32_t Field, uint8_t DepthKLimit) { auto Offs = F.Offset + Field; - if (F.Stores == psr::CFLFieldStringNodeId::None) { + if (F.Stores == FieldStringNodeId::None) { if (F.kills(Offs)) { return false; @@ -192,54 +187,50 @@ struct CFLFieldSensEdgeFunction { auto StoresHead = Mgr[F.Stores]; - if (StoresHead.Offset != Offs && - StoresHead.Offset != CFLFieldAccessPath::TopOffset) { + if (StoresHead.Offset != Offs && StoresHead.Offset != AccessPath::TopOffset) { return false; } assert(StoresHead.Offset == Offs || - StoresHead.Offset == CFLFieldAccessPath::TopOffset); + StoresHead.Offset == AccessPath::TopOffset); F.Offset = 0; F.Stores = StoresHead.Next; // llvm::errs() << "> pop_back\n"; return true; } -[[nodiscard]] auto applyOneGepAndKill(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, int32_t Field, - uint8_t /*DepthKLimit*/) { +[[nodiscard]] auto applyOneGepAndKill(FieldStringManager &Mgr, AccessPath &F, + int32_t Field, uint8_t /*DepthKLimit*/) { auto Offs = addOffsets(F.Offset, Field); - if (Offs == CFLFieldAccessPath::TopOffset) { + if (Offs == AccessPath::TopOffset) { // We cannot kill Top return true; } - if (F.Stores == psr::CFLFieldStringNodeId::None) { + if (F.Stores == FieldStringNodeId::None) { F.Kills.insert(Offs); - PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, - "> add K" << Offs); + PHASAR_LOG_LEVEL_CAT(DEBUG, IFDSEdgeValue::LogCategory, "> add K" << Offs); return true; } auto StoresHead = Mgr[F.Stores]; if (StoresHead.Offset == Offs) { - PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, + PHASAR_LOG_LEVEL_CAT(DEBUG, IFDSEdgeValue::LogCategory, "> Kill " << storesToString(F, Mgr)); return false; } - PHASAR_LOG_LEVEL_CAT(DEBUG, CFLFieldSensEdgeValue::LogCategory, + PHASAR_LOG_LEVEL_CAT(DEBUG, IFDSEdgeValue::LogCategory, "> Retain " << storesToString(F, Mgr)); assert(StoresHead.Offset != Offs); return true; } 
-[[nodiscard]] auto applyOneGep(CFLFieldStringManager &Mgr, - CFLFieldAccessPath &F, int32_t Field, - uint8_t /*DepthKLimit*/) { - if (F.Stores == psr::CFLFieldStringNodeId::None) { +[[nodiscard]] auto applyOneGep(FieldStringManager &Mgr, AccessPath &F, + int32_t Field, uint8_t /*DepthKLimit*/) { + if (F.Stores == FieldStringNodeId::None) { F.Offset = addOffsets(F.Offset, Field); } else { auto StoresHead = Mgr[F.Stores]; @@ -249,7 +240,7 @@ struct CFLFieldSensEdgeFunction { return std::true_type{}; } -void applyTransform(CFLFieldSensEdgeValue &EV, const CFLFieldAccessPath &Txn, +void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, uint8_t DepthKLimit) { if (EV.Paths.empty() || Txn.empty()) { @@ -307,8 +298,8 @@ void applyTransform(CFLFieldSensEdgeValue &EV, const CFLFieldAccessPath &Txn, } } // namespace -void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, - uint8_t DepthKLimit) { +void IFDSEdgeValue::applyTransforms(const IFDSEdgeValue &Txns, + uint8_t DepthKLimit) { if (Mgr == nullptr) [[unlikely]] { llvm::report_fatal_error("Mgr is nullptr!"); } @@ -345,15 +336,16 @@ void CFLFieldSensEdgeValue::applyTransforms(const CFLFieldSensEdgeValue &Txns, *this = std::move(Ret); } -size_t psr::hash_value(const CFLFieldAccessPath &FieldString) noexcept { +size_t psr::cfl_fieldsens::hash_value(const AccessPath &FieldString) noexcept { // Xor does not care about the order auto HCK = std::reduce(FieldString.Kills.begin(), FieldString.Kills.end(), 0, std::bit_xor<>{}); return llvm::hash_combine(FieldString.Loads, FieldString.Stores, HCK); } -llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, - const CFLFieldAccessPath &FieldString) { +llvm::raw_ostream & +psr::cfl_fieldsens::operator<<(llvm::raw_ostream &OS, + const AccessPath &FieldString) { if (FieldString.empty()) { return OS << "ε"; } @@ -366,7 +358,7 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, OS << FieldString.Offset << '.'; } - if (FieldString.Loads != 
CFLFieldStringNodeId::None) { + if (FieldString.Loads != FieldStringNodeId::None) { OS << "L#" << uint32_t(FieldString.Loads) << '.'; } @@ -374,15 +366,15 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, OS << 'K' << Kl << '.'; } - if (FieldString.Loads != CFLFieldStringNodeId::None) { + if (FieldString.Loads != FieldStringNodeId::None) { OS << "S#" << uint32_t(FieldString.Loads) << '.'; } return OS; } -void CFLFieldAccessPath::print(llvm::raw_ostream &OS, - const CFLFieldStringManager &Mgr) const { +void AccessPath::print(llvm::raw_ostream &OS, + const FieldStringManager &Mgr) const { if (empty()) { OS << "ε"; return; @@ -409,8 +401,8 @@ void CFLFieldAccessPath::print(llvm::raw_ostream &OS, } } -llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, - const CFLFieldSensEdgeValue &EV) { +llvm::raw_ostream &psr::cfl_fieldsens::operator<<(llvm::raw_ostream &OS, + const IFDSEdgeValue &EV) { assert(EV.Mgr != nullptr); if (EV.Paths.size() == 1) { EV.Paths.begin()->print(OS, *EV.Mgr); @@ -424,24 +416,30 @@ llvm::raw_ostream &psr::operator<<(llvm::raw_ostream &OS, return OS << " }"; } -auto FieldSensAllocSitesAwareIFDSProblemBase::makeInitialSeeds( - const InitialSeeds &UserSeeds, - CFLFieldStringManager &Mgr) -> InitialSeeds { - InitialSeeds::GeneralizedSeeds Ret; +InitialSeeds +cfl_fieldsens::makeInitialSeeds( + const InitialSeeds + &UserSeeds, + FieldStringManager &Mgr) { + InitialSeeds::GeneralizedSeeds Ret; for (const auto &[Inst, Facts] : UserSeeds.getSeeds()) { auto &SeedsAtInst = Ret[Inst]; for (const auto &[Fact, Weight] : Facts) { - SeedsAtInst.try_emplace(Fact, CFLFieldSensEdgeValue::epsilon(&Mgr)); + SeedsAtInst.try_emplace(Fact, IFDSEdgeValue::epsilon(&Mgr)); } } return {std::move(Ret)}; } -auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( - d_t CurrNode, d_t SuccNode, d_t PointerOp, d_t ValueOp, uint8_t DepthKLimit, - const llvm::DataLayout &DL) -> EdgeFunction { +auto CFLFieldSensIFDSProblem::getStoreEdgeFunction(d_t CurrNode, 
d_t SuccNode, + d_t PointerOp, d_t ValueOp, + uint8_t DepthKLimit, + const llvm::DataLayout &DL) + -> EdgeFunction { auto [BasePtr, Offset] = getBaseAndOffset(PointerOp, DL); // TODO;: How to deal with BasePtr? @@ -459,7 +457,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( (BasePtr == CurrNode || BaseBasePtr == CurrNode)) { // Kill - CFLFieldAccessPath FieldString{}; + AccessPath FieldString{}; FieldString.Kills.insert(Offset); return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); @@ -468,7 +466,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( if (ValueOp == CurrNode && CurrNode != SuccNode) { // Store - CFLFieldAccessPath FieldString{}; + AccessPath FieldString{}; if (BasePtr != SuccNode && llvm::isa(BasePtr)) { // This is a hack, to be more correct with field-insensitive alias // information @@ -489,8 +487,9 @@ auto FieldSensAllocSitesAwareIFDSProblem::getStoreEdgeFunction( return EdgeIdentity{}; } -auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( - n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { +auto CFLFieldSensIFDSProblem::getNormalEdgeFunction(n_t Curr, d_t CurrNode, + n_t /*Succ*/, d_t SuccNode) + -> EdgeFunction { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getNormalEdgeFunction]:"); PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(Curr)); PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, @@ -520,10 +519,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( // TODO;: How to deal with BasePtr? 
- CFLFieldAccessPath FieldString{}; + AccessPath FieldString{}; FieldString.Loads = Mgr.prepend(Offset, FieldString.Loads); - // llvm::errs() << "Handle load: " << llvmIRToString(Load) << '\n'; - // llvm::errs() << "> CurrNode: " << llvmIRToString(CurrNode) << '\n'; return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); } @@ -532,7 +529,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( auto OffsVal = getBaseAndOffset(Gep, IRDB->getModule()->getDataLayout()).second; - CFLFieldAccessPath FieldString{}; + AccessPath FieldString{}; FieldString.Offset = OffsVal; return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); @@ -542,8 +539,9 @@ auto FieldSensAllocSitesAwareIFDSProblem::getNormalEdgeFunction( return EdgeIdentity{}; } -auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( - n_t CallSite, d_t SrcNode, f_t /*DestinationFunction*/, d_t DestNode) +auto CFLFieldSensIFDSProblem::getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t /*DestinationFunction*/, + d_t DestNode) -> EdgeFunction { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getCallEdgeFunction]"); PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(CallSite)); @@ -562,7 +560,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getCallEdgeFunction( return EdgeIdentity{}; } -auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( +auto CFLFieldSensIFDSProblem::getReturnEdgeFunction( n_t /*CallSite*/, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getReturnEdgeFunction]"); @@ -581,7 +579,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getReturnEdgeFunction( return EdgeIdentity{}; } -auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( +auto CFLFieldSensIFDSProblem::getCallToRetEdgeFunction( n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, llvm::ArrayRef /*Callees*/) -> EdgeFunction { @@ -611,8 +609,9 
@@ auto FieldSensAllocSitesAwareIFDSProblem::getCallToRetEdgeFunction( return EdgeIdentity{}; } -auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( - n_t Curr, d_t CurrNode, n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { +auto CFLFieldSensIFDSProblem::getSummaryEdgeFunction(n_t Curr, d_t CurrNode, + n_t /*Succ*/, d_t SuccNode) + -> EdgeFunction { PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, "[getSummaryEdgeFunction]"); PHASAR_LOG_LEVEL_CAT(DEBUG, LogCategory, " Curr: " << NToString(Curr)); @@ -629,7 +628,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( << " with offset " << *KillOffs); - CFLFieldAccessPath FieldString{}; + AccessPath FieldString{}; FieldString.Kills.insert(*KillOffs); return CFLFieldSensEdgeFunction::from(std::move(FieldString), Mgr, DepthKLimit); @@ -647,18 +646,17 @@ auto FieldSensAllocSitesAwareIFDSProblem::getSummaryEdgeFunction( return EdgeIdentity{}; } -static void klimitPaths(auto &Paths, CFLFieldStringManager &Mgr) { +static void klimitPaths(auto &Paths, FieldStringManager &Mgr) { - llvm::SmallDenseMap, - 2, CFLFieldAccessPathDMI> + llvm::SmallDenseMap, 2, + AccessPathDMI> ToInsert; for (auto IIt = Paths.begin(), End = Paths.end(); IIt != End;) { auto It = IIt++; - if (It->Stores != CFLFieldStringNodeId::None) { - CFLFieldAccessPath Approx = *It; + if (It->Stores != FieldStringNodeId::None) { + AccessPath Approx = *It; auto StoresHead = Mgr[Approx.Stores]; - Approx.Stores = - Mgr.prepend(CFLFieldAccessPath::TopOffset, StoresHead.Next); + Approx.Stores = Mgr.prepend(AccessPath::TopOffset, StoresHead.Next); ToInsert[std::move(Approx)].push_back(*It); Paths.erase(It); } @@ -674,8 +672,8 @@ static void klimitPaths(auto &Paths, CFLFieldStringManager &Mgr) { static constexpr ptrdiff_t BreadthKLimit = 5; -auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, - const EdgeFunction &R) +auto CFLFieldSensIFDSProblem::extend(const EdgeFunction &L, + const EdgeFunction &R) -> EdgeFunction { auto Ret = 
[&]() -> EdgeFunction { if (auto DfltCompose = psr::defaultComposeOrNull(L, R)) { @@ -717,8 +715,8 @@ auto FieldSensAllocSitesAwareIFDSProblem::extend(const EdgeFunction &L, return Ret; } -auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, - const EdgeFunction &R) +auto CFLFieldSensIFDSProblem::combine(const EdgeFunction &L, + const EdgeFunction &R) -> EdgeFunction { auto Ret = [&]() -> EdgeFunction { if (auto Dflt = defaultJoinOrNullNoId(L, R)) { @@ -730,6 +728,11 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, if (FldSensL) { if (FldSensR) { + // A complicated way of expressing set-union of LPaths and RPaths. + // Reason being that we don't want to unnecessarily copy the sets. + // Rather, we like ust incrementing the ref-count of L or R if somehow + // possible. + const auto &LPaths = FldSensL->Transform.Paths; const auto &RPaths = FldSensR->Transform.Paths; const auto LeftSz = LPaths.size(); @@ -753,7 +756,7 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, } return CFLFieldSensEdgeFunction::from( - CFLFieldSensEdgeValue{.Mgr = &Mgr, .Paths = std::move(Union)}, + IFDSEdgeValue{.Mgr = &Mgr, .Paths = std::move(Union)}, DepthKLimit); } } @@ -763,21 +766,21 @@ auto FieldSensAllocSitesAwareIFDSProblem::combine(const EdgeFunction &L, } if (R.isa>()) { - if (FldSensL->Transform.Paths.contains(CFLFieldAccessPath{})) { + if (FldSensL->Transform.Paths.contains(AccessPath{})) { return L; } auto Txn = FldSensL->Transform; - Txn.Paths.insert(CFLFieldAccessPath{}); + Txn.Paths.insert(AccessPath{}); return CFLFieldSensEdgeFunction::from(std::move(Txn), DepthKLimit); } } else if (FldSensR && L.isa>()) { - if (FldSensR->Transform.Paths.contains(CFLFieldAccessPath{})) { + if (FldSensR->Transform.Paths.contains(AccessPath{})) { return R; } auto Txn = FldSensR->Transform; - Txn.Paths.insert(CFLFieldAccessPath{}); + Txn.Paths.insert(AccessPath{}); return CFLFieldSensEdgeFunction::from(std::move(Txn), 
DepthKLimit); } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index b754aa40f9..99761fb7d2 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -13,7 +13,7 @@ #include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.h" @@ -571,9 +571,7 @@ IFDSTaintAnalysis::KillsAtFn::operator()(n_t Curr, d_t CurrNode) const { const auto &DL = Self->IRDB->getModule()->getDataLayout(); for (const auto *KillFact : Kill) { - auto [BasePtr, Offset] = - psr::FieldSensAllocSitesAwareIFDSProblemBase::getBaseAndOffset(KillFact, - DL); + auto [BasePtr, Offset] = psr::cfl_fieldsens::getBaseAndOffset(KillFact, DL); if (BasePtr == CurrNode) { return Offset; } diff --git a/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp index cd55027717..facce52ec1 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp @@ -8,7 +8,7 @@ *****************************************************************************/ #include "phasar/DataFlow/IfdsIde/Solver/IterativeIDESolver.h" -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" 
#include "phasar/PhasarLLVM/Utils/DataFlowAnalysisType.h" #include "phasar/Utils/IO.h" @@ -28,7 +28,7 @@ void controller::executeIFDSCFLEnvTaint(AnalysisController &Data) { *Data.HA, &Config, Data.EntryPoints, /*TaintMainArgs*/ false, /*EnableStrongUpdateStore*/ false); auto Printer = UserProblem.consumePrinter(); - auto FieldSensProblem = FieldSensAllocSitesAwareIFDSProblem(&UserProblem); + auto FieldSensProblem = CFLFieldSensIFDSProblem(&UserProblem); IterativeIDESolver Solver(&FieldSensProblem, &Data.HA->getICFG()); diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp index 1af4d8b663..a372b56d6e 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensTest.cpp @@ -3,8 +3,8 @@ #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/DefaultAllocSitesAwareIDEProblem.h" -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/FieldSensAllocSitesAwareIFDSProblem.h" #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" @@ -82,8 +82,7 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { for (const auto *KillFact : Kill) { auto [BasePtr, Offset] = - psr::FieldSensAllocSitesAwareIFDSProblemBase::getBaseAndOffset( - KillFact, DL); + psr::cfl_fieldsens::getBaseAndOffset(KillFact, DL); if (BasePtr == CurrNode) { return Offset; } @@ -119,11 +118,11 @@ class ExampleTaintAnalysis : public psr::DefaultAllocSitesAwareIFDSProblem { return Gen; } - if (Leak.count(Source)) { + if (Leak.contains(Source)) { Leaks[CS] = Source; } - if (Kill.count(Source)) { + if 
(Kill.contains(Source)) { return {}; } @@ -159,7 +158,7 @@ class CFLFieldSensTest : public ::testing::Test { psr::LLVMTaintConfig TC(IRDB); ExampleTaintAnalysis TaintProblem(&IRDB, &AS, &TC, {"main"}); - psr::FieldSensAllocSitesAwareIFDSProblem FsTaintProblem(&TaintProblem); + psr::CFLFieldSensIFDSProblem FsTaintProblem(&TaintProblem); psr::LLVMBasedICFG ICFG(&IRDB, psr::CallGraphAnalysisType::OTF, {"main"}, nullptr, &BaseAS); From 5e0c88c933a97a9d5dc93cb6fe537dc263fa6c80 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 25 Feb 2026 14:04:38 +0100 Subject: [PATCH 26/29] Add generic mechanism to filter IFDS analysis results field-sensitively --- .../IfdsIde/Solver/IdBasedSolverResults.h | 4 + .../IfdsIde/CFLFieldSensIFDSProblem.h | 77 +++++++++++++++++++ include/phasar/Utils/MapUtils.h | 49 ++++++++++-- .../AnalysisControllerXIFDSCFLEnvTaint.cpp | 24 ++---- 4 files changed, 132 insertions(+), 22 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h index 3659fecde0..7f670552f3 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IdBasedSolverResults.h @@ -44,6 +44,10 @@ class IdBasedSolverResultsBase { using const_iterator = iterator; using difference_type = ptrdiff_t; + using key_type = d_t; + using mapped_type = l_t; + using value_type = std::pair; + explicit RowView( const detail::IterativeIDESolverResults *Results, const row_map_t *Row) noexcept diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h index 7522f91e62..c4dad1ae4f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h @@ -20,6 +20,7 @@ #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/Compressor.h" #include 
"phasar/Utils/Logger.h" +#include "phasar/Utils/MapUtils.h" #include "phasar/Utils/TypeTraits.h" #include "phasar/Utils/TypedVector.h" #include "phasar/Utils/Utilities.h" @@ -273,6 +274,82 @@ getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { return {Base->stripPointerCastsAndAliases(), OffsVal}; } +/// Checks whether Fact holds at Inst in a field-sensitive way +template +[[nodiscard]] inline bool holdsFactAt(const ResultsT &Results, + IFDSDomain::n_t Inst, + IFDSDomain::d_t Fact) { + const IFDSDomain::l_t Bot = Bottom{}; + const auto &Fields = Results.resultAt(Inst, Fact); + + if (Fields.isTop()) { + // Was not computed by the IDE Solver + return false; + } + + if (const IFDSEdgeValue *FieldStrings = Fields.getValueOrNull()) { + if constexpr (!AllowDeepTaints) { + // whether Facts itself holds, not whether any fields of it may hold + return FieldStrings->isEpsilon(); + } + if (FieldStrings->Paths.empty()) { + // has been killed entirely + return false; + } + } + + return true; +} + +/// Given a QueryMap of the form map>, calls the Handler for all +/// inst-fact pairs that hold in a field-sensitive way and filters out all +/// others. +/// +/// The Handler may opt into early exit by returning false. Returning void is +/// permitted. +template +bool filterFieldSensFacts( + const ResultsT &Results, const auto &QueryMap, + std::invocable auto Handler) { + const IFDSDomain::l_t Bot = Bottom{}; + + for (const auto &[Inst, FactsAtInst] : QueryMap) { + const auto &Row = Results.row(Inst); + for (const auto &Fact : FactsAtInst) { + const auto &Fields = getOr(Row, Fact, Bot); + + if (Fields.isTop()) { + // Was not computed by the IDE Solver + continue; + } + + if (const auto *FieldStrings = Fields.getValueOrNull()) { + if (!AllowDeepTaints && !FieldStrings->isEpsilon()) { + // Fact does not hold itself, but fields of Fact may hold. 
In + // aggressive mode, we ignore them + continue; + } + if (FieldStrings->Paths.empty()) { + // has been killed entirely + continue; + } + } + + if constexpr (std::convertible_to< + std::invoke_result_t, + bool>) { + if (!std::invoke(Handler, Inst, Fact)) { + return false; + } + } else { + std::invoke(Handler, Inst, Fact); + } + } + } + return true; +} + } // namespace cfl_fieldsens /// An IFDS-Problem adaptor that makes any field-insensitive IFDS analysis diff --git a/include/phasar/Utils/MapUtils.h b/include/phasar/Utils/MapUtils.h index f57f352dd5..270ceb4eef 100644 --- a/include/phasar/Utils/MapUtils.h +++ b/include/phasar/Utils/MapUtils.h @@ -17,14 +17,13 @@ #include "llvm/ADT/STLForwardCompat.h" #include -#include namespace psr { template requires std::is_lvalue_reference_v -static auto getOrDefault(MapT &&Map, KeyT &&Key) -> ByConstRef< - llvm::remove_cvref_tsecond)>> { +inline auto getOrDefault(MapT &&Map, KeyT &&Key) -> ByConstRef< + std::remove_cvref_tsecond)>> { auto It = Map.find(PSR_FWD(Key)); if (It == Map.end()) { return default_value(); @@ -36,7 +35,7 @@ static auto getOrDefault(MapT &&Map, KeyT &&Key) -> ByConstRef< template requires(std::is_lvalue_reference_v && !psr::CanEfficientlyPassByValue>) -static auto getOrNull(MapT &&Map, KeyT &&Key) +inline auto getOrNull(MapT &&Map, KeyT &&Key) -> decltype(&Map.find(PSR_FWD(Key))->second) { auto It = Map.find(PSR_FWD(Key)); decltype(&It->second) Ret = nullptr; @@ -50,9 +49,9 @@ static auto getOrNull(MapT &&Map, KeyT &&Key) template requires(std::is_lvalue_reference_v && psr::CanEfficientlyPassByValue>) -static auto getOrNull(MapT &&Map, KeyT Key) +inline auto getOrNull(MapT &&Map, KeyT Key) -> decltype(&Map.find(Key)->second) { - auto It = Map.find(Key); + auto It = Map.find(PSR_FWD(Key)); decltype(&It->second) Ret = nullptr; if (It != Map.end()) { Ret = &It->second; @@ -60,6 +59,44 @@ static auto getOrNull(MapT &&Map, KeyT Key) return Ret; } + +template + requires CanEfficientlyPassByValue< + typename 
std::remove_cvref_t::mapped_type> +inline auto getOr(MapT &&Map, KeyT &&Key, ValueT &&FallbackVal) + -> std::remove_cvref_tsecond)> { + auto It = Map.find(PSR_FWD(Key)); + + if (It == Map.end()) { + return PSR_FWD(FallbackVal); + } + + return It->second; +} + +template + requires(!CanEfficientlyPassByValue< + typename std::remove_cvref_t::mapped_type> && + std::is_lvalue_reference_v) +inline auto +getOr(MapT &&Map, KeyT &&Key, + const typename std::remove_cvref_t::mapped_type &FallbackVal) + -> decltype(Map.find(PSR_FWD(Key))->second) const & { + auto It = Map.find(PSR_FWD(Key)); + + if (It == Map.end()) { + return FallbackVal; + } + + return It->second; +} + +template + requires(!CanEfficientlyPassByValue< + typename std::remove_cvref_t::mapped_type>) +auto getOr(MapT &&Map, KeyT &&Key, + const typename std::remove_cvref_t::mapped_type &&FallbackVal) + -> decltype(Map.find(PSR_FWD(Key))->second) const & = delete; } // namespace psr #endif // PHASAR_UTILS_MAPUTILS_H diff --git a/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp index facce52ec1..c4ed6a7cf8 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXIFDSCFLEnvTaint.cpp @@ -69,22 +69,14 @@ void controller::executeIFDSCFLEnvTaint(AnalysisController &Data) { WithOutStream("/psr-report.txt", [&](llvm::raw_ostream &OS) { Printer->onInitialize(); bool HasResults = false; - for (const auto &[Inst, Facts] : UserProblem.Leaks) { - for (const auto &Fact : Facts) { - const auto &Fields = Results.resultAt(Inst, Fact); - - if (const auto *FieldStrings = Fields.getValueOrNull()) { - if (FieldStrings->Paths.empty()) { - // filter-out leak - continue; - } - } - - HasResults = true; - Printer->onResult(Inst, Fact, - DataFlowAnalysisType::IFDSCFLEnvTaintAnalysis); - } - } + + cfl_fieldsens::filterFieldSensFacts( + Results, UserProblem.Leaks, [&](auto Inst, auto 
Fact) { + HasResults = true; + Printer->onResult(Inst, Fact, + DataFlowAnalysisType::IFDSCFLEnvTaintAnalysis); + }); + Printer->onFinalize(OS); if (!HasResults) { OS << "No leaks found!\n"; From 9bf576222c02af47d5b9d06262fa758cb3828386 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 25 Feb 2026 14:07:02 +0100 Subject: [PATCH 27/29] Properly handling non-existing value --- .../PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h index c4dad1ae4f..6b172fc62f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h @@ -279,7 +279,6 @@ template [[nodiscard]] inline bool holdsFactAt(const ResultsT &Results, IFDSDomain::n_t Inst, IFDSDomain::d_t Fact) { - const IFDSDomain::l_t Bot = Bottom{}; const auto &Fields = Results.resultAt(Inst, Fact); if (Fields.isTop()) { @@ -311,12 +310,12 @@ template bool filterFieldSensFacts( const ResultsT &Results, const auto &QueryMap, std::invocable auto Handler) { - const IFDSDomain::l_t Bot = Bottom{}; + const IFDSDomain::l_t Top = psr::Top{}; for (const auto &[Inst, FactsAtInst] : QueryMap) { const auto &Row = Results.row(Inst); for (const auto &Fact : FactsAtInst) { - const auto &Fields = getOr(Row, Fact, Bot); + const auto &Fields = getOr(Row, Fact, Top); if (Fields.isTop()) { // Was not computed by the IDE Solver From ba96bc1e0172c6faa8a1ea1cec7824a5ff4d65d7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 25 Feb 2026 14:20:14 +0100 Subject: [PATCH 28/29] Add some comments + minor --- .../IfdsIde/CFLFieldSensIFDSProblem.h | 23 ++++++++++++++++--- .../IfdsIde/CFLFieldSensIFDSProblem.cpp | 14 +++++------ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git 
a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h index 6b172fc62f..a92c58475b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.h @@ -89,6 +89,7 @@ namespace psr { namespace cfl_fieldsens { +/// Interns the Store- and Load field-strings class FieldStringManager { public: FieldStringManager(); @@ -130,6 +131,8 @@ class FieldStringManager { TypedVector Depth{}; }; +/// A single CFL Field-Access String consisting of: gep, loads, kills, and +/// stores struct AccessPath { static constexpr int32_t TopOffset = INT32_MIN; @@ -192,6 +195,7 @@ struct AccessPathDMI { } }; +/// An edge-value consisting of a set of CFL field access strings. struct IFDSEdgeValue { [[clang::require_explicit_initialization]] FieldStringManager *Mgr{}; llvm::SmallDenseSet Paths; @@ -245,17 +249,26 @@ struct IFDSDomain : LLVMIFDSAnalysisDomainDefault { using l_t = LatticeDomain; }; +/// Configures how the CFLFieldSensIFDSProblem should handle strong updates. struct IFDSProblemConfig : LLVMIFDSAnalysisDomainDefault { + /// Gives the byte-offset of a kill at (Curr, CurrNode), if any, else nullopt. + /// + /// Can be derived automatically, if the user-problem specifies a + /// member-function killsAt() that returns such a function object. llvm::unique_function(n_t Curr, d_t CurrNode)> KillsAt; // XXX: more }; +/// Transforms user-defined seeds from usual IFDS seeds to field-sensitive IFDS +/// seeds [[nodiscard]] InitialSeeds makeInitialSeeds(const InitialSeeds &UserSeeds, FieldStringManager &Mgr); +/// Utility to strip off potential pointer-arithmetic from V and accumulate +/// the byte-offset.
[[nodiscard]] inline std::pair getBaseAndOffset(const llvm::Value *V, const llvm::DataLayout &DL) { llvm::APInt Offset(64, 0); @@ -414,6 +427,9 @@ class CFLFieldSensIFDSProblem UserProblem->getZeroValue()), UserProblem(UserProblem), Config(std::move(Config)) {} + /// Constructs an IDETabulationProblem with the usual arguments, forwarded + /// from UserProblem and tries to automatically derive the config from + /// additional functions specified by UserProblem explicit CFLFieldSensIFDSProblem( proper_subclass_of< IFDSTabulationProblem> auto @@ -428,9 +444,9 @@ class CFLFieldSensIFDSProblem CFLFieldSensIFDSProblem(std::nullptr_t) = delete; - // TODO: Provide a customization-point to provide gen offsets to the - // edge-functions (generating from zero currently always generates at - // epsilon!) + // XXX: Perhaps we need a way to provide a customization-point to specify gen + // offsets to the edge-functions (generating from zero currently always + // generates at epsilon!) [[nodiscard]] InitialSeeds initialSeeds() override { return cfl_fieldsens::makeInitialSeeds(UserProblem->initialSeeds(), Mgr); @@ -495,6 +511,7 @@ class CFLFieldSensIFDSProblem EdgeFunction combine(const EdgeFunction &L, const EdgeFunction &R) override; + /// The wrapped user-problem [[nodiscard]] const auto &base() const noexcept { return *UserProblem; } private: diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp index 448ebebfb1..20dbc686cc 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/CFLFieldSensIFDSProblem.cpp @@ -257,9 +257,8 @@ void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, EV.Paths.reserve(Save.size()); const auto TxnOffset = Txn.Offset; - - /// XXX: Should we save getFullFieldString(Txn.Loads) and - /// getFullFieldString(Txn.Stores)? Would it be faster? 
+ const auto TxnLoads = EV.Mgr->getFullFieldString(Txn.Loads); + const auto TxnStores = EV.Mgr->getFullFieldString(Txn.Stores); for (const auto &F : Save) { auto Copy = F; @@ -270,7 +269,7 @@ void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, } } - for (auto Ld : EV.Mgr->getFullFieldString(Txn.Loads)) { + for (auto Ld : TxnLoads) { if (!applyOneGepAndLoad(*EV.Mgr, Copy, Ld, DepthKLimit)) { return false; } @@ -282,7 +281,7 @@ void applyTransform(IFDSEdgeValue &EV, const AccessPath &Txn, } } - for (auto St : EV.Mgr->getFullFieldString(Txn.Stores)) { + for (auto St : TxnStores) { if (!applyOneGepAndStore(*EV.Mgr, Copy, St, DepthKLimit)) { return false; } @@ -594,7 +593,8 @@ auto CFLFieldSensIFDSProblem::getCallToRetEdgeFunction( if (auto KillOffs = Config.KillsAt(CallSite, CallNode)) { // Let the summary-FF kill the fact - // XXX: Can we somehow circumvent calling KillsAt twice? + // XXX: Can we somehow circumvent calling KillsAt twice? (once here, once + // in getSummaryEdgeFunction()) return AllTop{}; } } @@ -730,7 +730,7 @@ auto CFLFieldSensIFDSProblem::combine(const EdgeFunction &L, if (FldSensR) { // A complicated way of expressing set-union of LPaths and RPaths. // Reason being that we don't want to unnecessarily copy the sets. - // Rather, we like ust incrementing the ref-count of L or R if somehow + // Rather, we like just incrementing the ref-count of L or R if somehow // possible. 
const auto &LPaths = FldSensL->Transform.Paths; From 813cde0b46ae43e879bf15569ae45538e76c4881 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 25 Feb 2026 18:22:49 +0100 Subject: [PATCH 29/29] pre-commit --- include/phasar/Utils/TypeTraits.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 84ad77858b..cff4e08c1d 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -210,9 +210,10 @@ template struct [[deprecated("getAsJson should not be used anymore. Use printAsJson " "instead")]] has_getAsJson : std::false_type {}; // NOLINT template -struct [[deprecated("getAsJson should not be used anymore. Use printAsJson " - "instead")]] -has_getAsJson().getAsJson())>> +struct [[deprecated( + "getAsJson should not be used anymore. Use printAsJson " + "instead")]] has_getAsJson() + .getAsJson())>> : std::true_type {}; // NOLINT struct TrueFn {