Skip to content

Commit 1310d47

Browse files
authored
Small Enhancements for IFDS Taint Analysis (#815)
* Handle sink variables in IFDSTaintAnalysis
* Use auto-seeds in IFDS taint analysis
1 parent 21f761e commit 1310d47

File tree

7 files changed

+71
-15
lines changed

7 files changed

+71
-15
lines changed

include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {
8787

8888
[[nodiscard]] std::map<const llvm::Instruction *,
8989
std::set<const llvm::Value *>>
90-
makeInitialSeedsImpl() const;
90+
makeInitialSeedsImpl(SeedConfig Conf) const;
9191

9292
void printImpl(llvm::raw_ostream &OS) const;
9393

include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ template <typename Derived> class TaintConfigBase {
4141
using TaintDescriptionCallBackTy =
4242
llvm::unique_function<std::set<v_t>(n_t) const>;
4343

44+
enum class [[clang::flag_enum]] SeedConfig {
45+
Arguments = 1,
46+
Instructions = 2,
47+
48+
All = Arguments | Instructions,
49+
};
50+
4451
void registerSourceCallBack(TaintDescriptionCallBackTy CB) noexcept {
4552
SourceCallBack = std::move(CB);
4653
}
@@ -124,8 +131,9 @@ template <typename Derived> class TaintConfigBase {
124131
return self().getCategoryImpl(std::move(Val));
125132
}
126133

127-
[[nodiscard]] std::map<n_t, std::set<v_t>> makeInitialSeeds() const {
128-
return self().makeInitialSeedsImpl();
134+
[[nodiscard]] std::map<n_t, std::set<v_t>>
135+
makeInitialSeeds(SeedConfig Conf = SeedConfig::All) const {
136+
return self().makeInitialSeedsImpl(Conf);
129137
}
130138

131139
void print(llvm::raw_ostream &OS = llvm::outs()) const {

lib/PhasarLLVM/DataFlow/IfdsIde/LibCSummary.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ using namespace psr::library_summary;
88
static library_summary::FunctionDataFlowFacts createLibCSummary() {
99
FunctionDataFlowFacts Sum;
1010

11+
Sum.addElement("atoi", 0, ReturnValue{});
12+
1113
// abs
1214
Sum.addElement("abs", 0, ReturnValue{});
1315

lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEExtendedTaintAnalysis.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/IR/GlobalValue.h"
3131
#include "llvm/IR/IntrinsicInst.h"
3232
#include "llvm/Support/Casting.h"
33+
#include "llvm/Support/WithColor.h"
3334

3435
#include <algorithm>
3536
#include <type_traits>
@@ -54,9 +55,10 @@ IDEExtendedTaintAnalysis::initialSeeds() {
5455
this->base_t::getZeroValue(), bottomElement());
5556

5657
if (Seeds.empty()) {
57-
llvm::errs() << "WARNING: No initial seeds specified, skip the analysis. "
58-
"Please specify an entrypoint function or in the "
59-
"TaintConfig a source llvm::Instruction*\n";
58+
llvm::WithColor::warning()
59+
<< "No initial seeds specified, skip the analysis. "
60+
"Please specify an entrypoint function or in the "
61+
"TaintConfig a source llvm::Instruction*\n";
6062
}
6163

6264
return Seeds;

lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "llvm/IR/LLVMContext.h"
3434
#include "llvm/IR/Value.h"
3535
#include "llvm/Support/Casting.h"
36+
#include "llvm/Support/WithColor.h"
3637
#include "llvm/Support/raw_ostream.h"
3738

3839
#include <utility>
@@ -280,8 +281,8 @@ auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr,
280281
Gen.insert(Store->getValueOperand());
281282
}
282283

283-
return lambdaFlow(
284-
[Store, Gen{std::move(Gen)}](d_t Source) -> container_type {
284+
auto Ret =
285+
lambdaFlow([Store, Gen{std::move(Gen)}](d_t Source) -> container_type {
285286
if (Store->getPointerOperand() == Source) {
286287
return {};
287288
}
@@ -291,6 +292,21 @@ auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr,
291292

292293
return {Source};
293294
});
295+
if (Config->isSink(Store->getPointerOperand())) {
296+
// Handle sink variables:
297+
298+
return lambdaFlow([this, Store, Ret = std::move(Ret)](d_t Source) {
299+
if (Store->getValueOperand() == Source) {
300+
if (Leaks[Store].insert(Source).second) {
301+
Printer->onResult(Store, Source,
302+
DataFlowAnalysisType::IFDSTaintAnalysis);
303+
}
304+
}
305+
306+
return Ret->computeTargets(Source);
307+
});
308+
}
309+
return Ret;
294310
}
295311
// If a tainted value is loaded, the loaded value is of course tainted
296312
if (const auto *Load = llvm::dyn_cast<llvm::LoadInst>(Curr)) {
@@ -316,6 +332,16 @@ auto IFDSTaintAnalysis::getNormalFlowFunction(n_t Curr,
316332
return transferFlow(Cast, Cast->getOperand(0));
317333
}
318334

335+
if (llvm::isa<llvm::BinaryOperator>(Curr)) {
336+
return lambdaFlow([Curr](d_t Source) -> container_type {
337+
if (llvm::is_contained(Curr->operand_values(), Source)) {
338+
return {Source, Curr};
339+
}
340+
341+
return {Source};
342+
});
343+
}
344+
319345
// Otherwise we do not care and leave everything as it is
320346
return identityFlow();
321347
}
@@ -489,7 +515,10 @@ auto IFDSTaintAnalysis::getSummaryFlowFunction([[maybe_unused]] n_t CallSite,
489515
auto IFDSTaintAnalysis::initialSeeds() -> InitialSeeds<n_t, d_t, l_t> {
490516
PHASAR_LOG_LEVEL(DEBUG, "IFDSTaintAnalysis::initialSeeds()");
491517

492-
InitialSeeds<n_t, d_t, l_t> Seeds;
518+
// Instructions are generated from zero on-the-fly, but args must be generated
519+
// explicitly as seeds
520+
InitialSeeds<n_t, d_t, l_t> Seeds =
521+
Config->makeInitialSeeds(LLVMTaintConfig::SeedConfig::Arguments);
493522

494523
LLVMBasedCFG C;
495524
addSeedsForStartingPoints(EntryPoints, IRDB, C, Seeds, getZeroValue(),
@@ -507,6 +536,13 @@ auto IFDSTaintAnalysis::initialSeeds() -> InitialSeeds<n_t, d_t, l_t> {
507536
}
508537
}
509538

539+
if (Seeds.empty()) {
540+
llvm::WithColor::warning()
541+
<< "No initial seeds specified, skip the analysis. "
542+
"Please specify an entrypoint function or in the "
543+
"TaintConfig a source llvm::Instruction*\n";
544+
}
545+
510546
return Seeds;
511547
}
512548

lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h"
1515
#include "phasar/PhasarLLVM/Utils/Annotation.h"
1616
#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h"
17+
#include "phasar/Utils/EnumFlags.h"
1718
#include "phasar/Utils/Logger.h"
1819

1920
#include "llvm/ADT/SmallVector.h"
@@ -457,17 +458,21 @@ TaintCategory LLVMTaintConfig::getCategoryImpl(const llvm::Value *V) const {
457458
}
458459

459460
std::map<const llvm::Instruction *, std::set<const llvm::Value *>>
460-
LLVMTaintConfig::makeInitialSeedsImpl() const {
461+
LLVMTaintConfig::makeInitialSeedsImpl(SeedConfig Conf) const {
461462
std::map<const llvm::Instruction *, std::set<const llvm::Value *>>
462463
InitialSeeds;
463464
for (const auto *SourceValue : SourceValues) {
464465
if (const auto *Inst = llvm::dyn_cast<llvm::Instruction>(SourceValue)) {
465-
InitialSeeds[Inst].insert(Inst);
466+
if (hasFlag(Conf, SeedConfig::Instructions)) {
467+
InitialSeeds[Inst].insert(Inst);
468+
}
466469
} else if (const auto *Arg = llvm::dyn_cast<llvm::Argument>(SourceValue);
467470
Arg && !Arg->getParent()->isDeclaration()) {
468-
LLVMBasedCFG C;
469-
for (const auto *SP : C.getStartPointsOf(Arg->getParent())) {
470-
InitialSeeds[SP].insert(Arg);
471+
if (hasFlag(Conf, SeedConfig::Arguments)) {
472+
LLVMBasedCFG C;
473+
for (const auto *SP : C.getStartPointsOf(Arg->getParent())) {
474+
InitialSeeds[SP].insert(Arg);
475+
}
471476
}
472477
}
473478
}

tools/phasar-cli/Controller/AnalysisControllerXIFDSTaint.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,8 @@ using namespace psr;
1515

1616
void controller::executeIFDSTaint(AnalysisController &Data) {
1717
auto Config = makeTaintConfig(Data);
18-
executeIFDSAnalysis<IFDSTaintAnalysis>(Data, &Config, Data.EntryPoints);
18+
// Note: Don't blindly generate argc and argv. Use a proper taint config
19+
// instead
20+
executeIFDSAnalysis<IFDSTaintAnalysis>(Data, &Config, Data.EntryPoints,
21+
false);
1922
}

0 commit comments

Comments
 (0)