Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions centipede/centipede.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
#include "./centipede/centipede_callbacks.h"
#include "./centipede/command.h"
#include "./centipede/control_flow.h"
#include "./centipede/corpus.h"
#include "./centipede/corpus_io.h"
#include "./centipede/coverage.h"
#include "./centipede/environment.h"
Expand All @@ -98,14 +99,21 @@

namespace fuzztest::internal {

Centipede::Centipede(const Environment &env, CentipedeCallbacks &user_callbacks,
const BinaryInfo &binary_info,
CoverageLogger &coverage_logger, std::atomic<Stats> &stats)
Centipede::Centipede(const Environment& env, CentipedeCallbacks& user_callbacks,
const BinaryInfo& binary_info,
CoverageLogger& coverage_logger, std::atomic<Stats>& stats)
: env_(env),
user_callbacks_(user_callbacks),
rng_(env_.seed),
// TODO(kcc): [impl] find a better way to compute frequency_threshold.
fs_(env_.feature_frequency_threshold, env_.MakeDomainDiscardMask()),
corpus_([this] {
const auto parsed_weight_method =
Corpus::ParseWeightMethod(env_.corpus_weight_method);
FUZZTEST_CHECK(parsed_weight_method.has_value())
<< "Unknown corpus weight method " << env_.corpus_weight_method;
return parsed_weight_method.value();
}()),
coverage_frontier_(binary_info),
binary_info_(binary_info),
pc_table_(binary_info_.pc_table),
Expand Down
3 changes: 3 additions & 0 deletions centipede/centipede_flags.inc
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ CENTIPEDE_FLAG(
bool, use_corpus_weights, true,
"If true, use weighted distribution when choosing the corpus element "
"to mutate. This flag is mostly for Centipede developers.")
CENTIPEDE_FLAG(std::string, corpus_weight_method, "feature_rarity",
"The weight method to use on corpus. Available options are "
"`uniform`, `recency`, and `feature_rarity` (default).")
CENTIPEDE_FLAG(
bool, exec_time_weight_scaling, true,
"If true, scale the corpus weight by the execution time of each input.")
Expand Down
49 changes: 41 additions & 8 deletions centipede/corpus.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
Expand Down Expand Up @@ -45,13 +46,13 @@ namespace fuzztest::internal {
// Corpus
//------------------------------------------------------------------------------

// Returns the weight of `fv` computed using `fs` and `coverage_frontier`.
static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
const CoverageFrontier &coverage_frontier) {
size_t weight = fs.ComputeWeight(fv);
// Computes the frontier-weight factor of an input from its features `fv` and
// the overall `coverage_frontier`.
static size_t ComputeFrontierWeight(const FeatureVec& fv,
const CoverageFrontier& coverage_frontier) {
// The following is checking for the cases where PCTable is not present. In
// such cases, we cannot use any ControlFlow related features.
if (coverage_frontier.MaxPcIndex() == 0) return weight;
if (coverage_frontier.MaxPcIndex() == 0) return 1;
size_t frontier_weights_sum = 0;
for (const auto feature : fv) {
if (!feature_domains::kPCs.Contains(feature)) continue;
Expand All @@ -63,7 +64,19 @@ static size_t ComputeWeight(const FeatureVec &fv, const FeatureSet &fs,
frontier_weights_sum += coverage_frontier.FrontierWeight(pc_index);
}
}
return weight * (frontier_weights_sum + 1); // Multiply by at least 1.
return frontier_weights_sum + 1;  // At least 1: callers multiply by this factor.
}

std::optional<Corpus::WeightMethod> Corpus::ParseWeightMethod(
std::string_view method_string) {
if (method_string == "uniform") {
return WeightMethod::Uniform;
} else if (method_string == "recency") {
return WeightMethod::Recency;
} else if (method_string == "feature_rarity") {
return WeightMethod::FeatureRarity;
}
return std::nullopt;
}

std::pair<size_t, size_t> Corpus::MaxAndAvgSize() const {
Expand All @@ -86,7 +99,26 @@ void Corpus::UpdateWeights(const FeatureSet& fs,
auto& record = records_[i];
const size_t unseen = fs.PruneFeaturesAndCountUnseen(record.features);
FUZZTEST_CHECK_EQ(unseen, 0);
weights[i] = fs.ComputeWeight(record.features);
if (record.features.empty()) {
weights[i] = 0;
continue;
}
double base_weight = 0;
switch (method_) {
case WeightMethod::Uniform:
base_weight = 1;
break;
case WeightMethod::Recency:
base_weight = i + 1;
break;
case WeightMethod::FeatureRarity:
base_weight = fs.ComputeRarityWeight(record.features);
break;
default:
FUZZTEST_LOG(FATAL) << "Unknown corpus weight method";
}
weights[i] =
base_weight * ComputeFrontierWeight(record.features, coverage_frontier);
}
if (scale_by_exec_time) {
double total_exec_time_usec = 0;
Expand Down Expand Up @@ -206,7 +238,8 @@ void Corpus::Add(const ByteArray& data, const FeatureVec& fv,
<< "Got request to add empty element to corpus: ignoring";
FUZZTEST_CHECK_EQ(records_.size(), weighted_distribution_.size());
records_.push_back({data, fv, metadata, stats});
weighted_distribution_.AddWeight(ComputeWeight(fv, fs, coverage_frontier));
// Will be updated by `UpdateWeights`.
weighted_distribution_.AddWeight(0);
}

const CorpusRecord& Corpus::WeightedRandom(absl::BitGenRef rng) const {
Expand Down
21 changes: 17 additions & 4 deletions centipede/corpus.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <cstddef>
#include <cstdint>
#include <optional>
#include <ostream>
#include <string>
#include <string_view>
Expand Down Expand Up @@ -98,7 +99,17 @@ struct CorpusRecord {
// Allows to prune (forget) inputs that become uninteresting.
class Corpus {
public:
Corpus() = default;
  // Strategies for assigning selection weights to corpus elements
  // (see `UpdateWeights`).
  enum class WeightMethod {
    Uniform,        // Every active element gets the same weight.
    Recency,        // Later-added elements get larger weights.
    FeatureRarity,  // Elements with rarer features get larger weights.
  };

  // Parses `method_string` ("uniform", "recency", or "feature_rarity") into a
  // `WeightMethod`; returns `std::nullopt` for any other string.
  static std::optional<WeightMethod> ParseWeightMethod(
      std::string_view method_string);

  // By default, weighs corpus elements by the rarity of their features.
  Corpus() : Corpus(WeightMethod::FeatureRarity) {}
  explicit Corpus(WeightMethod method) : method_(method) {}

Corpus(const Corpus &) = default;
Corpus(Corpus &&) noexcept = default;
Expand All @@ -120,9 +131,9 @@ class Corpus {
// Returns the number of removed elements.
size_t Prune(const FeatureSet &fs, const CoverageFrontier &coverage_frontier,
size_t max_corpus_size, Rng &rng);
// Updates the corpus weights according to `fs` and `coverage_frontier`. If
// `scale_by_exec_time` is set, scales the weights by the corpus execution
// time relative to the average.
// Updates the corpus weights according to `fs` and `coverage_frontier` using
// the weight `method`. If `scale_by_exec_time` is set, scales the weights by
// the corpus execution time relative to the average.
void UpdateWeights(const FeatureSet& fs,
const CoverageFrontier& coverage_frontier,
bool scale_by_exec_time);
Expand Down Expand Up @@ -164,6 +175,8 @@ class Corpus {
// Maintains weights for elements of records_.
WeightedDistribution weighted_distribution_;
size_t num_pruned_ = 0;
// Method for weighting the corpus elements.
WeightMethod method_;
};

// Coverage frontier is a set of PCs that are themselves covered, but some of
Expand Down
141 changes: 137 additions & 4 deletions centipede/corpus_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ TEST(Corpus, Prune) {
Add({{2}, {30, 40}});
Add({{3}, {40, 50}});
Add({{4}, {10, 20}});
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
corpus.UpdateWeights(fs, coverage_frontier,
/*scale_by_exec_time=*/false);

// Prune. Features 20 and 40 are frequent => input {0} will be removed.
EXPECT_EQ(corpus.NumActive(), 5);
Expand All @@ -124,7 +125,8 @@ TEST(Corpus, Prune) {
VerifyActiveInputs({{1}, {2}, {3}, {4}});

Add({{5}, {30, 60}});
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
corpus.UpdateWeights(fs, coverage_frontier,
/*scale_by_exec_time=*/false);

EXPECT_EQ(corpus.NumTotal(), 6);
// Prune. Feature 30 is now frequent => inputs {1} and {2} will be removed.
Expand All @@ -145,6 +147,131 @@ TEST(Corpus, Prune) {
EXPECT_EQ(corpus.NumTotal(), 6);
}

// With the uniform weight method, every active input must be selected with
// approximately equal probability regardless of its features.
TEST(Corpus, UniformWeightMethodsWorkAsExpected) {
  PCTable pc_table(100);
  CFTable cf_table(100);
  BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
  CoverageFrontier coverage_frontier(bin_info);
  FeatureSet fs(3, {});
  Corpus corpus(Corpus::WeightMethod::Uniform);

  // Merges the record's features into `fs`, then adds the record to `corpus`.
  const auto add_input = [&](const CorpusRecord& record) {
    fs.MergeFeatures(record.features);
    corpus.Add(record.data, record.features, /*metadata=*/{}, /*stats=*/{}, fs,
               coverage_frontier);
  };

  add_input({/*data=*/{0}, /*features=*/{30, 20}});
  add_input({/*data=*/{1}, /*features=*/{10, 20}});
  add_input({/*data=*/{2}, /*features=*/{10}});

  constexpr int kNumIter = 10000;
  std::vector<uint64_t> freq;

  Rng rng;
  // Draws `kNumIter` weighted-random samples and tallies how often each input
  // (identified by its first data byte) was selected.
  const auto compute_freq = [&] {
    freq.assign(corpus.NumActive(), 0);
    for (int iter = 0; iter != kNumIter; ++iter) {
      const auto id = corpus.WeightedRandom(rng).data[0];
      ASSERT_LT(id, freq.size());
      ++freq[id];
    }
  };

  // All selection frequencies should be close to equal.
  corpus.UpdateWeights(fs, coverage_frontier,
                       /*scale_by_exec_time=*/false);
  compute_freq();
  EXPECT_NEAR(freq[0], kNumIter / 3, 100);
  EXPECT_NEAR(freq[1], kNumIter / 3, 100);
  EXPECT_NEAR(freq[2], kNumIter / 3, 100);
}

// With the recency weight method, later-added inputs must be selected more
// often than earlier-added ones.
TEST(Corpus, RecencyWeightMethodsWorkAsExpected) {
  PCTable pc_table(100);
  CFTable cf_table(100);
  BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
  CoverageFrontier coverage_frontier(bin_info);
  FeatureSet fs(3, {});
  Corpus corpus(Corpus::WeightMethod::Recency);

  // Merges the record's features into `fs`, then adds the record to `corpus`.
  const auto add_input = [&](const CorpusRecord& record) {
    fs.MergeFeatures(record.features);
    corpus.Add(record.data, record.features, /*metadata=*/{}, /*stats=*/{}, fs,
               coverage_frontier);
  };

  add_input({/*data=*/{0}, /*features=*/{30, 20}});
  add_input({/*data=*/{1}, /*features=*/{10, 20}});
  add_input({/*data=*/{2}, /*features=*/{10}});

  constexpr int kNumIter = 10000;
  std::vector<uint64_t> freq;

  Rng rng;
  // Draws `kNumIter` weighted-random samples and tallies how often each input
  // (identified by its first data byte) was selected.
  const auto compute_freq = [&] {
    freq.assign(corpus.NumActive(), 0);
    for (int iter = 0; iter != kNumIter; ++iter) {
      const auto id = corpus.WeightedRandom(rng).data[0];
      ASSERT_LT(id, freq.size());
      ++freq[id];
    }
  };

  // {2} was added last and {0} first, so frequencies should be ordered
  // freq[2] > freq[1] > freq[0].
  corpus.UpdateWeights(fs, coverage_frontier,
                       /*scale_by_exec_time=*/false);
  compute_freq();
  EXPECT_GT(freq[2], freq[1] + 100);
  EXPECT_GT(freq[1], freq[0] + 100);
}

// With the feature-rarity weight method, inputs whose features are rarer in
// the overall feature set must be selected more often.
TEST(Corpus, FeatureRarityWeightMethodsWorkAsExpected) {
  PCTable pc_table(100);
  CFTable cf_table(100);
  BinaryInfo bin_info{pc_table, {}, cf_table, {}, {}, {}};
  CoverageFrontier coverage_frontier(bin_info);
  FeatureSet fs(3, {});
  Corpus corpus(Corpus::WeightMethod::FeatureRarity);

  // Merges the record's features into `fs`, then adds the record to `corpus`.
  const auto add_input = [&](const CorpusRecord& record) {
    fs.MergeFeatures(record.features);
    corpus.Add(record.data, record.features, /*metadata=*/{}, /*stats=*/{}, fs,
               coverage_frontier);
  };

  add_input({/*data=*/{0}, /*features=*/{30, 20}});
  add_input({/*data=*/{1}, /*features=*/{10, 20}});
  add_input({/*data=*/{2}, /*features=*/{10}});

  constexpr int kNumIter = 10000;
  std::vector<uint64_t> freq;

  Rng rng;
  // Draws `kNumIter` weighted-random samples and tallies how often each input
  // (identified by its first data byte) was selected.
  const auto compute_freq = [&] {
    freq.assign(corpus.NumActive(), 0);
    for (int iter = 0; iter != kNumIter; ++iter) {
      const auto id = corpus.WeightedRandom(rng).data[0];
      ASSERT_LT(id, freq.size());
      ++freq[id];
    }
  };

  // Feature 30 occurs once while 10 and 20 occur twice, so {0} carries the
  // rarest features and should be favored: freq[0] > freq[1] > freq[2].
  corpus.UpdateWeights(fs, coverage_frontier,
                       /*scale_by_exec_time=*/false);
  compute_freq();
  EXPECT_GT(freq[0], freq[1] + 100);
  EXPECT_GT(freq[1], freq[2] + 100);
}

TEST(Corpus, ScalesWeightsWithExecTime) {
PCTable pc_table(100);
CFTable cf_table(100);
Expand Down Expand Up @@ -181,14 +308,16 @@ TEST(Corpus, ScalesWeightsWithExecTime) {
};

// The weights should be equal without exec time scaling.
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/false);
corpus.UpdateWeights(fs, coverage_frontier,
/*scale_by_exec_time=*/false);
ComputeFreq();
EXPECT_NEAR(freq[0], kNumIter / 3, 100);
EXPECT_NEAR(freq[1], kNumIter / 3, 100);
EXPECT_NEAR(freq[2], kNumIter / 3, 100);

// The weights should favor {0} over {1} over {2} with exec time scaling.
corpus.UpdateWeights(fs, coverage_frontier, /*scale_by_exec_time=*/true);
corpus.UpdateWeights(fs, coverage_frontier,
/*scale_by_exec_time=*/true);
ComputeFreq();
EXPECT_GT(freq[0], freq[1] + 100);
EXPECT_GT(freq[1], freq[2] + 100);
Expand All @@ -208,6 +337,8 @@ TEST(Corpus, PruneCorpusWithAllEmptyFeatureInputs) {
coverage_frontier);
corpus.Add(/*data=*/{2}, /*fv=*/{}, /*metadata=*/{}, /*stats=*/{}, fs,
coverage_frontier);
corpus.UpdateWeights(fs, coverage_frontier,
/*scale_by_exec_time=*/false);
// Should not crash.
corpus.Prune(fs, coverage_frontier, max_corpus_size, rng);
}
Expand All @@ -231,6 +362,8 @@ TEST(Corpus, PruneRegressionTest1) {

Add({{1}, {10, 20}});
Add({{2}, {10}});
corpus.UpdateWeights(fs, coverage_frontier,
/*scale_by_exec_time=*/false);
corpus.Prune(fs, coverage_frontier, max_corpus_size, rng);
}

Expand Down
5 changes: 2 additions & 3 deletions centipede/feature_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,8 @@ void FeatureSet::MergeFeatures(const FeatureVec& features) {
}

__attribute__((noinline)) // to see it in profile.
uint64_t
FeatureSet::ComputeWeight(const FeatureVec &features) const {
uint64_t weight = 0;
double FeatureSet::ComputeRarityWeight(const FeatureVec& features) const {
double weight = 0;
for (auto feature : features) {
// The less frequent is the feature, the more valuable it is.
// (frequency == 1) => (weight == 256)
Expand Down
2 changes: 1 addition & 1 deletion centipede/feature_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class FeatureSet {
// Computes the combined rarity weight of `features`.
// The less frequent a feature is, the bigger its weight.
// The weight of a FeatureVec is the sum of the individual feature weights.
uint64_t ComputeWeight(const FeatureVec &features) const;
double ComputeRarityWeight(const FeatureVec& features) const;

// Returns a debug string representing the state of *this.
std::string DebugString() const;
Expand Down
Loading
Loading