Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
a954eec
work
kripken May 1, 2025
991058e
work
kripken May 2, 2025
b14103c
work
kripken May 2, 2025
1f6ed00
work
kripken May 2, 2025
aef1dd7
work
kripken May 2, 2025
57f19ef
work
kripken May 2, 2025
1fe0ab5
work
kripken May 2, 2025
5624819
work
kripken May 2, 2025
f3c5257
work
kripken May 2, 2025
4c645a6
work
kripken May 2, 2025
5553c45
work
kripken May 2, 2025
17c9fd1
NEXT
kripken May 2, 2025
6987c9a
format
kripken May 2, 2025
9191093
undo
kripken May 2, 2025
0c11461
clean
kripken May 2, 2025
7429f12
Update src/wasm/wasm-ir-builder.cpp
kripken May 2, 2025
57b9eec
validate 0/1
kripken May 2, 2025
deec0b8
nicer iter usage
kripken May 2, 2025
78d5064
Merge branch 'branch.hint' into branch.hint.binary
kripken May 2, 2025
9011302
Merge remote-tracking branch 'origin/main' into branch.hint.binary
kripken May 2, 2025
c47fcee
work
kripken May 2, 2025
fa66aac
format
kripken May 2, 2025
9225708
work
kripken May 2, 2025
1cac4d7
work
kripken May 2, 2025
87267fb
work
kripken May 2, 2025
de1dd26
work
kripken May 2, 2025
0a9ef92
work
kripken May 2, 2025
0ab80b0
work
kripken May 2, 2025
f99f04e
work
kripken May 2, 2025
0cdcfda
work
kripken May 2, 2025
c34222c
work
kripken May 2, 2025
c7f5de2
work
kripken May 2, 2025
d17c155
work
kripken May 2, 2025
36caffb
work
kripken May 2, 2025
f66d19c
work
kripken May 5, 2025
d220c4e
work
kripken May 5, 2025
0248a95
work
kripken May 5, 2025
7b15774
work
kripken May 5, 2025
1f227b4
writing correctly from a single function
kripken May 5, 2025
1609738
work
kripken May 5, 2025
c61b080
fix
kripken May 5, 2025
a6efd68
fix
kripken May 5, 2025
d659951
fix
kripken May 5, 2025
e9e4389
fix
kripken May 5, 2025
3a81bc0
nicer
kripken May 5, 2025
d977d5f
work
kripken May 5, 2025
396fc52
fix
kripken May 5, 2025
31c4823
format
kripken May 5, 2025
5ccfabb
clean
kripken May 5, 2025
0e97014
Merge remote-tracking branch 'origin/main' into branch.hint.binary
kripken May 6, 2025
f1d1ce7
Merge remote-tracking branch 'origin/main' into branch.hint.binary
kripken May 6, 2025
b6d69f7
fix.merge
kripken May 6, 2025
d8bde89
work
kripken May 6, 2025
a7357ab
oops
kripken May 6, 2025
e337f5c
work
kripken May 6, 2025
9a0535b
fix
kripken May 6, 2025
344d08c
more
kripken May 6, 2025
4855925
disable fuzzing, leave that for a later PR
kripken May 6, 2025
d0e06b7
Read the branch hints section last, though it must appear first [ci s…
kripken May 7, 2025
fe7fdfc
prep test
kripken May 7, 2025
a5a8fff
write annotations before code
kripken May 7, 2025
057e2a1
work
kripken May 7, 2025
ef58080
fix
kripken May 7, 2025
5cd9933
work
kripken May 7, 2025
5396d00
format
kripken May 7, 2025
b05e963
test
kripken May 7, 2025
55c0eb9
restore
kripken May 7, 2025
d3a2e5a
todo
kripken May 7, 2025
18b61ef
test
kripken May 7, 2025
4ddde5b
simplfy
kripken May 7, 2025
7c5a7ce
fix loop, we need the increment at the end
kripken May 7, 2025
e652d11
Merge remote-tracking branch 'origin/main' into branch.hint.binary
kripken May 7, 2025
1b41943
feedback
kripken May 8, 2025
541c9ba
format
kripken May 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/wasm-binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -1404,6 +1404,12 @@ class WasmBinaryWriter {
void trackExpressionEnd(Expression* curr, Function* func);
void trackExpressionDelimiter(Expression* curr, Function* func, size_t id);

// Writes code annotations into a buffer and returns it. We cannot write them
// directly into the output since we write function code first (to get the
// offsets for the annotations), and only then can write annotations, which we
// must then insert before the code (as the spec requires that).
std::optional<BufferWithRandomAccess> writeCodeAnnotations();

// helpers
void writeInlineString(std::string_view name);
void writeEscapedName(std::string_view name);
Expand Down Expand Up @@ -1667,6 +1673,15 @@ class WasmBinaryReader {
void readDylink(size_t payloadLen);
void readDylink0(size_t payloadLen);

// We read branch hints *after* the code section, even though they appear
// earlier. That is simpler for us as we note expression locations as we scan
// code, and then just need to match them up. To do this, we note the branch
// hint position and size in the first pass, and handle it later.
size_t branchHintsPos = 0;
size_t branchHintsLen = 0;

void readBranchHints(size_t payloadLen);

Index readMemoryAccess(Address& alignment, Address& offset);
std::tuple<Name, Address, Address> getMemarg();
MemoryOrder getMemoryOrder(bool isRMW = false);
Expand Down
218 changes: 211 additions & 7 deletions src/wasm/wasm-binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "support/debug.h"
#include "support/stdckdint.h"
#include "support/string.h"
#include "wasm-annotations.h"
#include "wasm-binary.h"
#include "wasm-debug.h"
#include "wasm-limits.h"
Expand Down Expand Up @@ -474,6 +475,21 @@ void WasmBinaryWriter::writeFunctions() {
}
});
finishSection(sectionStart);

// Code annotations must come before the code section (see comment on
// writeCodeAnnotations).
if (auto annotations = writeCodeAnnotations()) {
// We need to move the code section and put the annotations before it.
auto& annotationsBuffer = *annotations;
auto oldSize = o.size();
o.resize(oldSize + annotationsBuffer.size());

// |sectionStart| is the start of the contents of the section. Subtract 1 to
// include the section code as well, so we move all of it.
std::move_backward(&o[sectionStart - 1], &o[oldSize], o.end());
std::copy(
annotationsBuffer.begin(), annotationsBuffer.end(), &o[sectionStart - 1]);
Comment on lines +489 to +491
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be simpler if we wrote sections to several buffers and then concatenated them at the end rather than shifting contents around within buffers. That would be a bigger change, though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and possibly less efficient (though I'm not sure, maybe reallocating a single buffer is worse?)

}
}

void WasmBinaryWriter::writeStrings() {
Expand Down Expand Up @@ -1496,14 +1512,17 @@ void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) {
// binary locations tracked, then track it in the output as well. We also
// track locations of instructions that have code annotations, as their binary
// location goes in the custom section.
if (func && !func->expressionLocations.empty()) {
if (func && (!func->expressionLocations.empty() ||
func->codeAnnotations.count(curr))) {
binaryLocations.expressions[curr] =
BinaryLocations::Span{BinaryLocation(o.size()), 0};
binaryLocationTrackedExpressionsForFunc.push_back(curr);
}
}

void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) {
// TODO: If we need to track the end of annotated code locations, we need to
// enable that here.
if (func && !func->expressionLocations.empty()) {
auto& span = binaryLocations.expressions.at(curr);
span.end = o.size();
Expand All @@ -1513,11 +1532,123 @@ void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) {
void WasmBinaryWriter::trackExpressionDelimiter(Expression* curr,
Function* func,
size_t id) {
// TODO: If we need to track the delimiters of annotated code locations, we
// need to enable that here.
if (func && !func->expressionLocations.empty()) {
binaryLocations.delimiters[curr][id] = o.size();
}
}

std::optional<BufferWithRandomAccess> WasmBinaryWriter::writeCodeAnnotations() {
// Assemble the info for Branch Hinting: for each function, a vector of the
// hints.
struct ExprHint {
Expression* expr;
// The offset we will write in the custom section.
BinaryLocation offset;
Function::CodeAnnotation* hint;
};

struct FuncHints {
Name func;
std::vector<ExprHint> exprHints;
};

std::vector<FuncHints> funcHintsVec;

for (auto& func : wasm->functions) {
// Collect the Branch Hints for this function.
FuncHints funcHints;

// We compute the location of the function declaration area (where the
// locals are declared) the first time we need it.
BinaryLocation funcDeclarationsOffset = 0;

for (auto& [expr, annotation] : func->codeAnnotations) {
if (annotation.branchLikely) {
auto exprIter = binaryLocations.expressions.find(expr);
if (exprIter == binaryLocations.expressions.end()) {
// No expression exists for this annotation - perhaps optimizations
// removed it.
continue;
}
auto exprOffset = exprIter->second.start;

if (!funcDeclarationsOffset) {
auto funcIter = binaryLocations.functions.find(func.get());
assert(funcIter != binaryLocations.functions.end());
funcDeclarationsOffset = funcIter->second.declarations;
}

// Compute the offset: it should be relative to the start of the
// function locals (i.e. the function declarations).
auto offset = exprOffset - funcDeclarationsOffset;

funcHints.exprHints.push_back(ExprHint{expr, offset, &annotation});
}
}

if (funcHints.exprHints.empty()) {
continue;
}

// We found something. Finalize the data.
funcHints.func = func->name;

// Hints must be sorted by increasing binary offset.
std::sort(
funcHints.exprHints.begin(),
funcHints.exprHints.end(),
[](const ExprHint& a, const ExprHint& b) { return a.offset < b.offset; });

funcHintsVec.emplace_back(std::move(funcHints));
}

if (funcHintsVec.empty()) {
return {};
}

if (sourceMap) {
// TODO: This mode may not matter (when debugging, code annotations are an
// optimization that can be skipped), but atm source maps cause
// annotations to break.
Comment on lines +1612 to +1614
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for the breakage?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, I only noticed it by chance, and added the TODO to investigate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually I think we will need this to work. One use case for source maps is deobfuscation of stack traces collected from the wild, which means they need to be accurate (to the extent possible) for production binaries.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, makes sense. I'll get to it sooner, then.

Fatal() << "Annotations are not supported with source maps";
}

BufferWithRandomAccess buffer;

// We found data: emit the section.
buffer << uint8_t(BinaryConsts::Custom);
auto lebPos = buffer.writeU32LEBPlaceholder();
buffer.writeInlineString(Annotations::BranchHint.str);

buffer << U32LEB(funcHintsVec.size());
for (auto& funcHints : funcHintsVec) {
buffer << U32LEB(getFunctionIndex(funcHints.func));

buffer << U32LEB(funcHints.exprHints.size());
for (auto& exprHint : funcHints.exprHints) {
buffer << U32LEB(exprHint.offset);

// Hint size, always 1 for now.
buffer << U32LEB(1);

// We must only emit hints that are present.
assert(exprHint.hint->branchLikely);

// Hint contents: likely or not.
buffer << U32LEB(int(*exprHint.hint->branchLikely));
}
}

// Write the final size. We can ignore the return value, which is the number
// of bytes we shrank (if the LEB was smaller than the maximum size), as no
// value in this section cares.
buffer.emitRetroactiveSectionSizeLEB(lebPos);

return buffer;
}

void WasmBinaryWriter::writeData(const char* data, size_t size) {
for (size_t i = 0; i < size; i++) {
o << int8_t(data[i]);
Expand Down Expand Up @@ -1792,12 +1923,6 @@ WasmBinaryReader::WasmBinaryReader(Module& wasm,
}

void WasmBinaryReader::preScan() {
// TODO: Once we support code annotations here, we will need to always scan,
// but for now, DWARF is the only reason.
if (!DWARF) {
return;
}

assert(pos == 0);
getInt32(); // magic
getInt32(); // version
Expand All @@ -1813,12 +1938,25 @@ void WasmBinaryReader::preScan() {
auto oldPos = pos;
if (sectionCode == BinaryConsts::Section::Custom) {
auto sectionName = getInlineString();

// Code annotations require code locations.
// TODO: For Branch Hinting, we could note which functions require
// code locations, as an optimization.
if (sectionName == Annotations::BranchHint) {
needCodeLocations = true;
// Do not break, so we keep looking for DWARF.
}

// DWARF sections contain code offsets.
if (DWARF && Debug::isDWARFSection(sectionName)) {
needCodeLocations = true;
foundDWARF = true;
break;
}

// TODO: We could stop early if we see the Code section and DWARF is
// disabled, as BranchHint must appear first, but this seems to
// make practically no difference in practice.
}
pos = oldPos + payloadLen;
}
Expand Down Expand Up @@ -1933,6 +2071,12 @@ void WasmBinaryReader::read() {
}
}

// Go back and parse things we deferred.
if (branchHintsPos) {
pos = branchHintsPos;
readBranchHints(branchHintsLen);
}

validateBinary();
}

Expand All @@ -1953,6 +2097,10 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) {
readDylink(payloadLen);
} else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) {
readDylink0(payloadLen);
} else if (sectionName == Annotations::BranchHint) {
// Only note the position and length, we read this later.
branchHintsPos = pos;
branchHintsLen = payloadLen;
} else {
// an unfamiliar custom section
if (sectionName.equals(BinaryConsts::CustomSections::Linking)) {
Expand Down Expand Up @@ -5100,6 +5248,62 @@ void WasmBinaryReader::readDylink0(size_t payloadLen) {
}
}

void WasmBinaryReader::readBranchHints(size_t payloadLen) {
auto sectionPos = pos;

auto numFuncs = getU32LEB();
for (Index i = 0; i < numFuncs; i++) {
auto funcIndex = getU32LEB();
if (funcIndex >= wasm.functions.size()) {
throwError("bad BranchHint function");
}

auto& func = wasm.functions[funcIndex];

// The encoded offsets we read below are relative to the start of the
// function's locals (the declarations).
auto funcLocalsOffset = func->funcLocation.declarations;

// We have a map of expressions to their locations. Invert that to get the
// map we will use below, from offsets to expressions.
std::unordered_map<BinaryLocation, Expression*> locationsMap;

for (auto& [expr, span] : func->expressionLocations) {
locationsMap[span.start] = expr;
}

auto numHints = getU32LEB();
for (Index hint = 0; hint < numHints; hint++) {
// To get the absolute offset, add the function's offset.
auto relativeOffset = getU32LEB();
auto absoluteOffset = funcLocalsOffset + relativeOffset;

auto iter = locationsMap.find(absoluteOffset);
if (iter == locationsMap.end()) {
throwError("bad BranchHint offset");
}
auto* expr = iter->second;

auto size = getU32LEB();
if (size != 1) {
throwError("bad BranchHint size");
}

auto likely = getU32LEB();
if (likely != 0 && likely != 1) {
throwError("bad BranchHint value");
}

// Apply the valid hint.
func->codeAnnotations[expr].branchLikely = likely;
}
}

if (pos != sectionPos + payloadLen) {
throwError("bad BranchHint section size");
}
}

Index WasmBinaryReader::readMemoryAccess(Address& alignment, Address& offset) {
auto rawAlignment = getU32LEB();
bool hasMemIdx = false;
Expand Down
31 changes: 31 additions & 0 deletions test/lit/passes/code-annotations-optimized-away.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.

;; RUN: wasm-opt %s --monomorphize --roundtrip -all -S -o - | filecheck %s

;; The callee, which we will monomorphize, has a branch hint. The hinted code
;; will end up vanishing entirely, but not the function it is in, so we end up
;; with an annotation without an instruction in the binary for it. We should
;; ignore it and not error.
(module
;; CHECK: (func $callee (type $1) (param $0 i32)
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: )
(func $callee (param $0 i32)
(block $block
(@metadata.code.branch_hint "\00")
(br_if $block
(i32.const 0)
)
)
)

;; CHECK: (func $caller (type $0)
;; CHECK-NEXT: (call $callee_2)
;; CHECK-NEXT: )
(func $caller
(call $callee
(i32.const 0)
)
)
)

Loading
Loading