diff --git a/src/wasm-binary.h b/src/wasm-binary.h index b59275013cb..ccd07aa1c7e 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1404,6 +1404,12 @@ class WasmBinaryWriter { void trackExpressionEnd(Expression* curr, Function* func); void trackExpressionDelimiter(Expression* curr, Function* func, size_t id); + // Writes code annotations into a buffer and returns it. We cannot write them + // directly into the output since we write function code first (to get the + // offsets for the annotations), and only then can write annotations, which we + // must then insert before the code (as the spec requires that). + std::optional writeCodeAnnotations(); + // helpers void writeInlineString(std::string_view name); void writeEscapedName(std::string_view name); @@ -1667,6 +1673,15 @@ class WasmBinaryReader { void readDylink(size_t payloadLen); void readDylink0(size_t payloadLen); + // We read branch hints *after* the code section, even though they appear + // earlier. That is simpler for us as we note expression locations as we scan + // code, and then just need to match them up. To do this, we note the branch + // hint position and size in the first pass, and handle it later. + size_t branchHintsPos = 0; + size_t branchHintsLen = 0; + + void readBranchHints(size_t payloadLen); + Index readMemoryAccess(Address& alignment, Address& offset); std::tuple getMemarg(); MemoryOrder getMemoryOrder(bool isRMW = false); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 376fbfc1069..dd7c9e647a0 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -28,6 +28,7 @@ #include "support/debug.h" #include "support/stdckdint.h" #include "support/string.h" +#include "wasm-annotations.h" #include "wasm-binary.h" #include "wasm-debug.h" #include "wasm-limits.h" @@ -474,6 +475,21 @@ void WasmBinaryWriter::writeFunctions() { } }); finishSection(sectionStart); + + // Code annotations must come before the code section (see comment on + // writeCodeAnnotations). + if (auto annotations = writeCodeAnnotations()) { + // We need to move the code section and put the annotations before it. + auto& annotationsBuffer = *annotations; + auto oldSize = o.size(); + o.resize(oldSize + annotationsBuffer.size()); + + // |sectionStart| is the start of the contents of the section. Subtract 1 to + // include the section code as well, so we move all of it. + std::move_backward(&o[sectionStart - 1], &o[oldSize], o.end()); + std::copy( + annotationsBuffer.begin(), annotationsBuffer.end(), &o[sectionStart - 1]); + } } void WasmBinaryWriter::writeStrings() { @@ -1496,7 +1512,8 @@ void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) { // binary locations tracked, then track it in the output as well. We also // track locations of instructions that have code annotations, as their binary // location goes in the custom section. - if (func && !func->expressionLocations.empty()) { + if (func && (!func->expressionLocations.empty() || + func->codeAnnotations.count(curr))) { binaryLocations.expressions[curr] = BinaryLocations::Span{BinaryLocation(o.size()), 0}; binaryLocationTrackedExpressionsForFunc.push_back(curr); @@ -1504,6 +1521,8 @@ void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) { } void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) { + // TODO: If we need to track the end of annotated code locations, we need to + // enable that here. if (func && !func->expressionLocations.empty()) { auto& span = binaryLocations.expressions.at(curr); span.end = o.size(); @@ -1513,11 +1532,123 @@ void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) { void WasmBinaryWriter::trackExpressionDelimiter(Expression* curr, Function* func, size_t id) { + // TODO: If we need to track the delimiters of annotated code locations, we + // need to enable that here. if (func && !func->expressionLocations.empty()) { binaryLocations.delimiters[curr][id] = o.size(); } } +std::optional WasmBinaryWriter::writeCodeAnnotations() { + // Assemble the info for Branch Hinting: for each function, a vector of the + // hints. + struct ExprHint { + Expression* expr; + // The offset we will write in the custom section. + BinaryLocation offset; + Function::CodeAnnotation* hint; + }; + + struct FuncHints { + Name func; + std::vector exprHints; + }; + + std::vector funcHintsVec; + + for (auto& func : wasm->functions) { + // Collect the Branch Hints for this function. + FuncHints funcHints; + + // We compute the location of the function declaration area (where the + // locals are declared) the first time we need it. + BinaryLocation funcDeclarationsOffset = 0; + + for (auto& [expr, annotation] : func->codeAnnotations) { + if (annotation.branchLikely) { + auto exprIter = binaryLocations.expressions.find(expr); + if (exprIter == binaryLocations.expressions.end()) { + // No expression exists for this annotation - perhaps optimizations + // removed it. + continue; + } + auto exprOffset = exprIter->second.start; + + if (!funcDeclarationsOffset) { + auto funcIter = binaryLocations.functions.find(func.get()); + assert(funcIter != binaryLocations.functions.end()); + funcDeclarationsOffset = funcIter->second.declarations; + } + + // Compute the offset: it should be relative to the start of the + // function locals (i.e. the function declarations). + auto offset = exprOffset - funcDeclarationsOffset; + + funcHints.exprHints.push_back(ExprHint{expr, offset, &annotation}); + } + } + + if (funcHints.exprHints.empty()) { + continue; + } + + // We found something. Finalize the data. + funcHints.func = func->name; + + // Hints must be sorted by increasing binary offset. + std::sort( + funcHints.exprHints.begin(), + funcHints.exprHints.end(), + [](const ExprHint& a, const ExprHint& b) { return a.offset < b.offset; }); + + funcHintsVec.emplace_back(std::move(funcHints)); + } + + if (funcHintsVec.empty()) { + return {}; + } + + if (sourceMap) { + // TODO: This mode may not matter (when debugging, code annotations are an + // optimization that can be skipped), but atm source maps cause + // annotations to break. + Fatal() << "Annotations are not supported with source maps"; + } + + BufferWithRandomAccess buffer; + + // We found data: emit the section. + buffer << uint8_t(BinaryConsts::Custom); + auto lebPos = buffer.writeU32LEBPlaceholder(); + buffer.writeInlineString(Annotations::BranchHint.str); + + buffer << U32LEB(funcHintsVec.size()); + for (auto& funcHints : funcHintsVec) { + buffer << U32LEB(getFunctionIndex(funcHints.func)); + + buffer << U32LEB(funcHints.exprHints.size()); + for (auto& exprHint : funcHints.exprHints) { + buffer << U32LEB(exprHint.offset); + + // Hint size, always 1 for now. + buffer << U32LEB(1); + + // We must only emit hints that are present. + assert(exprHint.hint->branchLikely); + + // Hint contents: likely or not. + buffer << U32LEB(int(*exprHint.hint->branchLikely)); + } + } + + // Write the final size. We can ignore the return value, which is the number + // of bytes we shrank (if the LEB was smaller than the maximum size), as no + // value in this section cares. + buffer.emitRetroactiveSectionSizeLEB(lebPos); + + return buffer; +} + void WasmBinaryWriter::writeData(const char* data, size_t size) { for (size_t i = 0; i < size; i++) { o << int8_t(data[i]); @@ -1792,12 +1923,6 @@ WasmBinaryReader::WasmBinaryReader(Module& wasm, } void WasmBinaryReader::preScan() { - // TODO: Once we support code annotations here, we will need to always scan, - // but for now, DWARF is the only reason. - if (!DWARF) { - return; - } - assert(pos == 0); getInt32(); // magic getInt32(); // version @@ -1813,12 +1938,25 @@ void WasmBinaryReader::preScan() { auto oldPos = pos; if (sectionCode == BinaryConsts::Section::Custom) { auto sectionName = getInlineString(); + + // Code annotations require code locations. + // TODO: For Branch Hinting, we could note which functions require + // code locations, as an optimization. + if (sectionName == Annotations::BranchHint) { + needCodeLocations = true; + // Do not break, so we keep looking for DWARF. + } + // DWARF sections contain code offsets. if (DWARF && Debug::isDWARFSection(sectionName)) { needCodeLocations = true; foundDWARF = true; break; } + + // TODO: We could stop early if we see the Code section and DWARF is + // disabled, as BranchHint must appear first, but this seems to + // make practically no difference in practice. } pos = oldPos + payloadLen; } @@ -1933,6 +2071,12 @@ void WasmBinaryReader::read() { } } + // Go back and parse things we deferred. + if (branchHintsPos) { + pos = branchHintsPos; + readBranchHints(branchHintsLen); + } + validateBinary(); } @@ -1953,6 +2097,10 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { readDylink(payloadLen); } else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) { readDylink0(payloadLen); + } else if (sectionName == Annotations::BranchHint) { + // Only note the position and length, we read this later. + branchHintsPos = pos; + branchHintsLen = payloadLen; } else { // an unfamiliar custom section if (sectionName.equals(BinaryConsts::CustomSections::Linking)) { @@ -5100,6 +5248,62 @@ void WasmBinaryReader::readDylink0(size_t payloadLen) { } } +void WasmBinaryReader::readBranchHints(size_t payloadLen) { + auto sectionPos = pos; + + auto numFuncs = getU32LEB(); + for (Index i = 0; i < numFuncs; i++) { + auto funcIndex = getU32LEB(); + if (funcIndex >= wasm.functions.size()) { + throwError("bad BranchHint function"); + } + + auto& func = wasm.functions[funcIndex]; + + // The encoded offsets we read below are relative to the start of the + // function's locals (the declarations). + auto funcLocalsOffset = func->funcLocation.declarations; + + // We have a map of expressions to their locations. Invert that to get the + // map we will use below, from offsets to expressions. + std::unordered_map locationsMap; + + for (auto& [expr, span] : func->expressionLocations) { + locationsMap[span.start] = expr; + } + + auto numHints = getU32LEB(); + for (Index hint = 0; hint < numHints; hint++) { + // To get the absolute offset, add the function's offset. + auto relativeOffset = getU32LEB(); + auto absoluteOffset = funcLocalsOffset + relativeOffset; + + auto iter = locationsMap.find(absoluteOffset); + if (iter == locationsMap.end()) { + throwError("bad BranchHint offset"); + } + auto* expr = iter->second; + + auto size = getU32LEB(); + if (size != 1) { + throwError("bad BranchHint size"); + } + + auto likely = getU32LEB(); + if (likely != 0 && likely != 1) { + throwError("bad BranchHint value"); + } + + // Apply the valid hint. + func->codeAnnotations[expr].branchLikely = likely; + } + } + + if (pos != sectionPos + payloadLen) { + throwError("bad BranchHint section size"); + } +} + Index WasmBinaryReader::readMemoryAccess(Address& alignment, Address& offset) { auto rawAlignment = getU32LEB(); bool hasMemIdx = false; diff --git a/test/lit/passes/code-annotations-optimized-away.wast b/test/lit/passes/code-annotations-optimized-away.wast new file mode 100644 index 00000000000..26805354add --- /dev/null +++ b/test/lit/passes/code-annotations-optimized-away.wast @@ -0,0 +1,31 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. + +;; RUN: wasm-opt %s --monomorphize --roundtrip -all -S -o - | filecheck %s + +;; The callee, which we will monomorphize, has a branch hint. The hinted code +;; will end up vanishing entirely, but not the function it is in, so we end up +;; with an annotation without an instruction in the binary for it. We should +;; ignore it and not error. +(module + ;; CHECK: (func $callee (type $1) (param $0 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $callee (param $0 i32) + (block $block + (@metadata.code.branch_hint "\00") + (br_if $block + (i32.const 0) + ) + ) + ) + + ;; CHECK: (func $caller (type $0) + ;; CHECK-NEXT: (call $callee_2) + ;; CHECK-NEXT: ) + (func $caller + (call $callee + (i32.const 0) + ) + ) +) + diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 6d5be1f0a9d..658c00224be 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -1,10 +1,38 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; RUN: wasm-opt -all %s -S -o - | filecheck %s +;; RUN: wasm-opt -all %s -S -o - | filecheck %s +;; RUN: wasm-opt -all --roundtrip %s -S -o - | filecheck %s --check-prefix=RTRIP (module + ;; CHECK: (type $0 (func (param i32))) + ;; CHECK: (func $no-annotations (type $0) (param $x i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; RTRIP: (type $0 (func (param i32))) + + ;; RTRIP: (func $no-annotations (type $0) (param $x i32) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + (func $no-annotations (param $x i32) + ;; A function with no annotations. This tests that we use function indexes + ;; properly in the section. + (block $out + (br_if $out + (local.get $x) + ) + ) + ) + ;; CHECK: (func $branch-hints-br_if (type $0) (param $x i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") @@ -21,6 +49,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; RTRIP: (func $branch-hints-br_if (type $0) (param $x i32) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\00") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\01") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\00") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) (func $branch-hints-br_if (param $x i32) (block $out ;; A branch annotated as unlikely, and one as likely. @@ -40,4 +84,68 @@ ) ) ) + + ;; CHECK: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; CHECK-NEXT: (local $unused f64) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; RTRIP: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; RTRIP-NEXT: (local $unused f64) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\01") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + (func $branch_hints-br_if-2 (param $x i32) + (local $unused f64) + ;; A second function with hints. This one also has local definitions, which + ;; should not confuse us (branch hint offsets are relative to the start of + ;; the local definitions, not the end). + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + ) + ) + + ;; CHECK: (func $mixing (type $0) (param $x i32) + ;; CHECK-NEXT: ;;@ mixing.src:1337:42 + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; RTRIP: (func $mixing (type $0) (param $x i32) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\01") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + (func $mixing (param $x i32) + ;; Mix branch hints with source locations. Both hints should remain. + ;; TODO: Fix this in the binary format. Atm we test with --roundtrip here, + ;; which does not use source maps, so it is expected for the source + ;; annotation to vanish. But using source maps does not fix it, see + ;; the TODO in the code. + + ;;@ mixing.src:1337:42 + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + ) + ) )