From a954eecc92e52ad04fe4d35d7d6d8d76ee6566ae Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 1 May 2025 16:55:38 -0700 Subject: [PATCH 01/69] work --- src/parser/contexts.h | 14 +++++++++++++- src/parser/lexer.h | 3 +++ src/wasm-ir-builder.h | 7 ++++--- src/wasm.h | 10 ++++++++++ src/wasm/wasm-ir-builder.cpp | 14 ++++++++++++-- 5 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/parser/contexts.h b/src/parser/contexts.h index fd835134eb5..395f5b29d59 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -2343,7 +2343,19 @@ struct ParseDefsCtx : TypeParserCtx { const std::vector& annotations, Index label, bool isConditional) { - return withLoc(pos, irBuilder.makeBreak(label, isConditional)); + std::optional likely; + for (auto& annotation : annotations) { + if (annotation.kind == Annotation::BranchHint) { + if (annotation.contents == "0") { + likely = false; + } else if (annotation.contents == "1") { + likely = true; + } else { + std::cerr << "warning: invalid BranchHint " << annotation.contents << '\n'; + } + } + } + return withLoc(pos, irBuilder.makeBreak(label, isConditional, likely)); } Result<> makeSwitch(Index pos, diff --git a/src/parser/lexer.h b/src/parser/lexer.h index 5d5aee392a3..1066ef3f748 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -49,6 +49,9 @@ struct TextPos { struct Annotation { Name kind; std::string_view contents; + + // Recognized annotations. + static const Name BranchHint = "metadata.code.branch_hint"; }; extern Name srcAnnotationKind; diff --git a/src/wasm-ir-builder.h b/src/wasm-ir-builder.h index 7fbe089c127..d979fb52597 100644 --- a/src/wasm-ir-builder.h +++ b/src/wasm-ir-builder.h @@ -72,8 +72,7 @@ class IRBuilder : public UnifiedExpressionVisitor> { this->codeSectionOffset = codeSectionOffset; } - // Set the function used to add scratch locals when constructing an isolated - // sequence of IR. + // The current function, used to create scratch locals, add annotations, etc. void setFunction(Function* func) { this->func = func; } // Handle the boundaries of control flow structures. Users may choose to use @@ -119,7 +118,9 @@ class IRBuilder : public UnifiedExpressionVisitor> { Result<> makeBlock(Name label, Signature sig); Result<> makeIf(Name label, Signature sig); Result<> makeLoop(Name label, Signature sig); - Result<> makeBreak(Index label, bool isConditional); + Result<> makeBreak(Index label, + bool isConditional, + std::optional likely = std::nullopt); Result<> makeSwitch(const std::vector& labels, Index defaultLabel); // Unlike Builder::makeCall, this assumes the function already exists. Result<> makeCall(Name func, bool isReturn); diff --git a/src/wasm.h b/src/wasm.h index b6541fc6cf5..60efbfd65b8 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2191,6 +2191,16 @@ class Function : public Importable { delimiterLocations; BinaryLocations::FunctionLocations funcLocation; + // Code annotations. As with debug info, we do not store these on Expressions + // themselves, as we assume most instances are unannotated, and do not want to + // add constant memory overhead. + struct CodeAnnotation { + // Branch hinting proposal: Whether the branch is likely, or unlikely. + std::optional branchLikely; + }; + + std::unordered_map codeAnnotations; + // The effects for this function, if they have been computed. We use a shared // ptr here to avoid compilation errors with the forward-declared // EffectAnalyzer. diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index 0928c99c927..3d698ba4e88 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -1393,7 +1393,9 @@ Result<> IRBuilder::makeLoop(Name label, Signature sig) { return visitLoopStart(loop, sig.params); } -Result<> IRBuilder::makeBreak(Index label, bool isConditional) { +Result<> IRBuilder::makeBreak(Index label, + bool isConditional, + std::optional likely) { auto name = getLabelName(label); CHECK_ERR(name); auto labelType = getLabelType(label); @@ -1404,7 +1406,15 @@ Result<> IRBuilder::makeBreak(Index label, bool isConditional) { // Use a dummy condition value if we need to pop a condition. curr.condition = isConditional ? &curr : nullptr; CHECK_ERR(ChildPopper{*this}.visitBreak(&curr, *labelType)); - push(builder.makeBreak(curr.name, curr.value, curr.condition)); + auto* br = builder.makeBreak(curr.name, curr.value, curr.condition); + push(br); + + if (likely && func) { + Function::CodeAnnotation annotation; + annotation.branchLikely = likely; + func->codeAnnotations[br] = annotation; + } + return Ok{}; } From 991058eb322057059cf795741cda78d609301227 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 1 May 2025 17:03:12 -0700 Subject: [PATCH 02/69] work --- src/passes/Print.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 35fa57ea51a..9524d28dc29 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2698,6 +2698,15 @@ void PrintSExpression::printDebugLocation(Expression* curr) { doIndent(o, indent); } } + + auto iter2 = currFunction->codeAnnotations.find(curr); + if (iter2 != currFunction->codeAnnotations.end()) { + auto& annotation = iter2->second; + if (annotation.likely) { + // TODO share constant name + o << "(@metadata.code.branch_hint \"" << (*annotation ? "\\1" : "\\0") << ")"; + } + } } } From b14103cee7c6cd7d4c9714a9f956c6d9b2d34d13 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 09:16:40 -0700 Subject: [PATCH 03/69] work --- src/parser/contexts.h | 2 +- src/parser/lexer.h | 3 --- src/passes/Print.cpp | 3 ++- src/wasm-annotations.h | 35 +++++++++++++++++++++++++++++++++++ src/wasm/wasm.cpp | 6 ++++++ 5 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 src/wasm-annotations.h diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 395f5b29d59..75e56cb72e7 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -2345,7 +2345,7 @@ struct ParseDefsCtx : TypeParserCtx { bool isConditional) { std::optional likely; for (auto& annotation : annotations) { - if (annotation.kind == Annotation::BranchHint) { + if (annotation.kind == Annotations::BranchHint) { if (annotation.contents == "0") { likely = false; } else if (annotation.contents == "1") { diff --git a/src/parser/lexer.h b/src/parser/lexer.h index 1066ef3f748..5d5aee392a3 100644 --- a/src/parser/lexer.h +++ b/src/parser/lexer.h @@ -49,9 +49,6 @@ struct TextPos { struct Annotation { Name kind; std::string_view contents; - - // Recognized annotations. - static const Name BranchHint = "metadata.code.branch_hint"; }; extern Name srcAnnotationKind; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 9524d28dc29..9adbdb042ed 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2704,7 +2704,8 @@ void PrintSExpression::printDebugLocation(Expression* curr) { auto& annotation = iter2->second; if (annotation.likely) { // TODO share constant name - o << "(@metadata.code.branch_hint \"" << (*annotation ? "\\1" : "\\0") << ")"; + o << "(@" << Annotations::BranchHint << " \"" + << (*annotation ? "\\1" : "\\0") << ")"; } } } diff --git a/src/wasm-annotations.h b/src/wasm-annotations.h new file mode 100644 index 00000000000..b220bcc55d8 --- /dev/null +++ b/src/wasm-annotations.h @@ -0,0 +1,35 @@ +/* + * Copyright 2017 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Support for code annotations. +// + +#ifndef wasm_annotations_h +#define wasm_annotations_h + +#include "parsing.h" +#include "pass.h" +#include "support/file.h" +#include "wasm.h" + +namespace wasm::Annotations { + +extern const Name BranchHint; + +} // namespace wasm::Annotations + +#endif // wasm_annotations_h diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 3f885cbccdf..1245cd13432 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -63,6 +63,12 @@ const char* CustomDescriptorsFeature = "custom-descriptors"; } // namespace BinaryConsts::CustomSections +namespace Annotations { + +const Name BranchHint = "metadata.code.branch_hint"; + +} + Name STACK_POINTER("__stack_pointer"); Name MODULE("module"); Name START("start"); From 1f6ed00206019d6a9e1d3678fe1965217be9b89c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 09:17:36 -0700 Subject: [PATCH 04/69] work --- src/parser/contexts.h | 1 + src/passes/Print.cpp | 1 + src/wasm-annotations.h | 5 +---- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 75e56cb72e7..5fe4ae3d9fb 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -23,6 +23,7 @@ #include "support/name.h" #include "support/result.h" #include "support/string.h" +#include "wasm-annotations.h" #include "wasm-builder.h" #include "wasm-ir-builder.h" #include "wasm.h" diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 9adbdb042ed..95309dc8027 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/src/wasm-annotations.h b/src/wasm-annotations.h index b220bcc55d8..dde997593e1 100644 --- a/src/wasm-annotations.h +++ b/src/wasm-annotations.h @@ -21,10 +21,7 @@ #ifndef wasm_annotations_h #define wasm_annotations_h -#include "parsing.h" -#include "pass.h" -#include "support/file.h" -#include "wasm.h" +#include "support/name.h" namespace wasm::Annotations { From aef1dd70190dbda7b645955cdb54eeb797960fd6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 09:19:37 -0700 Subject: [PATCH 05/69] work --- src/passes/Print.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 95309dc8027..e09f3a928c9 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2703,10 +2703,10 @@ void PrintSExpression::printDebugLocation(Expression* curr) { auto iter2 = currFunction->codeAnnotations.find(curr); if (iter2 != currFunction->codeAnnotations.end()) { auto& annotation = iter2->second; - if (annotation.likely) { + if (annotation.branchLikely) { // TODO share constant name o << "(@" << Annotations::BranchHint << " \"" - << (*annotation ? "\\1" : "\\0") << ")"; + << (*annotation.branchLikely ? "\\1" : "\\0") << ")"; } } } From 57f19ef04ba2aa4c2dd65844b9ecd2131075131b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 09:20:11 -0700 Subject: [PATCH 06/69] work --- src/wasm/wasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index 1245cd13432..e1384d8aa50 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -63,7 +63,7 @@ const char* CustomDescriptorsFeature = "custom-descriptors"; } // namespace BinaryConsts::CustomSections -namespace Annotations { +namespace wasm::Annotations { const Name BranchHint = "metadata.code.branch_hint"; From 1fe0ab51acf5f1604b6dc9166aab8fb7fc310637 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 09:23:17 -0700 Subject: [PATCH 07/69] work --- src/wasm/wasm.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index e1384d8aa50..b66a2a7c6d3 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -16,6 +16,7 @@ #include "wasm.h" #include "ir/branch-utils.h" +#include "wasm-annotations.h" #include "wasm-traversal.h" #include "wasm-type.h" @@ -63,11 +64,11 @@ const char* CustomDescriptorsFeature = "custom-descriptors"; } // namespace BinaryConsts::CustomSections -namespace wasm::Annotations { +namespace Annotations { const Name BranchHint = "metadata.code.branch_hint"; -} +} // namespace Annotations Name STACK_POINTER("__stack_pointer"); Name MODULE("module"); From 5624819abe9094abffd6c7a970f89dd6a4fc6720 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 10:43:18 -0700 Subject: [PATCH 08/69] work --- src/parser/contexts.h | 41 +++++++++++++++++++++++++---------- src/passes/Print.cpp | 2 +- test/lit/wat-annotations.wast | 19 ++++++++++++++++ 3 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 test/lit/wat-annotations.wast diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 5fe4ae3d9fb..a25664dddd7 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -2340,22 +2340,39 @@ struct ParseDefsCtx : TypeParserCtx { return withLoc(pos, irBuilder.makeCallIndirect(*t, type, isReturn)); } + // TODO move + std::optional getBranchHint(const std::vector& annotations) { + // Find and apply (the last) branch hint. + const Annotation* hint = nullptr; + for (auto& a : annotations) { + if (a.kind == Annotations::BranchHint) { + hint = &a; + } + } + if (!hint) { + return std::nullopt; + } + + Lexer lexer(hint->contents); + if (lexer.empty()) { + std::cerr << "warning: invalid BranchHint\n"; + return std::nullopt; + } + + auto str = lexer.takeString(); + if (!str || str->size() != 1) { + std::cerr << "warning: invalid BranchHint\n"; + return std::nullopt; + } + + return bool((*str)[0]); + } + Result<> makeBreak(Index pos, const std::vector& annotations, Index label, bool isConditional) { - std::optional likely; - for (auto& annotation : annotations) { - if (annotation.kind == Annotations::BranchHint) { - if (annotation.contents == "0") { - likely = false; - } else if (annotation.contents == "1") { - likely = true; - } else { - std::cerr << "warning: invalid BranchHint " << annotation.contents << '\n'; - } - } - } + auto likely = getBranchHint(annotations); return withLoc(pos, irBuilder.makeBreak(label, isConditional, likely)); } diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index e09f3a928c9..a1aa6639e81 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2718,7 +2718,7 @@ void PrintSExpression::printDebugDelimiterLocation(Expression* curr, Index i) { if (iter != currFunction->delimiterLocations.end()) { auto& locations = iter->second; Colors::grey(o); - o << ";; code offset: 0x" << std::hex << locations[i] << std::dec << '\n'; + o << ";; code offset: 0x0" << std::hex << locations[i] << std::dec << "\"\n"; restoreNormalColor(o); doIndent(o, indent); } diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast new file mode 100644 index 00000000000..75c2b4d062c --- /dev/null +++ b/test/lit/wat-annotations.wast @@ -0,0 +1,19 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: wasm-opt -all %s -S -o - | filecheck %s + +(module + (func $test (param $x i32) + (@src src.cpp:30:1) + (block $out + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + ) + ) +) From f3c5257a22e47bc7267105b1903bc458d82863fc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 10:46:30 -0700 Subject: [PATCH 09/69] work --- src/passes/Print.cpp | 9 ++++++--- test/lit/wat-annotations.wast | 18 ++++++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index a1aa6639e81..2a44fcf94d3 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2705,8 +2705,11 @@ void PrintSExpression::printDebugLocation(Expression* curr) { auto& annotation = iter2->second; if (annotation.branchLikely) { // TODO share constant name - o << "(@" << Annotations::BranchHint << " \"" - << (*annotation.branchLikely ? "\\1" : "\\0") << ")"; + Colors::grey(o); + o << "(@" << Annotations::BranchHint << " \"\\0" + << (*annotation.branchLikely ? "1" : "0") << "\")\n"; + restoreNormalColor(o); + doIndent(o, indent); } } } @@ -2718,7 +2721,7 @@ void PrintSExpression::printDebugDelimiterLocation(Expression* curr, Index i) { if (iter != currFunction->delimiterLocations.end()) { auto& locations = iter->second; Colors::grey(o); - o << ";; code offset: 0x0" << std::hex << locations[i] << std::dec << "\"\n"; + o << ";; code offset: 0x" << std::hex << locations[i] << std::dec << '\n'; restoreNormalColor(o); doIndent(o, indent); } diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 75c2b4d062c..2c4a6ac7404 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -3,8 +3,22 @@ ;; RUN: wasm-opt -all %s -S -o - | filecheck %s (module - (func $test (param $x i32) - (@src src.cpp:30:1) + ;; CHECK: (type $0 (func (param i32))) + + ;; CHECK: (func $branch-hints-br_if (type $0) (param $x i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $branch-hints-br_if (param $x i32) + ;; A branch annotated as unlikely, and one as likely. (block $out (@metadata.code.branch_hint "\00") (br_if $out From 4c645a606a9acd5ec5b6b0dfcbd59426d8529cde Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 10:48:58 -0700 Subject: [PATCH 10/69] work --- test/lit/wat-annotations.wast | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 2c4a6ac7404..55da1eb74a1 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -1,6 +1,7 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; RUN: wasm-opt -all %s -S -o - | filecheck %s +;; RUN: wasm-opt -all %s --roundtrip -S -o - | filecheck %s --check-prefix=BINARY (module ;; CHECK: (type $0 (func (param i32))) @@ -15,11 +16,30 @@ ;; CHECK-NEXT: (br_if $out ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; BINARY: (type $0 (func (param i32))) + + ;; BINARY: (func $branch-hints-br_if (type $0) (param $x i32) + ;; BINARY-NEXT: (block $block + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) (func $branch-hints-br_if (param $x i32) - ;; A branch annotated as unlikely, and one as likely. (block $out + ;; A branch annotated as unlikely, and one as likely. (@metadata.code.branch_hint "\00") (br_if $out (local.get $x) @@ -28,6 +48,12 @@ (br_if $out (local.get $x) ) + ;; The last one wins. + (@metadata.code.branch_hint "\01") + (@metadata.code.branch_hint "\00") + (br_if $out + (local.get $x) + ) ) ) ) From 5553c45bafed48431f6f9df5d1c593787a0e9c96 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 10:51:15 -0700 Subject: [PATCH 11/69] work --- src/passes/Print.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 2a44fcf94d3..a59aef54b0b 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -268,15 +268,17 @@ struct PrintSExpression : public UnifiedExpressionVisitor { void printDebugLocation(const std::optional& location); - void printDebugLocation(Expression* curr); // Prints debug info for a delimiter in an expression. void printDebugDelimiterLocation(Expression* curr, Index i); + // Prints debug info and code annotations. + void printMetadata(Expression* curr); + void printExpressionContents(Expression* curr); void visit(Expression* curr) { - printDebugLocation(curr); + printMetadata(curr); UnifiedExpressionVisitor::visit(curr); } @@ -2678,7 +2680,7 @@ void PrintSExpression::printDebugLocation( doIndent(o, indent); } -void PrintSExpression::printDebugLocation(Expression* curr) { +void PrintSExpression::printMetadata(Expression* curr) { if (currFunction) { // show an annotation, if there is one auto& debugLocations = currFunction->debugLocations; @@ -2795,7 +2797,7 @@ void PrintSExpression::visitBlock(Block* curr) { while (1) { if (stack.size() > 0) { doIndent(o, indent); - printDebugLocation(curr); + printMetadata(curr); } stack.push_back(curr); o << '('; From 17c9fd17293bac2fc59e085d0d7afaecd877b1b8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 11:26:40 -0700 Subject: [PATCH 12/69] NEXT --- src/wasm-binary.h | 2 ++ src/wasm/wasm-binary.cpp | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 95725eff47a..c617b9d7c35 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1355,6 +1355,8 @@ class WasmBinaryWriter { void writeDebugLocationEnd(Expression* curr, Function* func); void writeExtraDebugLocation(Expression* curr, Function* func, size_t id); + void writeCodeAnnotations(); + // helpers void writeInlineString(std::string_view name); void writeEscapedName(std::string_view name); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 88fb43b345a..751d9ec53b6 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -99,6 +99,7 @@ void WasmBinaryWriter::write() { writeLateCustomSections(); writeFeaturesSection(); + writeCodeAnnotations(); } void WasmBinaryWriter::writeHeader() { @@ -1503,6 +1504,7 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { } // If this is an instruction in a function, and if the original wasm had // binary locations tracked, then track it in the output as well. +// here XXX! if (func && !func->expressionLocations.empty()) { binaryLocations.expressions[curr] = BinaryLocations::Span{BinaryLocation(o.size()), 0}; @@ -1525,6 +1527,22 @@ void WasmBinaryWriter::writeExtraDebugLocation(Expression* curr, } } +void WasmBinaryWriter::writeCodeAnnotations() { + // See if we have any annotations at all. + bool have = false; + for (auto& func : wasm.functions) { + if (!func->codeAnnotations.empty()) { + have = true; + break; + } + } + if (!have) { + return; + } + + +} + void WasmBinaryWriter::writeData(const char* data, size_t size) { for (size_t i = 0; i < size; i++) { o << int8_t(data[i]); From 6987c9a3a67a77eb77746fe2320d3ff93fe3b6ae Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 11:27:00 -0700 Subject: [PATCH 13/69] format --- src/parser/contexts.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser/contexts.h b/src/parser/contexts.h index a25664dddd7..88d60ccab92 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -2341,7 +2341,8 @@ struct ParseDefsCtx : TypeParserCtx { } // TODO move - std::optional getBranchHint(const std::vector& annotations) { + std::optional + getBranchHint(const std::vector& annotations) { // Find and apply (the last) branch hint. const Annotation* hint = nullptr; for (auto& a : annotations) { From 9191093a69f1ed9b06f1f136daa5e7b534c7e608 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 11:27:23 -0700 Subject: [PATCH 14/69] undo --- test/lit/wat-annotations.wast | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 55da1eb74a1..6d5be1f0a9d 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -1,7 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; RUN: wasm-opt -all %s -S -o - | filecheck %s -;; RUN: wasm-opt -all %s --roundtrip -S -o - | filecheck %s --check-prefix=BINARY (module ;; CHECK: (type $0 (func (param i32))) @@ -22,21 +21,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; BINARY: (type $0 (func (param i32))) - - ;; BINARY: (func $branch-hints-br_if (type $0) (param $x i32) - ;; BINARY-NEXT: (block $block - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) (func $branch-hints-br_if (param $x i32) (block $out ;; A branch annotated as unlikely, and one as likely. From 0c1146100e838a993ec3235e2a7f5cfbb525aaaf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 11:30:45 -0700 Subject: [PATCH 15/69] clean --- src/parser/contexts.h | 2 +- src/passes/Print.cpp | 1 - src/wasm-annotations.h | 2 +- src/wasm/wasm-ir-builder.cpp | 4 +++- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 88d60ccab92..3bc3e546d38 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -2340,7 +2340,7 @@ struct ParseDefsCtx : TypeParserCtx { return withLoc(pos, irBuilder.makeCallIndirect(*t, type, isReturn)); } - // TODO move + // Return the branch hint for a branching instruction, if there is one. std::optional getBranchHint(const std::vector& annotations) { // Find and apply (the last) branch hint. diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index a59aef54b0b..bf39923ac96 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2706,7 +2706,6 @@ void PrintSExpression::printMetadata(Expression* curr) { if (iter2 != currFunction->codeAnnotations.end()) { auto& annotation = iter2->second; if (annotation.branchLikely) { - // TODO share constant name Colors::grey(o); o << "(@" << Annotations::BranchHint << " \"\\0" << (*annotation.branchLikely ? "1" : "0") << "\")\n"; diff --git a/src/wasm-annotations.h b/src/wasm-annotations.h index dde997593e1..42f0e56d732 100644 --- a/src/wasm-annotations.h +++ b/src/wasm-annotations.h @@ -1,5 +1,5 @@ /* - * Copyright 2017 WebAssembly Community Group participants + * Copyright 2025 WebAssembly Community Group participants * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index 3d698ba4e88..138ca5db7f7 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -1409,7 +1409,9 @@ Result<> IRBuilder::makeBreak(Index label, auto* br = builder.makeBreak(curr.name, curr.value, curr.condition); push(br); - if (likely && func) { + if (likely) { + // Branches are only possible inside functions. + assert(func); Function::CodeAnnotation annotation; annotation.branchLikely = likely; func->codeAnnotations[br] = annotation; From 7429f120f4d0477302ea9d22a065758b2a55e200 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 13:05:06 -0700 Subject: [PATCH 16/69] Update src/wasm/wasm-ir-builder.cpp Co-authored-by: Thomas Lively --- src/wasm/wasm-ir-builder.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/wasm/wasm-ir-builder.cpp b/src/wasm/wasm-ir-builder.cpp index 138ca5db7f7..269682c5fa0 100644 --- a/src/wasm/wasm-ir-builder.cpp +++ b/src/wasm/wasm-ir-builder.cpp @@ -1412,9 +1412,7 @@ Result<> IRBuilder::makeBreak(Index label, if (likely) { // Branches are only possible inside functions. assert(func); - Function::CodeAnnotation annotation; - annotation.branchLikely = likely; - func->codeAnnotations[br] = annotation; + func->codeAnnotations[br].branchLikely = likely; } return Ok{}; From 57b9eeca4467da10187ce18b091618f1bcb084ba Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 13:07:05 -0700 Subject: [PATCH 17/69] validate 0/1 --- src/parser/contexts.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/parser/contexts.h b/src/parser/contexts.h index 3bc3e546d38..b4d27133dcb 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -2356,17 +2356,23 @@ struct ParseDefsCtx : TypeParserCtx { Lexer lexer(hint->contents); if (lexer.empty()) { - std::cerr << "warning: invalid BranchHint\n"; + std::cerr << "warning: empty BranchHint\n"; return std::nullopt; } auto str = lexer.takeString(); if (!str || str->size() != 1) { - std::cerr << "warning: invalid BranchHint\n"; + std::cerr << "warning: invalid BranchHint string\n"; return std::nullopt; } - return bool((*str)[0]); + auto value = (*str)[0]; + if (value != 0 && value != 1) { + std::cerr << "warning: invalid BranchHint value\n"; + return std::nullopt; + } + + return bool(value); } Result<> makeBreak(Index pos, From deec0b86573a10f241a8e002d2e2561418b187d1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 13:15:50 -0700 Subject: [PATCH 18/69] nicer iter usage --- src/passes/Print.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index bf39923ac96..435b5061dad 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2682,18 +2682,18 @@ void PrintSExpression::printDebugLocation( void PrintSExpression::printMetadata(Expression* curr) { if (currFunction) { - // show an annotation, if there is one - auto& debugLocations = currFunction->debugLocations; - auto iter = debugLocations.find(curr); - if (iter != debugLocations.end()) { + // Show a debug location, if there is one. + if (auto iter = currFunction->debugLocations.find(curr); + iter != currFunction->debugLocations.end()) { printDebugLocation(iter->second); } else { printDebugLocation(std::nullopt); } - // show a binary position, if there is one + + // Show a binary position, if there is one. if (debugInfo) { - auto iter = currFunction->expressionLocations.find(curr); - if (iter != currFunction->expressionLocations.end()) { + if (auto iter = currFunction->expressionLocations.find(curr); + iter != currFunction->expressionLocations.end()) { Colors::grey(o); o << ";; code offset: 0x" << std::hex << iter->second.start << std::dec << '\n'; @@ -2702,9 +2702,10 @@ void PrintSExpression::printMetadata(Expression* curr) { } } - auto iter2 = currFunction->codeAnnotations.find(curr); - if (iter2 != currFunction->codeAnnotations.end()) { - auto& annotation = iter2->second; + // Show a code annotation, if there is one. + if (auto iter = currFunction->codeAnnotations.find(curr); + iter != currFunction->codeAnnotations.end()) { + auto& annotation = iter->second; if (annotation.branchLikely) { Colors::grey(o); o << "(@" << Annotations::BranchHint << " \"\\0" From c47fceed4ef4b3ac4e9c04d373158e64aeb5054a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 14:37:49 -0700 Subject: [PATCH 19/69] work --- src/wasm/wasm-binary.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 751d9ec53b6..2975642141c 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1503,9 +1503,11 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { } } // If this is an instruction in a function, and if the original wasm had - // binary locations tracked, then track it in the output as well. -// here XXX! - if (func && !func->expressionLocations.empty()) { + // binary locations tracked, then track it in the output as well. We also + // track locations of instructions that have code annotations, as their binary + // location goes in the custom section. + if (func && + (!func->expressionLocations.empty() || func->codeAnnotations.count(curr))) { binaryLocations.expressions[curr] = BinaryLocations::Span{BinaryLocation(o.size()), 0}; binaryLocationTrackedExpressionsForFunc.push_back(curr); From fa66aac284cebd3d3ef00667198cb70ab242c2ab Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 14:38:07 -0700 Subject: [PATCH 20/69] format --- src/wasm/wasm-binary.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 2975642141c..cd65a9f258d 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1506,8 +1506,8 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { // binary locations tracked, then track it in the output as well. We also // track locations of instructions that have code annotations, as their binary // location goes in the custom section. - if (func && - (!func->expressionLocations.empty() || func->codeAnnotations.count(curr))) { + if (func && (!func->expressionLocations.empty() || + func->codeAnnotations.count(curr))) { binaryLocations.expressions[curr] = BinaryLocations::Span{BinaryLocation(o.size()), 0}; binaryLocationTrackedExpressionsForFunc.push_back(curr); @@ -1541,8 +1541,6 @@ void WasmBinaryWriter::writeCodeAnnotations() { if (!have) { return; } - - } void WasmBinaryWriter::writeData(const char* data, size_t size) { From 922570881e7b793429faaaf848f559f8f87c596d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 14:47:50 -0700 Subject: [PATCH 21/69] work --- src/wasm-binary.h | 4 ++-- src/wasm-stack.h | 8 +++----- src/wasm/wasm-binary.cpp | 5 +++-- src/wasm/wasm-stack.cpp | 12 ++++-------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index c617b9d7c35..38a49d2419b 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1351,8 +1351,8 @@ class WasmBinaryWriter { void writeSourceMapEpilog(); void writeDebugLocation(const Function::DebugLocation& loc); void writeNoDebugLocation(); - void writeDebugLocation(Expression* curr, Function* func); - void writeDebugLocationEnd(Expression* curr, Function* func); + void writeMetadata(Expression* curr, Function* func); + void writeMetadataEnd(Expression* curr, Function* func); void writeExtraDebugLocation(Expression* curr, Function* func, size_t id); void writeCodeAnnotations(); diff --git a/src/wasm-stack.h b/src/wasm-stack.h index f48233333da..101647088cc 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -99,11 +99,11 @@ class BinaryInstWriter : public OverriddenVisitor { void visit(Expression* curr) { if (func && !sourceMap) { - parent.writeDebugLocation(curr, func); + parent.writeMetadata(curr, func); } OverriddenVisitor::visit(curr); if (func && !sourceMap) { - parent.writeDebugLocationEnd(curr, func); + parent.writeMetadataEnd(curr, func); } } @@ -479,9 +479,7 @@ class BinaryenIRToBinaryWriter } void emitUnreachable() { writer.emitUnreachable(); } void emitDebugLocation(Expression* curr) { - if (sourceMap) { - parent.writeDebugLocation(curr, func); - } + parent.writeMetadata(curr, func); } MappedLocals& getMappedLocals() { return writer.mappedLocals; } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index cd65a9f258d..4066df45c80 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1490,7 +1490,7 @@ void WasmBinaryWriter::writeNoDebugLocation() { } } -void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { +void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { if (sourceMap) { auto& debugLocations = func->debugLocations; auto iter = debugLocations.find(curr); @@ -1502,6 +1502,7 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { writeNoDebugLocation(); } } + // If this is an instruction in a function, and if the original wasm had // binary locations tracked, then track it in the output as well. We also // track locations of instructions that have code annotations, as their binary @@ -1514,7 +1515,7 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) { } } -void WasmBinaryWriter::writeDebugLocationEnd(Expression* curr, Function* func) { +void WasmBinaryWriter::writeMetadataEnd(Expression* curr, Function* func) { if (func && !func->expressionLocations.empty()) { auto& span = binaryLocations.expressions.at(curr); span.end = o.size(); diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 6f40a1ba3ab..e484175af21 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2736,8 +2736,8 @@ void BinaryInstWriter::emitScopeEnd(Expression* curr) { assert(!breakStack.empty()); breakStack.pop_back(); o << int8_t(BinaryConsts::End); - if (func && !sourceMap) { - parent.writeDebugLocationEnd(curr, func); + if (func) { + parent.writeMetadataEnd(curr, func); } } @@ -3212,13 +3212,9 @@ void StackIRToBinaryWriter::write() { case StackInst::IfBegin: case StackInst::LoopBegin: case StackInst::TryTableBegin: { - if (sourceMap) { - parent.writeDebugLocation(inst->origin, func); - } + parent.writeMetadata(inst->origin, func); writer.visit(inst->origin); - if (sourceMap) { - parent.writeDebugLocationEnd(inst->origin, func); - } + parent.writeMetadataEnd(inst->origin, func); break; } case StackInst::TryEnd: From 1cac4d7ff3eae06f1819e9753a0d861fca3ae067 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:12:30 -0700 Subject: [PATCH 22/69] work --- src/wasm/wasm-binary.cpp | 64 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 4066df45c80..339ab16e146 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1531,17 +1531,69 @@ void WasmBinaryWriter::writeExtraDebugLocation(Expression* curr, } void WasmBinaryWriter::writeCodeAnnotations() { - // See if we have any annotations at all. - bool have = false; + // Assemble the info for Branch Hinting: for each function, a vector of the + // hints. + struct ExprHint { + Expression* expr; + CodeAnnotation* hint; + }; + + struct FuncHints { + Name func; + std::vector exprHints; + }; + + std::vector funcHintsVec; + for (auto& func : wasm.functions) { - if (!func->codeAnnotations.empty()) { - have = true; - break; + // Collect the Branch Hints for this function. + FuncHints funcHints; + + for (auto& [expr, annotation] : func->codeAnnotations) { + if (annotation.branchLikely) { + funcHints.exprHints.push_back(ExprHint{expr, &annotation}); + } + } + + // If we found something, note it all. + if (!funcHints.exprHints) { + funcHints.func = func->name; + funcHintsVec.emplace_back(std::move(funcHints)); } } - if (!have) { + + if (funcHintsVec.empty()) { return; } + + // Emit the section, as we found data. + auto start = startSection(BinaryConsts::Custom); + writeInlineString(Annotations::BranchHint.c_str()); // c_str? + + o << U32LEB(funcHintsVec.size()); + for (auto& funcHints : funcHintsVec) { + o << U32LEB(getFunctionIndex(funcHints.func)); + + o << U32LEB(funcHintsVec.hints.size()); + for (auto& exprHint : funcHintsVec.exprHints) { + // We must only emit hints that are present. + assert(exprHint->branchLikely); + + // Emit the offset as relative to the start of the function locals TODO + auto iter = binaryLocations.expressions.find(exprHint.expr); + assert(iter != binaryLocations.expressions.end()); + auto offset = iter->second.start; + o << U32LEB(offset); + + // Hint size, always 1 for now. + o << U32LEB(1); + + // Hint contents: likely or not. + o << U32LEB(*exprHint->branchLikely); + } + } + + finishSection(start); } void WasmBinaryWriter::writeData(const char* data, size_t size) { From 87267fbc09e99a189c07bc63069f38b426943d92 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:15:09 -0700 Subject: [PATCH 23/69] work --- src/wasm/wasm-binary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 339ab16e146..cb89a2201bb 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1535,7 +1535,7 @@ void WasmBinaryWriter::writeCodeAnnotations() { // hints. struct ExprHint { Expression* expr; - CodeAnnotation* hint; + Function::CodeAnnotation* hint; }; struct FuncHints { From de1dd26d970ec845d1f3ba106e6e75b397b6ed39 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:16:02 -0700 Subject: [PATCH 24/69] work --- src/wasm/wasm-binary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index cb89a2201bb..00ffb055fb6 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1545,7 +1545,7 @@ void WasmBinaryWriter::writeCodeAnnotations() { std::vector funcHintsVec; - for (auto& func : wasm.functions) { + for (auto& func : wasm->functions) { // Collect the Branch Hints for this function. FuncHints funcHints; @@ -1556,7 +1556,7 @@ void WasmBinaryWriter::writeCodeAnnotations() { } // If we found something, note it all. - if (!funcHints.exprHints) { + if (!funcHints.exprHints.empty()) { funcHints.func = func->name; funcHintsVec.emplace_back(std::move(funcHints)); } From 0a9ef926d2dc17f2cf69fc734bbb88cf6a6c5ec3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:20:20 -0700 Subject: [PATCH 25/69] work --- src/wasm/wasm-binary.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 00ffb055fb6..5cb5256c521 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -28,6 +28,7 @@ #include "support/debug.h" #include "support/stdckdint.h" #include "support/string.h" +#include "wasm-annotations.h" #include "wasm-binary.h" #include "wasm-debug.h" #include "wasm-limits.h" @@ -1568,13 +1569,13 @@ void WasmBinaryWriter::writeCodeAnnotations() { // Emit the section, as we found data. auto start = startSection(BinaryConsts::Custom); - writeInlineString(Annotations::BranchHint.c_str()); // c_str? + writeInlineString(Annotations::BranchHint); o << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { o << U32LEB(getFunctionIndex(funcHints.func)); - o << U32LEB(funcHintsVec.hints.size()); + o << U32LEB(funcHints.exprHints.size()); for (auto& exprHint : funcHintsVec.exprHints) { // We must only emit hints that are present. assert(exprHint->branchLikely); From 0ab80b03e8319f5868994811ea8b62c445fd5211 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:20:51 -0700 Subject: [PATCH 26/69] work --- src/wasm/wasm-binary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 5cb5256c521..cd22ebe24b4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1569,14 +1569,14 @@ void WasmBinaryWriter::writeCodeAnnotations() { // Emit the section, as we found data. auto start = startSection(BinaryConsts::Custom); - writeInlineString(Annotations::BranchHint); + writeInlineString(Annotations::BranchHint.str); o << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { o << U32LEB(getFunctionIndex(funcHints.func)); o << U32LEB(funcHints.exprHints.size()); - for (auto& exprHint : funcHintsVec.exprHints) { + for (auto& exprHint : funcHints.exprHints) { // We must only emit hints that are present. assert(exprHint->branchLikely); From f99f04e2fac6f31bea9f77ed87ad48f4fb1c4291 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:21:21 -0700 Subject: [PATCH 27/69] work --- src/wasm/wasm-binary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index cd22ebe24b4..a39cf7792d6 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1578,7 +1578,7 @@ void WasmBinaryWriter::writeCodeAnnotations() { o << U32LEB(funcHints.exprHints.size()); for (auto& exprHint : funcHints.exprHints) { // We must only emit hints that are present. - assert(exprHint->branchLikely); + assert(exprHint.branchLikely); // Emit the offset as relative to the start of the function locals TODO auto iter = binaryLocations.expressions.find(exprHint.expr); @@ -1590,7 +1590,7 @@ void WasmBinaryWriter::writeCodeAnnotations() { o << U32LEB(1); // Hint contents: likely or not. - o << U32LEB(*exprHint->branchLikely); + o << U32LEB(int(*exprHint.branchLikely)); } } From 0cdcfda0519e9e3a8db6ad0bfc4a5426ebd1e5d0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 15:22:15 -0700 Subject: [PATCH 28/69] work --- src/wasm/wasm-binary.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index a39cf7792d6..d690ea704f9 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1577,9 +1577,6 @@ void WasmBinaryWriter::writeCodeAnnotations() { o << U32LEB(funcHints.exprHints.size()); for (auto& exprHint : funcHints.exprHints) { - // We must only emit hints that are present. - assert(exprHint.branchLikely); - // Emit the offset as relative to the start of the function locals TODO auto iter = binaryLocations.expressions.find(exprHint.expr); assert(iter != binaryLocations.expressions.end()); @@ -1589,8 +1586,11 @@ void WasmBinaryWriter::writeCodeAnnotations() { // Hint size, always 1 for now. o << U32LEB(1); + // We must only emit hints that are present. + assert(exprHint.hint->branchLikely); + // Hint contents: likely or not. - o << U32LEB(int(*exprHint.branchLikely)); + o << U32LEB(int(*exprHint.hint->branchLikely)); } } From c34222cea4c192aa4cacf562a03c27c8dbb6339d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 16:13:33 -0700 Subject: [PATCH 29/69] work --- src/wasm-binary.h | 12 +++++++++--- src/wasm/wasm-binary.cpp | 30 +++++++++++++----------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 38a49d2419b..94080c188a9 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1451,7 +1451,6 @@ class WasmBinaryReader { // Settings. bool debugInfo = true; - bool DWARF = false; bool skipFunctionBodies = false; // Internal state. @@ -1474,7 +1473,7 @@ class WasmBinaryReader { std::vector& sourceMap = defaultEmptySourceMap); void setDebugInfo(bool value) { debugInfo = value; } - void setDWARF(bool value) { DWARF = value; } + void setDWARF(bool value) {} // TODO: rmoof void setSkipFunctionBodies(bool skipFunctionBodies_) { skipFunctionBodies = skipFunctionBodies_; } @@ -1629,7 +1628,14 @@ class WasmBinaryReader { } private: - bool hasDWARFSections(); + // In certain modes we need to note the locations of expressions, to match + // them against sections like DWARF or custom annotations. As this incurs + // overhead, we only note locations when we actually need to. + bool needCodeLocations = false; + + // Scans ahead in the binary to check certain conditions like + // needCodeLocations. + preScan(); }; } // namespace wasm diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index d690ea704f9..79384f62e78 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1871,7 +1871,7 @@ WasmBinaryReader::WasmBinaryReader(Module& wasm, wasm.features = features; } -bool WasmBinaryReader::hasDWARFSections() { +void WasmBinaryReader::preScan() { assert(pos == 0); getInt32(); // magic getInt32(); // version @@ -1885,28 +1885,24 @@ bool WasmBinaryReader::hasDWARFSections() { auto oldPos = pos; if (sectionCode == BinaryConsts::Section::Custom) { auto sectionName = getInlineString(); - if (Debug::isDWARFSection(sectionName)) { - has = true; + // DWARF sections contain code offsets, as do code annotations; either of + // those force us to need code locations. + // TODO: For Branch Hinting, we could note which functions require + // code locations, as an optimization. + if (Debug::isDWARFSection(sectionName) || + sectionName == Annotations::BranchHint) { + needCodeLocations = true; break; } } pos = oldPos + payloadLen; } + // Reset. pos = 0; - return has; } void WasmBinaryReader::read() { - if (DWARF) { - // In order to update dwarf, we must store info about each IR node's - // binary position. This has noticeable memory overhead, so we don't do it - // by default: the user must request it by setting "DWARF", and even if so - // we scan ahead to see that there actually *are* DWARF sections, so that - // we don't do unnecessary work. - if (!hasDWARFSections()) { - DWARF = false; - } - } + preScan(); // Skip ahead and read the name section so we know what names to use when we // construct module elements. @@ -1951,7 +1947,7 @@ void WasmBinaryReader::read() { readFunctionSignatures(); break; case BinaryConsts::Section::Code: - if (DWARF) { + if (needCodeLocations) { codeSectionLocation = pos; } readFunctions(); @@ -2943,7 +2939,7 @@ void WasmBinaryReader::readFunctions() { if (numFuncBodies + numFuncImports != wasm.functions.size()) { throwError("invalid function section size, must equal types"); } - if (DWARF) { + if (needCodeLocations) { builder.setBinaryLocation(&pos, codeSectionLocation); } for (size_t i = 0; i < numFuncBodies; i++) { @@ -2957,7 +2953,7 @@ void WasmBinaryReader::readFunctions() { auto& func = wasm.functions[numFuncImports + i]; currFunction = func.get(); - if (DWARF) { + if (needCodeLocations) { func->funcLocation = BinaryLocations::FunctionLocations{ BinaryLocation(sizePos - codeSectionLocation), BinaryLocation(pos - codeSectionLocation), From c7f5de2b4aa9a5f82222a7d4eb182ef6e0a9bd9c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 16:24:19 -0700 Subject: [PATCH 30/69] work --- src/wasm-binary.h | 7 ++++--- src/wasm/wasm-binary.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 94080c188a9..19eeb6fce49 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1615,9 +1615,10 @@ class WasmBinaryReader { static Name escape(Name name); void findAndReadNames(); - void readFeatures(size_t); - void readDylink(size_t); - void readDylink0(size_t); + void readFeatures(size_t payloadLen); + void readDylink(size_t payloadLen); + void readDylink0(size_t payloadLen); + void readBranchHints(size_t payloadLen); Index readMemoryAccess(Address& alignment, Address& offset); std::tuple getMemarg(); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 79384f62e78..b694cdec59e 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1906,6 +1906,7 @@ void WasmBinaryReader::read() { // Skip ahead and read the name section so we know what names to use when we // construct module elements. + // TODO: Combine this pre-scan with the one in preScan(). if (debugInfo) { findAndReadNames(); } @@ -2020,6 +2021,8 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { readDylink(payloadLen); } else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) { readDylink0(payloadLen); + } else if (sectionName.equals(Annotations::BranchHint)) { + readBranchHints(payloadLen); } else { // an unfamiliar custom section if (sectionName.equals(BinaryConsts::CustomSections::Linking)) { @@ -5167,6 +5170,33 @@ void WasmBinaryReader::readDylink0(size_t payloadLen) { } } +void WasmBinaryReader::readBranchHints(size_t payloadLen) { + auto sectionPos = pos; + + auto numFuncs = getU32LEB(); + for (Index func = 0; func < numFuncs; func++) { + auto funcIndex = getU32LEB(); + auto numHints = getU32LEB(); + for (Index hint = 0; hint < numHints; hint++) { + auto offset = getU32LEB(); + + auto size = getU32LEB(); + if (size != 1) { + throwError("bad BranchHint size"); + } + + auto likely = getU32LEB(); + if (likely != 0 && likely != 1) { + throwError("bad BranchHint value"); + } + } + } + + if (pos != sectionPos + payloadLen) { + throwError("bad BranchHint section size"); + } +} + Index WasmBinaryReader::readMemoryAccess(Address& alignment, Address& offset) { auto rawAlignment = getU32LEB(); bool hasMemIdx = false; From d17c15512451e950b515cdb9d9c6483ff75a4798 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 16:34:05 -0700 Subject: [PATCH 31/69] work --- src/wasm/wasm-binary.cpp | 4 +++- test/lit/wat-annotations.wast | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index b694cdec59e..154b1689840 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -5174,8 +5174,10 @@ void WasmBinaryReader::readBranchHints(size_t payloadLen) { auto sectionPos = pos; auto numFuncs = getU32LEB(); - for (Index func = 0; func < numFuncs; func++) { + for (Index i = 0; i < numFuncs; i++) { auto funcIndex = getU32LEB(); + auto& func = wasm.functions[funcIndex]; + auto numHints = getU32LEB(); for (Index hint = 0; hint < numHints; hint++) { auto offset = getU32LEB(); diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 6d5be1f0a9d..aa80bb4b934 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -5,6 +5,16 @@ (module ;; CHECK: (type $0 (func (param i32))) + (func $no-annotations (param $x i32) + ;; A function with no annotations. This tests that we use function indexes + ;; properly in the section. + (block $out + (br_if $out + (local.get $x) + ) + ) + ) + ;; CHECK: (func $branch-hints-br_if (type $0) (param $x i32) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (@metadata.code.branch_hint "\00") @@ -40,4 +50,14 @@ ) ) ) + + (func $branch_hints-br_if-2 (param $x i32) + ;; A second function with hints. + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + ) + ) ) From 36caffbab22dcd22a22c9535992a5a8cacbc0c3b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 2 May 2025 16:48:03 -0700 Subject: [PATCH 32/69] work --- src/wasm/wasm-binary.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 154b1689840..eecea4ab39c 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -5178,9 +5178,29 @@ void WasmBinaryReader::readBranchHints(size_t payloadLen) { auto funcIndex = getU32LEB(); auto& func = wasm.functions[funcIndex]; + // The encoded offsets we read below are relative to the start of the + // function's locals (the declarations). + auto funcLocalsOffset = func->funcLocation.declarations; + + // We have a map of expressions to their locations. Invert that to get the + // map we will use below, from offsets to expressions. + std::unordered_map locationsMap; + + for (auto& [expr, span] : func->expressionLocations) { + locationsMap[span.begin] = expr; + } + auto numHints = getU32LEB(); for (Index hint = 0; hint < numHints; hint++) { - auto offset = getU32LEB(); + // To get the absolute offset, add the function's offset. + auto relativeOffset = getU32LEB(); + auto absoluteOffset = funcLocalsOffset + relativeOffset; + + auto iter = locationsMap.find(absoluteOffset); + if (iter == locationsMap.end()) { + throwError("bad BranchHint offset"); + } + auto* expr = iter->second; auto size = getU32LEB(); if (size != 1) { @@ -5191,6 +5211,9 @@ void WasmBinaryReader::readBranchHints(size_t payloadLen) { if (likely != 0 && likely != 1) { throwError("bad BranchHint value"); } + + // Apply the valid hint. + func->codeAnnotations[expr].branchLikely = likely; } } From f66d19c3fd6a21404a4f6c3987edcb6d99764a19 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 08:56:27 -0700 Subject: [PATCH 33/69] work --- src/wasm-binary.h | 2 +- src/wasm/wasm-binary.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 19eeb6fce49..21a7c0ea380 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1636,7 +1636,7 @@ class WasmBinaryReader { // Scans ahead in the binary to check certain conditions like // needCodeLocations. - preScan(); + void preScan(); }; } // namespace wasm diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index eecea4ab39c..07a68121599 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1875,7 +1875,6 @@ void WasmBinaryReader::preScan() { assert(pos == 0); getInt32(); // magic getInt32(); // version - bool has = false; while (more()) { uint8_t sectionCode = getInt8(); uint32_t payloadLen = getU32LEB(); @@ -2021,7 +2020,7 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { readDylink(payloadLen); } else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) { readDylink0(payloadLen); - } else if (sectionName.equals(Annotations::BranchHint)) { + } else if (sectionName.equals(Annotations::BranchHint.str)) { readBranchHints(payloadLen); } else { // an unfamiliar custom section From d220c4e6f963a927e14edb68c4482cc8ba7c6b21 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 08:57:14 -0700 Subject: [PATCH 34/69] work --- src/wasm/wasm-binary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 07a68121599..2fd5b4456fe 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -5186,7 +5186,7 @@ void WasmBinaryReader::readBranchHints(size_t payloadLen) { std::unordered_map locationsMap; for (auto& [expr, span] : func->expressionLocations) { - locationsMap[span.begin] = expr; + locationsMap[span.start] = expr; } auto numHints = getU32LEB(); From 0248a9583c754ad54d72ee294a7f13f22f2492bb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 08:59:26 -0700 Subject: [PATCH 35/69] work --- test/lit/wat-annotations.wast | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index aa80bb4b934..4f4428a3905 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -1,10 +1,19 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; RUN: wasm-opt -all %s -S -o - | filecheck %s +;; RUN: wasm-opt -all %s -S -o - | filecheck %s +;; RUN: wasm-opt -all --roundtrip %s -S -o - | filecheck %s --check-prefix=BINARY (module + ;; CHECK: (type $0 (func (param i32))) + ;; CHECK: (func $no-annotations (type $0) (param $x i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $no-annotations (param $x i32) ;; A function with no annotations. This tests that we use function indexes ;; properly in the section. @@ -51,6 +60,14 @@ ) ) + ;; CHECK: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $branch_hints-br_if-2 (param $x i32) ;; A second function with hints. (block $out From 7b15774e4b2b3aca8aa80f075aa665edf67e32ac Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 10:21:13 -0700 Subject: [PATCH 36/69] work --- src/wasm/wasm-binary.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 2fd5b4456fe..34ef44cb779 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1573,15 +1573,23 @@ void WasmBinaryWriter::writeCodeAnnotations() { o << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { + auto* func = wasm->getFunction(funcHints.func); + o << U32LEB(getFunctionIndex(funcHints.func)); o << U32LEB(funcHints.exprHints.size()); for (auto& exprHint : funcHints.exprHints) { - // Emit the offset as relative to the start of the function locals TODO - auto iter = binaryLocations.expressions.find(exprHint.expr); - assert(iter != binaryLocations.expressions.end()); - auto offset = iter->second.start; - o << U32LEB(offset); + // Emit the offset as relative to the start of the function locals (i.e. + // the function declarations). + auto exprIter = binaryLocations.expressions.find(exprHint.expr); + assert(exprIter != binaryLocations.expressions.end()); + auto exprOffset = exprIter->second.start; + + auto funcIter = binaryLocations.functions.find(func); + assert(funcIter != binaryLocations.functions.end()); + auto funcOffset = funcIter->second.declarations; + + o << U32LEB(exprOffset - funcOffset); // Hint size, always 1 for now. o << U32LEB(1); From 1f227b42e0dc99dc26b314d50208a73e95a12050 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 11:21:50 -0700 Subject: [PATCH 37/69] writing correctly from a single function --- src/wasm/wasm-binary.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 34ef44cb779..0d4540809df 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1584,12 +1584,16 @@ void WasmBinaryWriter::writeCodeAnnotations() { auto exprIter = binaryLocations.expressions.find(exprHint.expr); assert(exprIter != binaryLocations.expressions.end()); auto exprOffset = exprIter->second.start; +std::cout << "exprOff " << exprOffset << '\n'; auto funcIter = binaryLocations.functions.find(func); assert(funcIter != binaryLocations.functions.end()); auto funcOffset = funcIter->second.declarations; +std::cout << "funcOff " << funcOffset << '\n'; - o << U32LEB(exprOffset - funcOffset); + // exprOffset is relative to the function body (after the declarations), + // so we add. + o << U32LEB(exprOffset + funcOffset); // Hint size, always 1 for now. o << U32LEB(1); From 16097383f3fc292953d9bfafca1d2cc251f35cf6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 13:56:42 -0700 Subject: [PATCH 38/69] work --- src/wasm/wasm-binary.cpp | 16 ++++++++++++---- test/lit/wat-annotations.wast | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 0d4540809df..c761ff04b5f 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -181,6 +181,7 @@ void WasmBinaryWriter::finishSection(int32_t start) { // we are at the right absolute address. // We are relative to the section start. auto totalAdjustment = adjustmentForLEBShrinking + body; +std::cout << "Aadjust -= " << totalAdjustment << "\n"; for (auto& [_, locations] : binaryLocations.expressions) { locations.start -= totalAdjustment; locations.end -= totalAdjustment; @@ -459,6 +460,7 @@ void WasmBinaryWriter::writeFunctions() { // We added the binary locations, adjust them: they must be relative // to the code section. auto& span = binaryLocations.expressions[curr]; +std::cout << "Badjust -= " << adjustmentForLEBShrinking << " for " << *curr << "\n"; span.start -= adjustmentForLEBShrinking; span.end -= adjustmentForLEBShrinking; auto iter = binaryLocations.delimiters.find(curr); @@ -1512,6 +1514,7 @@ void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { func->codeAnnotations.count(curr))) { binaryLocations.expressions[curr] = BinaryLocations::Span{BinaryLocation(o.size()), 0}; +std::cout << "track " << o.size() << " for " << *curr << "\n"; // XXX this happens twice. too many writeMetadata or such binaryLocationTrackedExpressionsForFunc.push_back(curr); } } @@ -1582,18 +1585,23 @@ void WasmBinaryWriter::writeCodeAnnotations() { // Emit the offset as relative to the start of the function locals (i.e. // the function declarations). auto exprIter = binaryLocations.expressions.find(exprHint.expr); +std::cout << "fynd " << *exprHint.expr << '\n'; assert(exprIter != binaryLocations.expressions.end()); auto exprOffset = exprIter->second.start; std::cout << "exprOff " << exprOffset << '\n'; auto funcIter = binaryLocations.functions.find(func); assert(funcIter != binaryLocations.functions.end()); - auto funcOffset = funcIter->second.declarations; -std::cout << "funcOff " << funcOffset << '\n'; + auto funcStart = funcIter->second.start; + auto funcDeclarations = funcIter->second.declarations; +std::cout << "funcOff1 " << funcStart << '\n'; +std::cout << "funcOff2 " << funcDeclarations << '\n'; // exprOffset is relative to the function body (after the declarations), - // so we add. - o << U32LEB(exprOffset + funcOffset); + // so we just need to add the offset from the function start to the + // declarations. XXX + o << U32LEB(exprOffset - funcDeclarations); +std::cout << "written: " << (exprOffset - funcDeclarations) << '\n'; // Hint size, always 1 for now. o << U32LEB(1); diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 4f4428a3905..5dc2731fc58 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -70,6 +70,7 @@ ;; CHECK-NEXT: ) (func $branch_hints-br_if-2 (param $x i32) ;; A second function with hints. + ;; TODO: test a big function so the function's size LEB >1 is tested (block $out (@metadata.code.branch_hint "\01") (br_if $out From c61b080e1c9a34638be4e2ec1227e5e3c6a6a956 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 15:26:55 -0700 Subject: [PATCH 39/69] fix --- src/wasm-binary.h | 1 + src/wasm-stack.h | 4 +++- src/wasm/wasm-binary.cpp | 22 +++++++++++----------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 21a7c0ea380..0e6745f002d 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1351,6 +1351,7 @@ class WasmBinaryWriter { void writeSourceMapEpilog(); void writeDebugLocation(const Function::DebugLocation& loc); void writeNoDebugLocation(); + void writeSourceMapLocation(Expression* curr, Function* func); void writeMetadata(Expression* curr, Function* func); void writeMetadataEnd(Expression* curr, Function* func); void writeExtraDebugLocation(Expression* curr, Function* func, size_t id); diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 101647088cc..ae77f6bfae8 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -479,7 +479,9 @@ class BinaryenIRToBinaryWriter } void emitUnreachable() { writer.emitUnreachable(); } void emitDebugLocation(Expression* curr) { - parent.writeMetadata(curr, func); + if (sourceMap) { + parent.writeSourceMapLocation(curr, func); + } } MappedLocals& getMappedLocals() { return writer.mappedLocals; } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index c761ff04b5f..3a44f1db75e 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1493,19 +1493,19 @@ void WasmBinaryWriter::writeNoDebugLocation() { } } -void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { - if (sourceMap) { - auto& debugLocations = func->debugLocations; - auto iter = debugLocations.find(curr); - if (iter != debugLocations.end() && iter->second) { - // There is debug information here, write it out. - writeDebugLocation(*(iter->second)); - } else { - // This expression has no debug location. - writeNoDebugLocation(); - } +void WasmBinaryWriter::writeSourceMapLocation(Expression* curr, Function* func) { + auto& debugLocations = func->debugLocations; + auto iter = debugLocations.find(curr); + if (iter != debugLocations.end() && iter->second) { + // There is debug information here, write it out. + writeDebugLocation(*(iter->second)); + } else { + // This expression has no debug location. + writeNoDebugLocation(); } +} +void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { // If this is an instruction in a function, and if the original wasm had // binary locations tracked, then track it in the output as well. We also // track locations of instructions that have code annotations, as their binary From a6efd688fac1b796ca172bc6900b650adb7c166e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:00:38 -0700 Subject: [PATCH 40/69] fix --- src/wasm/wasm-binary.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 3a44f1db75e..13a813441bf 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1514,7 +1514,6 @@ void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { func->codeAnnotations.count(curr))) { binaryLocations.expressions[curr] = BinaryLocations::Span{BinaryLocation(o.size()), 0}; -std::cout << "track " << o.size() << " for " << *curr << "\n"; // XXX this happens twice. too many writeMetadata or such binaryLocationTrackedExpressionsForFunc.push_back(curr); } } @@ -1585,23 +1584,14 @@ void WasmBinaryWriter::writeCodeAnnotations() { // Emit the offset as relative to the start of the function locals (i.e. // the function declarations). auto exprIter = binaryLocations.expressions.find(exprHint.expr); -std::cout << "fynd " << *exprHint.expr << '\n'; assert(exprIter != binaryLocations.expressions.end()); auto exprOffset = exprIter->second.start; -std::cout << "exprOff " << exprOffset << '\n'; auto funcIter = binaryLocations.functions.find(func); assert(funcIter != binaryLocations.functions.end()); - auto funcStart = funcIter->second.start; auto funcDeclarations = funcIter->second.declarations; -std::cout << "funcOff1 " << funcStart << '\n'; -std::cout << "funcOff2 " << funcDeclarations << '\n'; - // exprOffset is relative to the function body (after the declarations), - // so we just need to add the offset from the function start to the - // declarations. XXX o << U32LEB(exprOffset - funcDeclarations); -std::cout << "written: " << (exprOffset - funcDeclarations) << '\n'; // Hint size, always 1 for now. o << U32LEB(1); From d65995198f4341ed4b577b636c00fc0171c010d4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:02:08 -0700 Subject: [PATCH 41/69] fix --- test/lit/wat-annotations.wast | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 5dc2731fc58..dda511ad29c 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -69,8 +69,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $branch_hints-br_if-2 (param $x i32) - ;; A second function with hints. - ;; TODO: test a big function so the function's size LEB >1 is tested + ;; A second function with hints. This one also has local definitions, which + ;; should not confuse us (branch hint offsets are relative to the start of + ;; the local definitions, not the end). (block $out (@metadata.code.branch_hint "\01") (br_if $out From e9e43893b2ad7d68022ac40c8d66c29f47303e5c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:02:55 -0700 Subject: [PATCH 42/69] fix --- test/lit/wat-annotations.wast | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index dda511ad29c..ad5bd59fc00 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -14,6 +14,15 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; BINARY: (type $0 (func (param i32))) + + ;; BINARY: (func $no-annotations (type $0) (param $x i32) + ;; BINARY-NEXT: (block $block + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) (func $no-annotations (param $x i32) ;; A function with no annotations. This tests that we use function indexes ;; properly in the section. @@ -40,6 +49,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; BINARY: (func $branch-hints-br_if (type $0) (param $x i32) + ;; BINARY-NEXT: (block $block + ;; BINARY-NEXT: (@metadata.code.branch_hint "\00") + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: (@metadata.code.branch_hint "\00") + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) (func $branch-hints-br_if (param $x i32) (block $out ;; A branch annotated as unlikely, and one as likely. @@ -68,6 +93,14 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; BINARY: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; BINARY-NEXT: (block $block + ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) (func $branch_hints-br_if-2 (param $x i32) ;; A second function with hints. This one also has local definitions, which ;; should not confuse us (branch hint offsets are relative to the start of From 3a81bc03c14ceaadf86f07d7e0c92d25c1a41f04 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:05:59 -0700 Subject: [PATCH 43/69] nicer --- src/wasm-binary.h | 6 ++++-- src/wasm-stack.h | 4 ++-- src/wasm/wasm-binary.cpp | 4 ++-- src/wasm/wasm-stack.cpp | 6 +++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 0e6745f002d..659b3a95bb7 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1352,10 +1352,12 @@ class WasmBinaryWriter { void writeDebugLocation(const Function::DebugLocation& loc); void writeNoDebugLocation(); void writeSourceMapLocation(Expression* curr, Function* func); - void writeMetadata(Expression* curr, Function* func); - void writeMetadataEnd(Expression* curr, Function* func); void writeExtraDebugLocation(Expression* curr, Function* func, size_t id); + // Track where expressions go in the binary format. + void trackExpressionStart(Expression* curr, Function* func); + void trackExpressionEnd(Expression* curr, Function* func); + void writeCodeAnnotations(); // helpers diff --git a/src/wasm-stack.h b/src/wasm-stack.h index ae77f6bfae8..2a031cccd9f 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -99,11 +99,11 @@ class BinaryInstWriter : public OverriddenVisitor { void visit(Expression* curr) { if (func && !sourceMap) { - parent.writeMetadata(curr, func); + parent.trackExpressionStart(curr, func); } OverriddenVisitor::visit(curr); if (func && !sourceMap) { - parent.writeMetadataEnd(curr, func); + parent.trackExpressionEnd(curr, func); } } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 13a813441bf..82f7652ef91 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1505,7 +1505,7 @@ void WasmBinaryWriter::writeSourceMapLocation(Expression* curr, Function* func) } } -void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { +void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) { // If this is an instruction in a function, and if the original wasm had // binary locations tracked, then track it in the output as well. We also // track locations of instructions that have code annotations, as their binary @@ -1518,7 +1518,7 @@ void WasmBinaryWriter::writeMetadata(Expression* curr, Function* func) { } } -void WasmBinaryWriter::writeMetadataEnd(Expression* curr, Function* func) { +void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) { if (func && !func->expressionLocations.empty()) { auto& span = binaryLocations.expressions.at(curr); span.end = o.size(); diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index e484175af21..61d69e48602 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -2737,7 +2737,7 @@ void BinaryInstWriter::emitScopeEnd(Expression* curr) { breakStack.pop_back(); o << int8_t(BinaryConsts::End); if (func) { - parent.writeMetadataEnd(curr, func); + parent.trackExpressionEnd(curr, func); } } @@ -3212,9 +3212,9 @@ void StackIRToBinaryWriter::write() { case StackInst::IfBegin: case StackInst::LoopBegin: case StackInst::TryTableBegin: { - parent.writeMetadata(inst->origin, func); + parent.trackExpressionStart(inst->origin, func); writer.visit(inst->origin); - parent.writeMetadataEnd(inst->origin, func); + parent.trackExpressionEnd(inst->origin, func); break; } case StackInst::TryEnd: From d977d5f9313f0f1943a24b87823383ad3afffa1d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:17:58 -0700 Subject: [PATCH 44/69] work --- src/wasm-stack.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/wasm-stack.h b/src/wasm-stack.h index 2a031cccd9f..941c5fb8458 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -97,12 +97,15 @@ class BinaryInstWriter : public OverriddenVisitor { bool DWARF) : parent(parent), o(o), func(func), sourceMap(sourceMap), DWARF(DWARF) {} + // TODO: trackExpressions bool? for speeed + // and do we need sourceMap? + void visit(Expression* curr) { - if (func && !sourceMap) { + if (func) { parent.trackExpressionStart(curr, func); } OverriddenVisitor::visit(curr); - if (func && !sourceMap) { + if (func) { parent.trackExpressionEnd(curr, func); } } From 396fc52aef4997155ad72a97ecd4dd49ea17198c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:24:28 -0700 Subject: [PATCH 45/69] fix --- src/wasm/wasm-stack.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 61d69e48602..203c8b61069 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -3213,6 +3213,9 @@ void StackIRToBinaryWriter::write() { case StackInst::LoopBegin: case StackInst::TryTableBegin: { parent.trackExpressionStart(inst->origin, func); + if (sourceMap) { + parent.writeSourceMapLocation(inst->origin, func); + } writer.visit(inst->origin); parent.trackExpressionEnd(inst->origin, func); break; From 31c4823a2cd394186b42f757c8865d5e6819a0aa Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:25:02 -0700 Subject: [PATCH 46/69] format --- src/wasm/wasm-binary.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 82f7652ef91..055dd70067a 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -181,7 +181,7 @@ void WasmBinaryWriter::finishSection(int32_t start) { // we are at the right absolute address. // We are relative to the section start. auto totalAdjustment = adjustmentForLEBShrinking + body; -std::cout << "Aadjust -= " << totalAdjustment << "\n"; + std::cout << "Aadjust -= " << totalAdjustment << "\n"; for (auto& [_, locations] : binaryLocations.expressions) { locations.start -= totalAdjustment; locations.end -= totalAdjustment; @@ -460,7 +460,8 @@ void WasmBinaryWriter::writeFunctions() { // We added the binary locations, adjust them: they must be relative // to the code section. auto& span = binaryLocations.expressions[curr]; -std::cout << "Badjust -= " << adjustmentForLEBShrinking << " for " << *curr << "\n"; + std::cout << "Badjust -= " << adjustmentForLEBShrinking << " for " + << *curr << "\n"; span.start -= adjustmentForLEBShrinking; span.end -= adjustmentForLEBShrinking; auto iter = binaryLocations.delimiters.find(curr); @@ -1493,7 +1494,8 @@ void WasmBinaryWriter::writeNoDebugLocation() { } } -void WasmBinaryWriter::writeSourceMapLocation(Expression* curr, Function* func) { +void WasmBinaryWriter::writeSourceMapLocation(Expression* curr, + Function* func) { auto& debugLocations = func->debugLocations; auto iter = debugLocations.find(curr); if (iter != debugLocations.end() && iter->second) { From 5ccfabbfbb7019a13035c08d19469d09f0b5b0af Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 May 2025 16:25:20 -0700 Subject: [PATCH 47/69] clean --- src/wasm/wasm-binary.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 055dd70067a..0b70521f46e 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -181,7 +181,6 @@ void WasmBinaryWriter::finishSection(int32_t start) { // we are at the right absolute address. // We are relative to the section start. auto totalAdjustment = adjustmentForLEBShrinking + body; - std::cout << "Aadjust -= " << totalAdjustment << "\n"; for (auto& [_, locations] : binaryLocations.expressions) { locations.start -= totalAdjustment; locations.end -= totalAdjustment; @@ -460,8 +459,6 @@ void WasmBinaryWriter::writeFunctions() { // We added the binary locations, adjust them: they must be relative // to the code section. auto& span = binaryLocations.expressions[curr]; - std::cout << "Badjust -= " << adjustmentForLEBShrinking << " for " - << *curr << "\n"; span.start -= adjustmentForLEBShrinking; span.end -= adjustmentForLEBShrinking; auto iter = binaryLocations.delimiters.find(curr); From b6d69f7774904d5a8539e128c5c47e0bc283c6be Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 12:40:22 -0700 Subject: [PATCH 48/69] fix.merge --- src/wasm-binary.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index caa1c4e1728..19a1a0b1659 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1358,10 +1358,6 @@ class WasmBinaryWriter { void trackExpressionEnd(Expression* curr, Function* func); void trackExpressionDelimiter(Expression* curr, Function* func, size_t id); - // Track where expressions go in the binary format. - void trackExpressionStart(Expression* curr, Function* func); - void trackExpressionEnd(Expression* curr, Function* func); - void writeCodeAnnotations(); // helpers From d8bde89311c694cdbedd11964df368b6edcd1363 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 12:42:10 -0700 Subject: [PATCH 49/69] work --- src/wasm-binary.h | 3 ++- src/wasm-stack.h | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 19a1a0b1659..d755b71efa4 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1454,6 +1454,7 @@ class WasmBinaryReader { // Settings. bool debugInfo = true; + bool DWARF = false; bool skipFunctionBodies = false; // Internal state. @@ -1476,7 +1477,7 @@ class WasmBinaryReader { std::vector& sourceMap = defaultEmptySourceMap); void setDebugInfo(bool value) { debugInfo = value; } - void setDWARF(bool value) {} // TODO: rmoof + void setDWARF(bool value) { DWARF = value; } void setSkipFunctionBodies(bool skipFunctionBodies_) { skipFunctionBodies = skipFunctionBodies_; } diff --git a/src/wasm-stack.h b/src/wasm-stack.h index e2431b8e90a..f97c9c7acc7 100644 --- a/src/wasm-stack.h +++ b/src/wasm-stack.h @@ -96,9 +96,6 @@ class BinaryInstWriter : public OverriddenVisitor { bool DWARF) : parent(parent), o(o), func(func), DWARF(DWARF) {} - // TODO: trackExpressions bool? for speeed - // and do we need sourceMap? - void visit(Expression* curr) { if (func) { parent.trackExpressionStart(curr, func); From a7357abc66c4dd91145b32699a9abe24cfa52f7f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 12:51:34 -0700 Subject: [PATCH 50/69] oops --- src/wasm/wasm-stack.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 7b1750130c9..889ab5e119f 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -3212,7 +3212,6 @@ void StackIRToBinaryWriter::write() { case StackInst::IfBegin: case StackInst::LoopBegin: case StackInst::TryTableBegin: { - parent.trackExpressionStart(inst->origin, func); if (sourceMap) { parent.writeSourceMapLocation(inst->origin, func); } From e337f5c037fda9a27f5e0adf4be58d9d28336d7d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 12:52:53 -0700 Subject: [PATCH 51/69] work --- src/wasm/wasm-binary.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index f4d401f7ca0..c89e899a727 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1520,6 +1520,8 @@ void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) { } void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) { + // TODO: If we need to track the end of annotated code locations, we need to + // enable that here. if (func && !func->expressionLocations.empty()) { auto& span = binaryLocations.expressions.at(curr); span.end = o.size(); @@ -1529,6 +1531,8 @@ void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) { void WasmBinaryWriter::trackExpressionDelimiter(Expression* curr, Function* func, size_t id) { + // TODO: If we need to track the delimiters of annotated code locations, we + // need to enable that here. if (func && !func->expressionLocations.empty()) { binaryLocations.delimiters[curr][id] = o.size(); } From 9a0535b85df7e0520eee111ef21605cb783167f5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 13:04:41 -0700 Subject: [PATCH 52/69] fix --- src/wasm/wasm-binary.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index c89e899a727..d2c2700aec9 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1908,8 +1908,12 @@ void WasmBinaryReader::preScan() { // code locations, as an optimization. if (sectionName == Annotations::BranchHint) { needCodeLocations = true; - // Do not break, so we keep looking for DWARF. - continue; + if (DWARF) { + // Do not break, so we keep looking for DWARF. + continue; + } else { + break; + } } // DWARF sections contain code offsets. From 344d08c8f0837be16378f354819d8dd3b871b717 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 13:14:55 -0700 Subject: [PATCH 53/69] more --- test/lit/wat-annotations.wast | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index ad5bd59fc00..882b7aa7255 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -86,6 +86,7 @@ ) ;; CHECK: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; CHECK-NEXT: (local $unused f64) ;; CHECK-NEXT: (block $out ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") ;; CHECK-NEXT: (br_if $out @@ -94,6 +95,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; BINARY: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; BINARY-NEXT: (local $unused f64) ;; BINARY-NEXT: (block $block ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") ;; BINARY-NEXT: (br_if $block @@ -102,6 +104,7 @@ ;; BINARY-NEXT: ) ;; BINARY-NEXT: ) (func $branch_hints-br_if-2 (param $x i32) + (local $unused f64) ;; A second function with hints. This one also has local definitions, which ;; should not confuse us (branch hint offsets are relative to the start of ;; the local definitions, not the end). From 48559258a0e66fe66fb55edf91559cba439cd642 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 6 May 2025 14:43:42 -0700 Subject: [PATCH 54/69] disable fuzzing, leave that for a later PR --- scripts/test/fuzzing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index dc48bb7033e..426c5e2e21d 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -135,6 +135,8 @@ # TODO: fix split_wast() on tricky escaping situations like a string ending # in \\" (the " is not escaped - there is an escaped \ before it) 'string-lifting-section.wast', + # TODO: fuzz custom annotations + 'wat-annotations.wast', ] From d0e06b75bbe16a67250dc93e368a191b96a4d152 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 08:40:39 -0700 Subject: [PATCH 55/69] Read the branch hints section last, though it must appear first [ci skip] --- src/wasm-binary.h | 8 ++++++++ src/wasm/wasm-binary.cpp | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index d755b71efa4..681bf247571 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1622,6 +1622,14 @@ class WasmBinaryReader { void readFeatures(size_t payloadLen); void readDylink(size_t payloadLen); void readDylink0(size_t payloadLen); + + // We read branch hints *after* the code section, even though they appear + // earlier. That is simpler for us as we note expression locations as we scan + // code, and then just need to match them up. To do this, we note the branch + // hint position and size in the first pass, and handle it later. + size_t branchHintsPos = 0; + size_t branchHintsLen = 0; + void readBranchHints(size_t payloadLen); Index readMemoryAccess(Address& alignment, Address& offset); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index d2c2700aec9..4903f76a06e 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -2036,6 +2036,12 @@ void WasmBinaryReader::read() { } } + // Go back and parse things we deferred. + if (branchHintsPos) { + pos = branchHintsPos; + readBranchHints(branchHintsLen); + } + validateBinary(); } @@ -2057,7 +2063,9 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { } else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) { readDylink0(payloadLen); } else if (sectionName.equals(Annotations::BranchHint.str)) { - readBranchHints(payloadLen); + // Only note the position and length, we read this later. + branchHintsPos = pos; + branchHintsLen = payloadLen; } else { // an unfamiliar custom section if (sectionName.equals(BinaryConsts::CustomSections::Linking)) { From fe7fdfc0f503fcbd32455b0b7191cb41829eb6cd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 10:01:14 -0700 Subject: [PATCH 56/69] prep test --- test/lit/wat-annotations.wast | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 882b7aa7255..632ad31cfc3 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -115,4 +115,15 @@ ) ) ) + + (func $mixing (param $x i32) + ;; Mix branch hints with source locations. Both hints should remain. + ;;@ mixing.src:1337:42 + (block $out + (@metadata.code.branch_hint "\01") + (br_if $out + (local.get $x) + ) + ) + ) ) From a5a8fff255c7add3fdd74b7c50ba2c0abc1716fc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 10:28:23 -0700 Subject: [PATCH 57/69] write annotations before code --- src/wasm-binary.h | 43 +++++++++++++++++++++- src/wasm/wasm-binary.cpp | 77 ++++++++++++++++++++++------------------ 2 files changed, 84 insertions(+), 36 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 681bf247571..9e07d3d50d7 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -258,6 +258,43 @@ class BufferWithRandomAccess : public std::vector { std::copy(begin(), end(), ret.begin()); return ret; } + + // Writes bytes in the maximum amount for a U32 LEB placeholder. Return the + // offset we wrote it at. The LEB can then be patched with the proper value + // later, when the size is known. + BinaryLocation writeU32LEBPlaceholder() { + BinaryLocation ret = o.size(); + o << int32_t(0); + o << int8_t(0); + return ret; + } + + // Given the location of a maximum-size LEB placeholder, as returned from + // writeU32LEBPlaceholder, use the current buffer size to figure out the size + // that should be written there, and emit an optimal-size LEB. Move contents + // backwards if we used fewer bytes, and return the number of bytes we moved. + // (Thus, if we return >0, we moved code backwards, and the caller may need to + // adjust things.) + BinaryLocation emitRetroactiveLEB(BinaryLocation start) { + // Do not include the LEB itself in the size. + auto actualSize = size() - start - MaxLEB32Bytes; + auto sizeFieldSize = writeAt(start, U32LEB(actualSize)); + + // We can move things back if the actual LEB for the size doesn't use the + // maximum 5 bytes. In that case we need to adjust offsets after we move + // things backwards. + auto adjustmentForLEBShrinking = MaxLEB32Bytes - sizeFieldSize; + if (adjustmentForLEBShrinking) { + // We can save some room. + assert(sizeFieldSize < MaxLEB32Bytes); + std::move(&o[start] + MaxLEB32Bytes, + &o[start] + MaxLEB32Bytes + size, + &o[start] + sizeFieldSize); + resize(size() - adjustmentForLEBShrinking); + } + + return adjustmentForLEBShrinking; + } }; namespace BinaryConsts { @@ -1358,7 +1395,11 @@ class WasmBinaryWriter { void trackExpressionEnd(Expression* curr, Function* func); void trackExpressionDelimiter(Expression* curr, Function* func, size_t id); - void writeCodeAnnotations(); + // Writes code annotations into a buffer and returns it. We cannot write them + // directly into the output since we write function code first (to get the + // offsets for the annotations), and only then can write annotations, which we + // must then insert before the code (as the spec requires that). + std::optional writeCodeAnnotations(); // helpers void writeInlineString(std::string_view name); diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 4903f76a06e..e7be2ce45b2 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -109,10 +109,7 @@ void WasmBinaryWriter::writeHeader() { } int32_t WasmBinaryWriter::writeU32LEBPlaceholder() { - int32_t ret = o.size(); - o << int32_t(0); - o << int8_t(0); - return ret; + return o.writeU32LEBPlaceholder(); } void WasmBinaryWriter::writeResizableLimits( @@ -144,26 +141,12 @@ template int32_t WasmBinaryWriter::startSection(T code) { } void WasmBinaryWriter::finishSection(int32_t start) { - // section size does not include the reserved bytes of the size field itself - int32_t size = o.size() - start - MaxLEB32Bytes; - auto sizeFieldSize = o.writeAt(start, U32LEB(size)); - // We can move things back if the actual LEB for the size doesn't use the - // maximum 5 bytes. In that case we need to adjust offsets after we move - // things backwards. - auto adjustmentForLEBShrinking = MaxLEB32Bytes - sizeFieldSize; - if (adjustmentForLEBShrinking) { - // we can save some room, nice - assert(sizeFieldSize < MaxLEB32Bytes); - std::move(&o[start] + MaxLEB32Bytes, - &o[start] + MaxLEB32Bytes + size, - &o[start] + sizeFieldSize); - o.resize(o.size() - adjustmentForLEBShrinking); - if (sourceMap) { - for (auto i = sourceMapLocationsSizeAtSectionStart; - i < sourceMapLocations.size(); - ++i) { - sourceMapLocations[i].first -= adjustmentForLEBShrinking; - } + auto adjustmentForLEBShrinking = o.emitRetroactiveLEB(start); + if (adjustmentForLEBShrinking && sourceMap) { + for (auto i = sourceMapLocationsSizeAtSectionStart; + i < sourceMapLocations.size(); + ++i) { + sourceMapLocations[i].first -= adjustmentForLEBShrinking; } } @@ -489,6 +472,22 @@ void WasmBinaryWriter::writeFunctions() { } }); finishSection(sectionStart); + + // Code annotations must come before the code section (see comment on + // writeCodeAnnotations). + if (auto annotations = writeCodeAnnotations()) { + // We need to move the code section and put the annotations before it. + auto& annotationsBuffer = *annotations; + auto oldSize = o.size(); + o.resize(oldSize + annotationsBuffer.size()); + + // |sectionStart| is the start of the contents of the section. Subtract 1 to + // include the section code as well, so we move all of it. + std::move_backwards(&o[sectionStart - 1], &o[oldSize], o.end()); + std::copy(annotationsBuffer.begin(), + annotationsBuffer.end(), + &o[sectionStart - 1]); + } } void WasmBinaryWriter::writeStrings() { @@ -1538,7 +1537,7 @@ void WasmBinaryWriter::trackExpressionDelimiter(Expression* curr, } } -void WasmBinaryWriter::writeCodeAnnotations() { +std::optional WasmBinaryWriter::writeCodeAnnotations() { // Assemble the info for Branch Hinting: for each function, a vector of the // hints. struct ExprHint { @@ -1571,20 +1570,23 @@ void WasmBinaryWriter::writeCodeAnnotations() { } if (funcHintsVec.empty()) { - return; + return {}; } - // Emit the section, as we found data. - auto start = startSection(BinaryConsts::Custom); + BufferWithRandomAccess buffer; + + // We found data: emit the section. + buffer << uint8_t(BinaryConsts::Custom); + auto lebPos = buffer.writeU32LEBPlaceholder(); writeInlineString(Annotations::BranchHint.str); - o << U32LEB(funcHintsVec.size()); + buffer << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { auto* func = wasm->getFunction(funcHints.func); - o << U32LEB(getFunctionIndex(funcHints.func)); + buffer << U32LEB(getFunctionIndex(funcHints.func)); - o << U32LEB(funcHints.exprHints.size()); + buffer << U32LEB(funcHints.exprHints.size()); for (auto& exprHint : funcHints.exprHints) { // Emit the offset as relative to the start of the function locals (i.e. // the function declarations). @@ -1596,20 +1598,25 @@ void WasmBinaryWriter::writeCodeAnnotations() { assert(funcIter != binaryLocations.functions.end()); auto funcDeclarations = funcIter->second.declarations; - o << U32LEB(exprOffset - funcDeclarations); + buffer << U32LEB(exprOffset - funcDeclarations); // Hint size, always 1 for now. - o << U32LEB(1); + buffer << U32LEB(1); // We must only emit hints that are present. assert(exprHint.hint->branchLikely); // Hint contents: likely or not. - o << U32LEB(int(*exprHint.hint->branchLikely)); + buffer << U32LEB(int(*exprHint.hint->branchLikely)); } } - finishSection(start); + // Write the final size. We can ignore the return value, which is the number + // of bytes we shrank (if the LEB was smaller than the maximum size), as no + // value in this section cares. + (void)buffer.emitRetroactiveLEB(start); + + return buffer; } void WasmBinaryWriter::writeData(const char* data, size_t size) { From 057e2a1c5afdf6fc332a29c904c3d900e06dddaf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 10:37:53 -0700 Subject: [PATCH 58/69] work --- src/wasm-binary.h | 18 +++++++++--------- src/wasm/wasm-binary.cpp | 8 ++++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 9e07d3d50d7..556c729f580 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -263,9 +263,9 @@ class BufferWithRandomAccess : public std::vector { // offset we wrote it at. The LEB can then be patched with the proper value // later, when the size is known. BinaryLocation writeU32LEBPlaceholder() { - BinaryLocation ret = o.size(); - o << int32_t(0); - o << int8_t(0); + BinaryLocation ret = size(); + *this << int32_t(0); + *this << int8_t(0); return ret; } @@ -276,9 +276,9 @@ class BufferWithRandomAccess : public std::vector { // (Thus, if we return >0, we moved code backwards, and the caller may need to // adjust things.) BinaryLocation emitRetroactiveLEB(BinaryLocation start) { - // Do not include the LEB itself in the size. - auto actualSize = size() - start - MaxLEB32Bytes; - auto sizeFieldSize = writeAt(start, U32LEB(actualSize)); + // Do not include the LEB itself in the section size. + auto sectionSize = size() - start - MaxLEB32Bytes; + auto sizeFieldSize = writeAt(start, U32LEB(sectionSize)); // We can move things back if the actual LEB for the size doesn't use the // maximum 5 bytes. In that case we need to adjust offsets after we move @@ -287,9 +287,9 @@ class BufferWithRandomAccess : public std::vector { if (adjustmentForLEBShrinking) { // We can save some room. assert(sizeFieldSize < MaxLEB32Bytes); - std::move(&o[start] + MaxLEB32Bytes, - &o[start] + MaxLEB32Bytes + size, - &o[start] + sizeFieldSize); + std::move(&(*this)[start] + MaxLEB32Bytes, + &(*this)[start] + MaxLEB32Bytes + sectionSize, + &(*this)[start] + sizeFieldSize); resize(size() - adjustmentForLEBShrinking); } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index e7be2ce45b2..75babcbf67d 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -157,6 +157,10 @@ void WasmBinaryWriter::finishSection(int32_t start) { // The section type byte is right before the LEB for the size; we want // offsets that are relative to the body, which is after that section type // byte and the the size LEB. + // + // We can compute the size of the size field LEB by considering the original + // size of the maximal LEB, and the adjustment due to shrinking. + auto sizeFieldSize = MaxLEB32Bytes - adjustmentForLEBShrinking; auto body = start + sizeFieldSize; // Offsets are relative to the body of the code section: after the // section type byte and the size. @@ -483,7 +487,7 @@ void WasmBinaryWriter::writeFunctions() { // |sectionStart| is the start of the contents of the section. Subtract 1 to // include the section code as well, so we move all of it. - std::move_backwards(&o[sectionStart - 1], &o[oldSize], o.end()); + std::move_backward(&o[sectionStart - 1], &o[oldSize], o.end()); std::copy(annotationsBuffer.begin(), annotationsBuffer.end(), &o[sectionStart - 1]); @@ -1614,7 +1618,7 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { // Write the final size. We can ignore the return value, which is the number // of bytes we shrank (if the LEB was smaller than the maximum size), as no // value in this section cares. - (void)buffer.emitRetroactiveLEB(start); + (void)buffer.emitRetroactiveLEB(lebPos); return buffer; } From ef580803b7fb245ee84743118c43afe9873b5e00 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:17:38 -0700 Subject: [PATCH 59/69] fix --- src/wasm-binary.h | 9 +++++++++ src/wasm/wasm-binary.cpp | 6 ++---- test/lit/wat-annotations.wast | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 556c729f580..b52f01d7986 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -295,6 +295,15 @@ class BufferWithRandomAccess : public std::vector { return adjustmentForLEBShrinking; } + + void writeInlineString(std::string_view name) { + auto size = name.size(); + auto data = name.data(); + *this << U32LEB(size); + for (size_t i = 0; i < size; i++) { + *this << int8_t(data[i]); + } + } }; namespace BinaryConsts { diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 75babcbf67d..a400a9cf555 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -100,7 +100,6 @@ void WasmBinaryWriter::write() { writeLateCustomSections(); writeFeaturesSection(); - writeCodeAnnotations(); } void WasmBinaryWriter::writeHeader() { @@ -1582,7 +1581,7 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { // We found data: emit the section. buffer << uint8_t(BinaryConsts::Custom); auto lebPos = buffer.writeU32LEBPlaceholder(); - writeInlineString(Annotations::BranchHint.str); + buffer.writeInlineString(Annotations::BranchHint.str); buffer << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { @@ -1630,8 +1629,7 @@ void WasmBinaryWriter::writeData(const char* data, size_t size) { } void WasmBinaryWriter::writeInlineString(std::string_view name) { - o << U32LEB(name.size()); - writeData(name.data(), name.size()); + o.writeInlineString(name); } static bool isHexDigit(char ch) { diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 632ad31cfc3..c7e9e31dc68 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -116,6 +116,23 @@ ) ) + ;; CHECK: (func $mixing (type $0) (param $x i32) + ;; CHECK-NEXT: ;;@ mixing.src:1337:42 + ;; CHECK-NEXT: (block $out + ;; CHECK-NEXT: (@metadata.code.branch_hint "\01") + ;; CHECK-NEXT: (br_if $out + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; BINARY: (func $mixing (type $0) (param $x i32) + ;; BINARY-NEXT: (block $block + ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") + ;; BINARY-NEXT: (br_if $block + ;; BINARY-NEXT: (local.get $x) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) + ;; BINARY-NEXT: ) (func $mixing (param $x i32) ;; Mix branch hints with source locations. Both hints should remain. ;;@ mixing.src:1337:42 From 5cd99331a544a0beda80b5bb677afc65298c5b18 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:31:17 -0700 Subject: [PATCH 60/69] work --- src/wasm/wasm-binary.cpp | 50 ++++++++++++++++++++++++----------- test/lit/wat-annotations.wast | 1 - 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index a400a9cf555..b2507383e20 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1545,6 +1545,8 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { // hints. struct ExprHint { Expression* expr; + // The offset we will write in the custom section. + BinaryLocation offset; Function::CodeAnnotation* hint; }; @@ -1559,15 +1561,45 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { // Collect the Branch Hints for this function. FuncHints funcHints; + // We compute the location of the function declaration area (where the + // locals are declared) the first time we need it. + BinaryLocation funcDeclarations = 0; + for (auto& [expr, annotation] : func->codeAnnotations) { if (annotation.branchLikely) { - funcHints.exprHints.push_back(ExprHint{expr, &annotation}); + // Compute the offset: it should be relative to the start of the + // function locals (i.e. the function declarations). + auto exprIter = binaryLocations.expressions.find(expr); + if (exprIter == binaryLocations.expressions.end()) { + // No expression exists for this annotation - perhaps optimizations + // removed it. + continue; + } + auto exprOffset = exprIter->second.start; + + if (!funcDeclarations) { + auto funcIter = binaryLocations.functions.find(func.get()); + assert(funcIter != binaryLocations.functions.end()); + funcDeclarations = funcIter->second.declarations; + } + + auto offset = exprOffset - funcDeclarations; + + funcHints.exprHints.push_back(ExprHint{expr, offset, &annotation}); } } - // If we found something, note it all. if (!funcHints.exprHints.empty()) { + // We found something. Finalize the data. funcHints.func = func->name; + + // Hints must be sorted by increasing binary offset. + std::sort(funcHints.exprHints.begin(), + funcHints.exprHints.end(), + [](const ExprHint& a, const ExprHint& b) { + return a.offset < b.offset; + }); + funcHintsVec.emplace_back(std::move(funcHints)); } } @@ -1585,23 +1617,11 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { buffer << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { - auto* func = wasm->getFunction(funcHints.func); - buffer << U32LEB(getFunctionIndex(funcHints.func)); buffer << U32LEB(funcHints.exprHints.size()); for (auto& exprHint : funcHints.exprHints) { - // Emit the offset as relative to the start of the function locals (i.e. - // the function declarations). - auto exprIter = binaryLocations.expressions.find(exprHint.expr); - assert(exprIter != binaryLocations.expressions.end()); - auto exprOffset = exprIter->second.start; - - auto funcIter = binaryLocations.functions.find(func); - assert(funcIter != binaryLocations.functions.end()); - auto funcDeclarations = funcIter->second.declarations; - - buffer << U32LEB(exprOffset - funcDeclarations); + buffer << U32LEB(exprHint.offset); // Hint size, always 1 for now. buffer << U32LEB(1); diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index c7e9e31dc68..5e4159d9e14 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -77,7 +77,6 @@ (local.get $x) ) ;; The last one wins. - (@metadata.code.branch_hint "\01") (@metadata.code.branch_hint "\00") (br_if $out (local.get $x) From 5396d0070de90f160537bebc6988ba0060128be8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:32:40 -0700 Subject: [PATCH 61/69] format --- src/wasm/wasm-binary.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index b2507383e20..fa83981e8eb 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -487,9 +487,8 @@ void WasmBinaryWriter::writeFunctions() { // |sectionStart| is the start of the contents of the section. Subtract 1 to // include the section code as well, so we move all of it. std::move_backward(&o[sectionStart - 1], &o[oldSize], o.end()); - std::copy(annotationsBuffer.begin(), - annotationsBuffer.end(), - &o[sectionStart - 1]); + std::copy( + annotationsBuffer.begin(), annotationsBuffer.end(), &o[sectionStart - 1]); } } From b05e96348aeb584c4450e7496cdfda02d3b67f34 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:36:40 -0700 Subject: [PATCH 62/69] test --- scripts/test/fuzzing.py | 2 -- .../code-annotations-optimized-away.wast | 31 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 test/lit/passes/code-annotations-optimized-away.wast diff --git a/scripts/test/fuzzing.py b/scripts/test/fuzzing.py index 426c5e2e21d..dc48bb7033e 100644 --- a/scripts/test/fuzzing.py +++ b/scripts/test/fuzzing.py @@ -135,8 +135,6 @@ # TODO: fix split_wast() on tricky escaping situations like a string ending # in \\" (the " is not escaped - there is an escaped \ before it) 'string-lifting-section.wast', - # TODO: fuzz custom annotations - 'wat-annotations.wast', ] diff --git a/test/lit/passes/code-annotations-optimized-away.wast b/test/lit/passes/code-annotations-optimized-away.wast new file mode 100644 index 00000000000..26805354add --- /dev/null +++ b/test/lit/passes/code-annotations-optimized-away.wast @@ -0,0 +1,31 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. + +;; RUN: wasm-opt %s --monomorphize --roundtrip -all -S -o - | filecheck %s + +;; The callee, which we will monomorphize, has a branch hint. The hinted code +;; will end up vanishing entirely, but not the function it is in, so we end up +;; with an annotation without an instruction in the binary for it. We should +;; ignore it and not error. +(module + ;; CHECK: (func $callee (type $1) (param $0 i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $callee (param $0 i32) + (block $block + (@metadata.code.branch_hint "\00") + (br_if $block + (i32.const 0) + ) + ) + ) + + ;; CHECK: (func $caller (type $0) + ;; CHECK-NEXT: (call $callee_2) + ;; CHECK-NEXT: ) + (func $caller + (call $callee + (i32.const 0) + ) + ) +) + From 55c0eb9c5eb20d94839359f04d5c424c138e0277 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:41:12 -0700 Subject: [PATCH 63/69] restore --- test/lit/wat-annotations.wast | 1 + 1 file changed, 1 insertion(+) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 5e4159d9e14..c7e9e31dc68 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -77,6 +77,7 @@ (local.get $x) ) ;; The last one wins. + (@metadata.code.branch_hint "\01") (@metadata.code.branch_hint "\00") (br_if $out (local.get $x) From d3a2e5aa01ca74e8f860a7dc2640a2ad5b0082c0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:52:06 -0700 Subject: [PATCH 64/69] todo --- src/wasm/wasm-binary.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index fa83981e8eb..ec2405fb944 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1607,6 +1607,13 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { return {}; } + if (sourceMap) { + // TODO: This mode may not matter (when debugging, code annotations are an + // optimization that can be skipped), but atm source maps cause + // annotations to break. + Fatal() << "Annotations are not supported with source maps"; + } + BufferWithRandomAccess buffer; // We found data: emit the section. From 18b61ef28ae39e276c3d7b9f39a45b8bf86c3694 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 11:54:18 -0700 Subject: [PATCH 65/69] test --- test/lit/wat-annotations.wast | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index c7e9e31dc68..953af5e6653 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -135,6 +135,11 @@ ;; BINARY-NEXT: ) (func $mixing (param $x i32) ;; Mix branch hints with source locations. Both hints should remain. + ;; TODO: Fix this in the binary format. Atm we test with --roundtrip here, + ;; which does not use source maps, so it is expected for the source + ;; annotation to vanish. But using source maps does not fix it, see + ;; the TODO in the code. + ;;@ mixing.src:1337:42 (block $out (@metadata.code.branch_hint "\01") From 4ddde5bdbf19b17cdadae0dff56c3aa95a383718 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 13:47:49 -0700 Subject: [PATCH 66/69] simplfy --- src/wasm/wasm-binary.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index ec2405fb944..aab7a48afcd 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1566,8 +1566,6 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { for (auto& [expr, annotation] : func->codeAnnotations) { if (annotation.branchLikely) { - // Compute the offset: it should be relative to the start of the - // function locals (i.e. the function declarations). auto exprIter = binaryLocations.expressions.find(expr); if (exprIter == binaryLocations.expressions.end()) { // No expression exists for this annotation - perhaps optimizations @@ -1582,6 +1580,8 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { funcDeclarations = funcIter->second.declarations; } + // Compute the offset: it should be relative to the start of the + // function locals (i.e. the function declarations). auto offset = exprOffset - funcDeclarations; funcHints.exprHints.push_back(ExprHint{expr, offset, &annotation}); @@ -1943,12 +1943,8 @@ void WasmBinaryReader::preScan() { // code locations, as an optimization. if (sectionName == Annotations::BranchHint) { needCodeLocations = true; - if (DWARF) { - // Do not break, so we keep looking for DWARF. - continue; - } else { - break; - } + // Do not break, so we keep looking for DWARF. + continue; } // DWARF sections contain code offsets. @@ -1957,6 +1953,10 @@ void WasmBinaryReader::preScan() { foundDWARF = true; break; } + + // TODO: We could stop early if we see the Code section and DWARF is + // disabled, as BranchHint must appear first, but this seems to + // make practically no difference in practice. } pos = oldPos + payloadLen; } From 7c5a7ceee1abe49ea8e5e1e94baac0ae713bfedc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 7 May 2025 14:47:09 -0700 Subject: [PATCH 67/69] fix loop, we need the increment at the end --- src/wasm/wasm-binary.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index aab7a48afcd..ec16fb8b49f 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1944,7 +1944,6 @@ void WasmBinaryReader::preScan() { if (sectionName == Annotations::BranchHint) { needCodeLocations = true; // Do not break, so we keep looking for DWARF. - continue; } // DWARF sections contain code offsets. From 1b4194311e6b24b38dafb81c8deb6b578c9db59d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 8 May 2025 08:47:06 -0700 Subject: [PATCH 68/69] feedback --- src/wasm/wasm-binary.cpp | 40 ++++++++++------- test/lit/wat-annotations.wast | 84 +++++++++++++++++------------------ 2 files changed, 65 insertions(+), 59 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index abf4499c19d..229dc918f5b 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1562,7 +1562,7 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { // We compute the location of the function declaration area (where the // locals are declared) the first time we need it. - BinaryLocation funcDeclarations = 0; + BinaryLocation funcDeclarationsOffset = 0; for (auto& [expr, annotation] : func->codeAnnotations) { if (annotation.branchLikely) { @@ -1574,33 +1574,35 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { } auto exprOffset = exprIter->second.start; - if (!funcDeclarations) { + if (!funcDeclarationsOffset) { auto funcIter = binaryLocations.functions.find(func.get()); assert(funcIter != binaryLocations.functions.end()); - funcDeclarations = funcIter->second.declarations; + funcDeclarationsOffset = funcIter->second.declarations; } // Compute the offset: it should be relative to the start of the // function locals (i.e. the function declarations). - auto offset = exprOffset - funcDeclarations; + auto offset = exprOffset - funcDeclarationsOffset; funcHints.exprHints.push_back(ExprHint{expr, offset, &annotation}); } } - if (!funcHints.exprHints.empty()) { - // We found something. Finalize the data. - funcHints.func = func->name; + if (funcHints.exprHints.empty()) { + continue; + } - // Hints must be sorted by increasing binary offset. - std::sort(funcHints.exprHints.begin(), - funcHints.exprHints.end(), - [](const ExprHint& a, const ExprHint& b) { - return a.offset < b.offset; - }); + // We found something. Finalize the data. + funcHints.func = func->name; - funcHintsVec.emplace_back(std::move(funcHints)); - } + // Hints must be sorted by increasing binary offset. + std::sort(funcHints.exprHints.begin(), + funcHints.exprHints.end(), + [](const ExprHint& a, const ExprHint& b) { + return a.offset < b.offset; + }); + + funcHintsVec.emplace_back(std::move(funcHints)); } if (funcHintsVec.empty()) { @@ -1643,7 +1645,7 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { // Write the final size. We can ignore the return value, which is the number // of bytes we shrank (if the LEB was smaller than the maximum size), as no // value in this section cares. - (void)buffer.emitRetroactiveSectionSizeLEB(lebPos); + buffer.emitRetroactiveSectionSizeLEB(lebPos); return buffer; } @@ -2096,7 +2098,7 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { readDylink(payloadLen); } else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) { readDylink0(payloadLen); - } else if (sectionName.equals(Annotations::BranchHint.str)) { + } else if (sectionName == Annotations::BranchHint) { // Only note the position and length, we read this later. branchHintsPos = pos; branchHintsLen = payloadLen; @@ -5253,6 +5255,10 @@ void WasmBinaryReader::readBranchHints(size_t payloadLen) { auto numFuncs = getU32LEB(); for (Index i = 0; i < numFuncs; i++) { auto funcIndex = getU32LEB(); + if (funcIndex >= wasm.functions.size()) { + throwError("bad BranchHint function"); + } + auto& func = wasm.functions[funcIndex]; // The encoded offsets we read below are relative to the start of the diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast index 953af5e6653..658c00224be 100644 --- a/test/lit/wat-annotations.wast +++ b/test/lit/wat-annotations.wast @@ -1,7 +1,7 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; RUN: wasm-opt -all %s -S -o - | filecheck %s -;; RUN: wasm-opt -all --roundtrip %s -S -o - | filecheck %s --check-prefix=BINARY +;; RUN: wasm-opt -all --roundtrip %s -S -o - | filecheck %s --check-prefix=RTRIP (module @@ -14,15 +14,15 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; BINARY: (type $0 (func (param i32))) + ;; RTRIP: (type $0 (func (param i32))) - ;; BINARY: (func $no-annotations (type $0) (param $x i32) - ;; BINARY-NEXT: (block $block - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) + ;; RTRIP: (func $no-annotations (type $0) (param $x i32) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) (func $no-annotations (param $x i32) ;; A function with no annotations. This tests that we use function indexes ;; properly in the section. @@ -49,22 +49,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; BINARY: (func $branch-hints-br_if (type $0) (param $x i32) - ;; BINARY-NEXT: (block $block - ;; BINARY-NEXT: (@metadata.code.branch_hint "\00") - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: (@metadata.code.branch_hint "\00") - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) + ;; RTRIP: (func $branch-hints-br_if (type $0) (param $x i32) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\00") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\01") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\00") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) (func $branch-hints-br_if (param $x i32) (block $out ;; A branch annotated as unlikely, and one as likely. @@ -94,15 +94,15 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; BINARY: (func $branch_hints-br_if-2 (type $0) (param $x i32) - ;; BINARY-NEXT: (local $unused f64) - ;; BINARY-NEXT: (block $block - ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) + ;; RTRIP: (func $branch_hints-br_if-2 (type $0) (param $x i32) + ;; RTRIP-NEXT: (local $unused f64) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\01") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) (func $branch_hints-br_if-2 (param $x i32) (local $unused f64) ;; A second function with hints. This one also has local definitions, which @@ -125,14 +125,14 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; BINARY: (func $mixing (type $0) (param $x i32) - ;; BINARY-NEXT: (block $block - ;; BINARY-NEXT: (@metadata.code.branch_hint "\01") - ;; BINARY-NEXT: (br_if $block - ;; BINARY-NEXT: (local.get $x) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) - ;; BINARY-NEXT: ) + ;; RTRIP: (func $mixing (type $0) (param $x i32) + ;; RTRIP-NEXT: (block $block + ;; RTRIP-NEXT: (@metadata.code.branch_hint "\01") + ;; RTRIP-NEXT: (br_if $block + ;; RTRIP-NEXT: (local.get $x) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) + ;; RTRIP-NEXT: ) (func $mixing (param $x i32) ;; Mix branch hints with source locations. Both hints should remain. ;; TODO: Fix this in the binary format. Atm we test with --roundtrip here, From 541c9bac22f79f5319593bdacb98ac1d692c962f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 8 May 2025 08:52:49 -0700 Subject: [PATCH 69/69] format --- src/wasm/wasm-binary.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 229dc918f5b..dd7c9e647a0 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1596,11 +1596,10 @@ std::optional WasmBinaryWriter::writeCodeAnnotations() { funcHints.func = func->name; // Hints must be sorted by increasing binary offset. - std::sort(funcHints.exprHints.begin(), - funcHints.exprHints.end(), - [](const ExprHint& a, const ExprHint& b) { - return a.offset < b.offset; - }); + std::sort( + funcHints.exprHints.begin(), + funcHints.exprHints.end(), + [](const ExprHint& a, const ExprHint& b) { return a.offset < b.offset; }); funcHintsVec.emplace_back(std::move(funcHints)); }