diff --git a/src/wasm-binary.h b/src/wasm-binary.h
index b59275013cb..ccd07aa1c7e 100644
--- a/src/wasm-binary.h
+++ b/src/wasm-binary.h
@@ -1404,6 +1404,12 @@ class WasmBinaryWriter {
   void trackExpressionEnd(Expression* curr, Function* func);
   void trackExpressionDelimiter(Expression* curr, Function* func, size_t id);
 
+  // Writes code annotations into a buffer and returns it. We cannot write them
+  // directly into the output since we write function code first (to get the
+  // offsets for the annotations), and only then can write annotations, which we
+  // must then insert before the code (as the spec requires that).
+  std::optional<BufferWithRandomAccess> writeCodeAnnotations();
+
   // helpers
   void writeInlineString(std::string_view name);
   void writeEscapedName(std::string_view name);
@@ -1667,6 +1673,15 @@ class WasmBinaryReader {
   void readDylink(size_t payloadLen);
   void readDylink0(size_t payloadLen);
 
+  // We read branch hints *after* the code section, even though they appear
+  // earlier. That is simpler for us as we note expression locations as we scan
+  // code, and then just need to match them up. To do this, we note the branch
+  // hint position and size in the first pass, and handle it later.
+  size_t branchHintsPos = 0;
+  size_t branchHintsLen = 0;
+
+  void readBranchHints(size_t payloadLen);
+
   Index readMemoryAccess(Address& alignment, Address& offset);
   std::tuple<Name, Address, Address> getMemarg();
   MemoryOrder getMemoryOrder(bool isRMW = false);
diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp
index 376fbfc1069..dd7c9e647a0 100644
--- a/src/wasm/wasm-binary.cpp
+++ b/src/wasm/wasm-binary.cpp
@@ -28,6 +28,7 @@
 #include "support/debug.h"
 #include "support/stdckdint.h"
 #include "support/string.h"
+#include "wasm-annotations.h"
 #include "wasm-binary.h"
 #include "wasm-debug.h"
 #include "wasm-limits.h"
@@ -474,6 +475,21 @@ void WasmBinaryWriter::writeFunctions() {
     }
   });
   finishSection(sectionStart);
+
+  // Code annotations must come before the code section (see comment on
+  // writeCodeAnnotations).
+  if (auto annotations = writeCodeAnnotations()) {
+    // We need to move the code section and put the annotations before it.
+    auto& annotationsBuffer = *annotations;
+    auto oldSize = o.size();
+    o.resize(oldSize + annotationsBuffer.size());
+
+    // |sectionStart| is the start of the contents of the section. Subtract 1 to
+    // include the section code as well, so we move all of it.
+    std::move_backward(&o[sectionStart - 1], &o[oldSize], o.end());
+    std::copy(
+      annotationsBuffer.begin(), annotationsBuffer.end(), &o[sectionStart - 1]);
+  }
 }
 
 void WasmBinaryWriter::writeStrings() {
@@ -1496,7 +1512,8 @@ void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) {
   // binary locations tracked, then track it in the output as well. We also
   // track locations of instructions that have code annotations, as their binary
   // location goes in the custom section.
-  if (func && !func->expressionLocations.empty()) {
+  if (func && (!func->expressionLocations.empty() ||
+               func->codeAnnotations.count(curr))) {
     binaryLocations.expressions[curr] =
       BinaryLocations::Span{BinaryLocation(o.size()), 0};
     binaryLocationTrackedExpressionsForFunc.push_back(curr);
@@ -1504,6 +1521,8 @@ void WasmBinaryWriter::trackExpressionStart(Expression* curr, Function* func) {
 }
 
 void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) {
+  // TODO: If we need to track the end of annotated code locations, we need to
+  //       enable that here.
   if (func && !func->expressionLocations.empty()) {
     auto& span = binaryLocations.expressions.at(curr);
     span.end = o.size();
@@ -1513,11 +1532,123 @@ void WasmBinaryWriter::trackExpressionEnd(Expression* curr, Function* func) {
 void WasmBinaryWriter::trackExpressionDelimiter(Expression* curr,
                                                 Function* func,
                                                 size_t id) {
+  // TODO: If we need to track the delimiters of annotated code locations, we
+  //       need to enable that here.
   if (func && !func->expressionLocations.empty()) {
     binaryLocations.delimiters[curr][id] = o.size();
   }
 }
 
+std::optional<BufferWithRandomAccess> WasmBinaryWriter::writeCodeAnnotations() {
+  // Assemble the info for Branch Hinting: for each function, a vector of the
+  // hints.
+  struct ExprHint {
+    Expression* expr;
+    // The offset we will write in the custom section.
+    BinaryLocation offset;
+    Function::CodeAnnotation* hint;
+  };
+
+  struct FuncHints {
+    Name func;
+    std::vector<ExprHint> exprHints;
+  };
+
+  std::vector<FuncHints> funcHintsVec;
+
+  for (auto& func : wasm->functions) {
+    // Collect the Branch Hints for this function.
+    FuncHints funcHints;
+
+    // We compute the location of the function declaration area (where the
+    // locals are declared) the first time we need it.
+    BinaryLocation funcDeclarationsOffset = 0;
+
+    for (auto& [expr, annotation] : func->codeAnnotations) {
+      if (annotation.branchLikely) {
+        auto exprIter = binaryLocations.expressions.find(expr);
+        if (exprIter == binaryLocations.expressions.end()) {
+          // No expression exists for this annotation - perhaps optimizations
+          // removed it.
+          continue;
+        }
+        auto exprOffset = exprIter->second.start;
+
+        if (!funcDeclarationsOffset) {
+          auto funcIter = binaryLocations.functions.find(func.get());
+          assert(funcIter != binaryLocations.functions.end());
+          funcDeclarationsOffset = funcIter->second.declarations;
+        }
+
+        // Compute the offset: it should be relative to the start of the
+        // function locals (i.e. the function declarations).
+        auto offset = exprOffset - funcDeclarationsOffset;
+
+        funcHints.exprHints.push_back(ExprHint{expr, offset, &annotation});
+      }
+    }
+
+    if (funcHints.exprHints.empty()) {
+      continue;
+    }
+
+    // We found something. Finalize the data.
+    funcHints.func = func->name;
+
+    // Hints must be sorted by increasing binary offset.
+    std::sort(
+      funcHints.exprHints.begin(),
+      funcHints.exprHints.end(),
+      [](const ExprHint& a, const ExprHint& b) { return a.offset < b.offset; });
+
+    funcHintsVec.emplace_back(std::move(funcHints));
+  }
+
+  if (funcHintsVec.empty()) {
+    return {};
+  }
+
+  if (sourceMap) {
+    // TODO: This mode may not matter (when debugging, code annotations are an
+    //       optimization that can be skipped), but atm source maps cause
+    //       annotations to break.
+    Fatal() << "Annotations are not supported with source maps";
+  }
+
+  BufferWithRandomAccess buffer;
+
+  // We found data: emit the section.
+  buffer << uint8_t(BinaryConsts::Custom);
+  auto lebPos = buffer.writeU32LEBPlaceholder();
+  buffer.writeInlineString(Annotations::BranchHint.str);
+
+  buffer << U32LEB(funcHintsVec.size());
+  for (auto& funcHints : funcHintsVec) {
+    buffer << U32LEB(getFunctionIndex(funcHints.func));
+
+    buffer << U32LEB(funcHints.exprHints.size());
+    for (auto& exprHint : funcHints.exprHints) {
+      buffer << U32LEB(exprHint.offset);
+
+      // Hint size, always 1 for now.
+      buffer << U32LEB(1);
+
+      // We must only emit hints that are present.
+      assert(exprHint.hint->branchLikely);
+
+      // Hint contents: likely or not.
+      buffer << U32LEB(int(*exprHint.hint->branchLikely));
+    }
+  }
+
+  // Write the final size. We can ignore the return value, which is the number
+  // of bytes we shrank (if the LEB was smaller than the maximum size), as no
+  // value in this section cares.
+  buffer.emitRetroactiveSectionSizeLEB(lebPos);
+
+  return buffer;
+}
+
 void WasmBinaryWriter::writeData(const char* data, size_t size) {
   for (size_t i = 0; i < size; i++) {
     o << int8_t(data[i]);
@@ -1792,12 +1923,6 @@ WasmBinaryReader::WasmBinaryReader(Module& wasm,
 }
 
 void WasmBinaryReader::preScan() {
-  // TODO: Once we support code annotations here, we will need to always scan,
-  //       but for now, DWARF is the only reason.
-  if (!DWARF) {
-    return;
-  }
-
   assert(pos == 0);
   getInt32(); // magic
   getInt32(); // version
@@ -1813,12 +1938,25 @@ void WasmBinaryReader::preScan() {
     auto oldPos = pos;
     if (sectionCode == BinaryConsts::Section::Custom) {
       auto sectionName = getInlineString();
+
+      // Code annotations require code locations.
+      // TODO: For Branch Hinting, we could note which functions require
+      //       code locations, as an optimization.
+      if (sectionName == Annotations::BranchHint) {
+        needCodeLocations = true;
+        // Do not break, so we keep looking for DWARF.
+      }
+
       // DWARF sections contain code offsets.
       if (DWARF && Debug::isDWARFSection(sectionName)) {
         needCodeLocations = true;
         foundDWARF = true;
         break;
       }
+
+      // TODO: We could stop early if we see the Code section and DWARF is
+      //       disabled, as BranchHint must appear first, but this seems to
+      //       make practically no difference in practice.
     }
     pos = oldPos + payloadLen;
   }
@@ -1933,6 +2071,12 @@ void WasmBinaryReader::read() {
     }
   }
 
+  // Go back and parse things we deferred.
+  if (branchHintsPos) {
+    pos = branchHintsPos;
+    readBranchHints(branchHintsLen);
+  }
+
   validateBinary();
 }
 
@@ -1953,6 +2097,10 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) {
     readDylink(payloadLen);
   } else if (sectionName.equals(BinaryConsts::CustomSections::Dylink0)) {
     readDylink0(payloadLen);
+  } else if (sectionName == Annotations::BranchHint) {
+    // Only note the position and length, we read this later.
+    branchHintsPos = pos;
+    branchHintsLen = payloadLen;
   } else {
     // an unfamiliar custom section
     if (sectionName.equals(BinaryConsts::CustomSections::Linking)) {
@@ -5100,6 +5248,62 @@ void WasmBinaryReader::readDylink0(size_t payloadLen) {
   }
 }
 
+void WasmBinaryReader::readBranchHints(size_t payloadLen) {
+  auto sectionPos = pos;
+
+  auto numFuncs = getU32LEB();
+  for (Index i = 0; i < numFuncs; i++) {
+    auto funcIndex = getU32LEB();
+    if (funcIndex >= wasm.functions.size()) {
+      throwError("bad BranchHint function");
+    }
+
+    auto& func = wasm.functions[funcIndex];
+
+    // The encoded offsets we read below are relative to the start of the
+    // function's locals (the declarations).
+    auto funcLocalsOffset = func->funcLocation.declarations;
+
+    // We have a map of expressions to their locations. Invert that to get the
+    // map we will use below, from offsets to expressions.
+    std::unordered_map<BinaryLocation, Expression*> locationsMap;
+
+    for (auto& [expr, span] : func->expressionLocations) {
+      locationsMap[span.start] = expr;
+    }
+
+    auto numHints = getU32LEB();
+    for (Index hint = 0; hint < numHints; hint++) {
+      // To get the absolute offset, add the function's offset.
+      auto relativeOffset = getU32LEB();
+      auto absoluteOffset = funcLocalsOffset + relativeOffset;
+
+      auto iter = locationsMap.find(absoluteOffset);
+      if (iter == locationsMap.end()) {
+        throwError("bad BranchHint offset");
+      }
+      auto* expr = iter->second;
+
+      auto size = getU32LEB();
+      if (size != 1) {
+        throwError("bad BranchHint size");
+      }
+
+      auto likely = getU32LEB();
+      if (likely != 0 && likely != 1) {
+        throwError("bad BranchHint value");
+      }
+
+      // Apply the valid hint.
+      func->codeAnnotations[expr].branchLikely = likely;
+    }
+  }
+
+  if (pos != sectionPos + payloadLen) {
+    throwError("bad BranchHint section size");
+  }
+}
+
 Index WasmBinaryReader::readMemoryAccess(Address& alignment, Address& offset) {
   auto rawAlignment = getU32LEB();
   bool hasMemIdx = false;
diff --git a/test/lit/passes/code-annotations-optimized-away.wast b/test/lit/passes/code-annotations-optimized-away.wast
new file mode 100644
index 00000000000..26805354add
--- /dev/null
+++ b/test/lit/passes/code-annotations-optimized-away.wast
@@ -0,0 +1,31 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+
+;; RUN: wasm-opt %s --monomorphize --roundtrip -all -S -o - | filecheck %s
+
+;; The callee, which we will monomorphize, has a branch hint. The hinted code
+;; will end up vanishing entirely, but not the function it is in, so we end up
+;; with an annotation without an instruction in the binary for it. We should
+;; ignore it and not error.
+(module
+ ;; CHECK:      (func $callee (type $1) (param $0 i32)
+ ;; CHECK-NEXT:  (nop)
+ ;; CHECK-NEXT: )
+ (func $callee (param $0 i32)
+  (block $block
+   (@metadata.code.branch_hint "\00")
+   (br_if $block
+    (i32.const 0)
+   )
+  )
+ )
+
+ ;; CHECK:      (func $caller (type $0)
+ ;; CHECK-NEXT:  (call $callee_2)
+ ;; CHECK-NEXT: )
+ (func $caller
+  (call $callee
+   (i32.const 0)
+  )
+ )
+)
+
diff --git a/test/lit/wat-annotations.wast b/test/lit/wat-annotations.wast
index 6d5be1f0a9d..658c00224be 100644
--- a/test/lit/wat-annotations.wast
+++ b/test/lit/wat-annotations.wast
@@ -1,10 +1,38 @@
 ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
 
-;; RUN: wasm-opt -all %s -S -o - | filecheck %s
+;; RUN: wasm-opt -all             %s -S -o - | filecheck %s
+;; RUN: wasm-opt -all --roundtrip %s -S -o - | filecheck %s --check-prefix=RTRIP
 
 (module
+
   ;; CHECK:      (type $0 (func (param i32)))
 
+  ;; CHECK:      (func $no-annotations (type $0) (param $x i32)
+  ;; CHECK-NEXT:  (block $out
+  ;; CHECK-NEXT:   (br_if $out
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  ;; RTRIP:      (type $0 (func (param i32)))
+
+  ;; RTRIP:      (func $no-annotations (type $0) (param $x i32)
+  ;; RTRIP-NEXT:  (block $block
+  ;; RTRIP-NEXT:   (br_if $block
+  ;; RTRIP-NEXT:    (local.get $x)
+  ;; RTRIP-NEXT:   )
+  ;; RTRIP-NEXT:  )
+  ;; RTRIP-NEXT: )
+  (func $no-annotations (param $x i32)
+    ;; A function with no annotations. This tests that we use function indexes
+    ;; properly in the section.
+    (block $out
+      (br_if $out
+        (local.get $x)
+      )
+    )
+  )
+
   ;; CHECK:      (func $branch-hints-br_if (type $0) (param $x i32)
   ;; CHECK-NEXT:  (block $out
   ;; CHECK-NEXT:   (@metadata.code.branch_hint "\00")
@@ -21,6 +49,22 @@
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT: )
+  ;; RTRIP:      (func $branch-hints-br_if (type $0) (param $x i32)
+  ;; RTRIP-NEXT:  (block $block
+  ;; RTRIP-NEXT:   (@metadata.code.branch_hint "\00")
+  ;; RTRIP-NEXT:   (br_if $block
+  ;; RTRIP-NEXT:    (local.get $x)
+  ;; RTRIP-NEXT:   )
+  ;; RTRIP-NEXT:   (@metadata.code.branch_hint "\01")
+  ;; RTRIP-NEXT:   (br_if $block
+  ;; RTRIP-NEXT:    (local.get $x)
+  ;; RTRIP-NEXT:   )
+  ;; RTRIP-NEXT:   (@metadata.code.branch_hint "\00")
+  ;; RTRIP-NEXT:   (br_if $block
+  ;; RTRIP-NEXT:    (local.get $x)
+  ;; RTRIP-NEXT:   )
+  ;; RTRIP-NEXT:  )
+  ;; RTRIP-NEXT: )
   (func $branch-hints-br_if (param $x i32)
     (block $out
       ;; A branch annotated as unlikely, and one as likely.
@@ -40,4 +84,68 @@
       )
     )
   )
+
+  ;; CHECK:      (func $branch_hints-br_if-2 (type $0) (param $x i32)
+  ;; CHECK-NEXT:  (local $unused f64)
+  ;; CHECK-NEXT:  (block $out
+  ;; CHECK-NEXT:   (@metadata.code.branch_hint "\01")
+  ;; CHECK-NEXT:   (br_if $out
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  ;; RTRIP:      (func $branch_hints-br_if-2 (type $0) (param $x i32)
+  ;; RTRIP-NEXT:  (local $unused f64)
+  ;; RTRIP-NEXT:  (block $block
+  ;; RTRIP-NEXT:   (@metadata.code.branch_hint "\01")
+  ;; RTRIP-NEXT:   (br_if $block
+  ;; RTRIP-NEXT:    (local.get $x)
+  ;; RTRIP-NEXT:   )
+  ;; RTRIP-NEXT:  )
+  ;; RTRIP-NEXT: )
+  (func $branch_hints-br_if-2 (param $x i32)
+    (local $unused f64)
+    ;; A second function with hints. This one also has local definitions, which
+    ;; should not confuse us (branch hint offsets are relative to the start of
+    ;; the local definitions, not the end).
+    (block $out
+      (@metadata.code.branch_hint "\01")
+      (br_if $out
+        (local.get $x)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $mixing (type $0) (param $x i32)
+  ;; CHECK-NEXT:  ;;@ mixing.src:1337:42
+  ;; CHECK-NEXT:  (block $out
+  ;; CHECK-NEXT:   (@metadata.code.branch_hint "\01")
+  ;; CHECK-NEXT:   (br_if $out
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  ;; RTRIP:      (func $mixing (type $0) (param $x i32)
+  ;; RTRIP-NEXT:  (block $block
+  ;; RTRIP-NEXT:   (@metadata.code.branch_hint "\01")
+  ;; RTRIP-NEXT:   (br_if $block
+  ;; RTRIP-NEXT:    (local.get $x)
+  ;; RTRIP-NEXT:   )
+  ;; RTRIP-NEXT:  )
+  ;; RTRIP-NEXT: )
+  (func $mixing (param $x i32)
+    ;; Mix branch hints with source locations. Both hints should remain.
+    ;; TODO: Fix this in the binary format. Atm we test with --roundtrip here,
+    ;;       which does not use source maps, so it is expected for the source
+    ;;       annotation to vanish. But using source maps does not fix it, see
+    ;;       the TODO in the code.
+
+    ;;@ mixing.src:1337:42
+    (block $out
+      (@metadata.code.branch_hint "\01")
+      (br_if $out
+        (local.get $x)
+      )
+    )
+  )
 )