From 40e63fd46e36165f4ebbf1a46172e24ad7e9d581 Mon Sep 17 00:00:00 2001 From: Steffen Deusch Date: Mon, 18 Aug 2025 15:26:30 +0200 Subject: [PATCH 1/2] replace --- c_src/lazy_html.cpp | 98 +++++++++++++++++++++++++++++++++++++++++ lib/lazy_html.ex | 32 +++++++++++++++++ lib/lazy_html/nif.ex | 1 + test/lazy_html_test.exs | 65 +++++++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+) diff --git a/c_src/lazy_html.cpp b/c_src/lazy_html.cpp index a9dc1a0..ef53043 100644 --- a/c_src/lazy_html.cpp +++ b/c_src/lazy_html.cpp @@ -821,6 +821,104 @@ std::vector tag(ErlNifEnv *env, ExLazyHTML ex_lazy_html) { FINE_NIF(tag, 0);
+// Replaces the single element matching `css_selector` with deep copies of
+// the root nodes of `ex_new_content`. The document behind `ex_lazy_html`
+// is modified in place and the same handle is returned.
+//
+// Throws std::invalid_argument unless exactly one element matches, and
+// std::runtime_error if a lexbor call fails or the match has no parent.
+//
+// NOTE(review): in-place mutation is visible through every term that
+// shares this document - confirm that is intended for an Elixir API.
+ExLazyHTML replace(ErlNifEnv *env, ExLazyHTML ex_lazy_html,
+                   ErlNifBinary css_selector, ExLazyHTML ex_new_content) {
+  auto parser = lxb_css_parser_create();
+  auto status = lxb_css_parser_init(parser, NULL);
+  if (status != LXB_STATUS_OK) {
+    throw std::runtime_error("failed to create css parser");
+  }
+  auto parser_guard =
+      ScopeGuard([&]() { lxb_css_parser_destroy(parser, true); });
+
+  auto css_selector_list = parse_css_selector(parser, css_selector);
+
+  // A single selectors object serves every root node.
+  auto selectors = lxb_selectors_create();
+  status = lxb_selectors_init(selectors);
+  if (status != LXB_STATUS_OK) {
+    throw std::runtime_error("failed to create selectors");
+  }
+  auto selectors_guard =
+      ScopeGuard([&]() { lxb_selectors_destroy(selectors, true); });
+
+  // Report an element once even when it matches several selectors of a
+  // comma-separated list, so "a, #x" cannot count one element twice.
+  // LXB_SELECTORS_OPT_MATCH_ROOT is left unset: replacing a root would
+  // leave `ex_lazy_html` referring to a node that is no longer attached.
+  lxb_selectors_opt_set(selectors, LXB_SELECTORS_OPT_MATCH_FIRST);
+
+  auto matching_nodes = std::vector<lxb_dom_node_t *>();
+
+  for (auto node : ex_lazy_html.resource->nodes) {
+    status = lxb_selectors_find(
+        selectors, node, css_selector_list,
+        [](lxb_dom_node_t *node, lxb_css_selector_specificity_t spec,
+           void *ctx) -> lxb_status_t {
+          auto nodes =
+              reinterpret_cast<std::vector<lxb_dom_node_t *> *>(ctx);
+          nodes->push_back(node);
+          return LXB_STATUS_OK;
+        },
+        &matching_nodes);
+    if (status != LXB_STATUS_OK) {
+      throw std::runtime_error("failed to run find");
+    }
+  }
+
+  if (matching_nodes.empty()) {
+    throw std::invalid_argument("no elements found matching selector");
+  }
+  if (matching_nodes.size() > 1) {
+    throw std::invalid_argument(
+        "expected exactly 1 element matching selector, but found " +
+        std::to_string(matching_nodes.size()));
+  }
+
+  auto target_node = matching_nodes.front();
+  if (lxb_dom_node_parent(target_node) == NULL) {
+    throw std::runtime_error("cannot replace root node");
+  }
+
+  // Import the new content into the target's document before touching the
+  // tree. lxb_dom_node_clone() keeps the copy in the source node's owner
+  // document, so the copies would dangle once `ex_new_content` is freed.
+  // Importing everything first also keeps the operation all-or-nothing.
+  auto imported_nodes = std::vector<lxb_dom_node_t *>();
+  for (auto new_node : ex_new_content.resource->nodes) {
+    auto imported_node = lxb_dom_document_import_node(
+        target_node->owner_document, new_node, true);
+    if (imported_node == NULL) {
+      for (auto imported : imported_nodes) {
+        lxb_dom_node_destroy_deep(imported);
+      }
+      throw std::runtime_error("failed to clone new content node");
+    }
+    imported_nodes.push_back(imported_node);
+  }
+
+  for (auto imported_node : imported_nodes) {
+    lxb_dom_node_insert_before(target_node, imported_node);
+  }
+
+  // Unlink without destroying: other handles on this document may still
+  // reference the old node.
+  lxb_dom_node_remove(target_node);
+
+  return ex_lazy_html;
+}
+
+FINE_NIF(replace, ERL_NIF_DIRTY_JOB_CPU_BOUND);
+
 } // namespace lazy_html FINE_INIT("Elixir.LazyHTML.NIF"); diff --git a/lib/lazy_html.ex b/lib/lazy_html.ex index d814697..32c5f4a 100644 --- a/lib/lazy_html.ex +++ b/lib/lazy_html.ex @@ -481,6 +481,38 @@ defmodule LazyHTML do LazyHTML.NIF.tag(lazy_html) end + @doc ~S''' + Replaces the element matching the given CSS selector with new content. + + The function expects exactly one element to match the selector. If no + element or more than one element matches, it raises an ArgumentError. + + ## Examples + + iex> lazy_html = LazyHTML.from_fragment(~S|
Old content
|) + iex> new_content = LazyHTML.from_fragment(~S|

New content

|) + iex> LazyHTML.replace(lazy_html, "#main span", new_content) + #LazyHTML< + 1 node + #1 +

New content

+ > + + iex> lazy_html = LazyHTML.from_fragment(~S|
  • Item 1
  • Item 2
  • Item 3
|) + iex> new_content = LazyHTML.from_fragment(~S|
  • Replaced item
  • |) + iex> LazyHTML.replace(lazy_html, "#target", new_content) + #LazyHTML< + 1 node + #1 +
    • Item 1
    • Replaced item
    • Item 3
    + > + + ''' + @spec replace(t(), String.t(), t()) :: t() + def replace(%LazyHTML{} = lazy_html, selector, %LazyHTML{} = new_content) when is_binary(selector) do + LazyHTML.NIF.replace(lazy_html, selector, new_content) + end + @doc ~S""" Escapes the given string to make a valid HTML text. diff --git a/lib/lazy_html/nif.ex b/lib/lazy_html/nif.ex index e7098ac..acda97d 100644 --- a/lib/lazy_html/nif.ex +++ b/lib/lazy_html/nif.ex @@ -27,6 +27,7 @@ defmodule LazyHTML.NIF do def tag(_lazy_html), do: err!() def nodes(_lazy_html), do: err!() def num_nodes(_lazy_html), do: err!() + def replace(_lazy_html, _css_selector, _new_content), do: err!() defp err!(), do: :erlang.nif_error(:not_loaded) end diff --git a/test/lazy_html_test.exs b/test/lazy_html_test.exs index 422a36e..1f0c0b7 100644 --- a/test/lazy_html_test.exs +++ b/test/lazy_html_test.exs @@ -250,6 +250,71 @@ defmodule LazyHTMLTest do end end + describe "replace/3" do + test "replaces a single element with new content" do + lazy_html = LazyHTML.from_fragment(~S|
    Old content
    |) + new_content = LazyHTML.from_fragment(~S|

    New content

    |) + + result = LazyHTML.replace(lazy_html, "#main span", new_content) + + assert LazyHTML.to_html(result) == ~S|

    New content

    | + end + + test "replaces element in a list" do + lazy_html = LazyHTML.from_fragment(~S|
    • Item 1
    • Item 2
    • Item 3
    |) + new_content = LazyHTML.from_fragment(~S|
  • Replaced item
  • |) + + result = LazyHTML.replace(lazy_html, "#target", new_content) + + assert LazyHTML.to_html(result) == ~S|
    • Item 1
    • Replaced item
    • Item 3
    | + end + + test "replaces with multiple nodes" do + lazy_html = LazyHTML.from_fragment(~S|

    Old paragraph

    |) + new_content = LazyHTML.from_fragment(~S|

    Title

    New paragraph

    |) + + result = LazyHTML.replace(lazy_html, "#old", new_content) + + assert LazyHTML.to_html(result) == ~S|

    Title

    New paragraph

    | + end + + test "raises when no elements match" do + lazy_html = LazyHTML.from_fragment(~S|
    Content
    |) + new_content = LazyHTML.from_fragment(~S|

    New content

    |) + + assert_raise ArgumentError, "no elements found matching selector", fn -> + LazyHTML.replace(lazy_html, "#nonexistent", new_content) + end + end + + test "raises when multiple elements match" do + lazy_html = LazyHTML.from_fragment(~S|
    FirstSecond
    |) + new_content = LazyHTML.from_fragment(~S|

    New content

    |) + + assert_raise ArgumentError, ~r/expected exactly 1 element matching selector.*but found 2/, fn -> + LazyHTML.replace(lazy_html, "span", new_content) + end + end + + test "works with complex selectors" do + lazy_html = LazyHTML.from_fragment(~S|
    Active item
    Inactive item
    |) + new_content = LazyHTML.from_fragment(~S|
    Updated item
    |) + + result = LazyHTML.replace(lazy_html, ".item.active", new_content) + + assert LazyHTML.to_html(result) == ~S|
    Updated item
    Inactive item
    | + end + + test "preserves document structure when replacing nested elements" do + lazy_html = LazyHTML.from_fragment(~S|

    Old Title

    Content
    |) + new_content = LazyHTML.from_fragment(~S|

    New Title

    |) + + result = LazyHTML.replace(lazy_html, "#title", new_content) + + assert LazyHTML.to_html(result) == ~S|

    New Title

    Content
    | + end + end + describe "query_by_id/2" do test "raises when an empty id is given" do assert_raise ArgumentError, ~r/id cannot be empty/, fn -> From 78578f2f8729c34d893e8ae27431ec4ee3272cfa Mon Sep 17 00:00:00 2001 From: Steffen Deusch Date: Mon, 18 Aug 2025 15:39:32 +0200 Subject: [PATCH 2/2] appendChild --- c_src/lazy_html.cpp | 92 +++++++++++++++++++++++++++++++++++ lib/lazy_html.ex | 33 ++++++++++++++++ lib/lazy_html/nif.ex | 1 + test/lazy_html_test.exs | 83 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+) diff --git a/c_src/lazy_html.cpp b/c_src/lazy_html.cpp index ef53043..b21731a 100644 --- a/c_src/lazy_html.cpp +++ b/c_src/lazy_html.cpp @@ -900,6 +900,98 @@ ExLazyHTML replace(ErlNifEnv *env, ExLazyHTML ex_lazy_html, FINE_NIF(replace, ERL_NIF_DIRTY_JOB_CPU_BOUND);
+// Appends deep copies of the root nodes of `ex_child_content` as the last
+// children of the single element matching `css_selector`. The document
+// behind `ex_lazy_html` is modified in place and the same handle is
+// returned.
+//
+// Throws std::invalid_argument unless exactly one element matches, and
+// std::runtime_error if a lexbor call fails.
+//
+// NOTE(review): in-place mutation is visible through every term that
+// shares this document - confirm that is intended for an Elixir API.
+ExLazyHTML append_child(ErlNifEnv *env, ExLazyHTML ex_lazy_html,
+                        ErlNifBinary css_selector,
+                        ExLazyHTML ex_child_content) {
+  auto parser = lxb_css_parser_create();
+  auto status = lxb_css_parser_init(parser, NULL);
+  if (status != LXB_STATUS_OK) {
+    throw std::runtime_error("failed to create css parser");
+  }
+  auto parser_guard =
+      ScopeGuard([&]() { lxb_css_parser_destroy(parser, true); });
+
+  auto css_selector_list = parse_css_selector(parser, css_selector);
+
+  auto selectors = lxb_selectors_create();
+  status = lxb_selectors_init(selectors);
+  if (status != LXB_STATUS_OK) {
+    throw std::runtime_error("failed to create selectors");
+  }
+  auto selectors_guard =
+      ScopeGuard([&]() { lxb_selectors_destroy(selectors, true); });
+
+  // MATCH_FIRST reports an element once even if several selectors of a
+  // list match it; MATCH_ROOT lets the root nodes themselves match.
+  lxb_selectors_opt_set(selectors, static_cast<lxb_selectors_opt_t>(
+                                       LXB_SELECTORS_OPT_MATCH_FIRST |
+                                       LXB_SELECTORS_OPT_MATCH_ROOT));
+
+  auto matching_nodes = std::vector<lxb_dom_node_t *>();
+
+  for (auto node : ex_lazy_html.resource->nodes) {
+    status = lxb_selectors_find(
+        selectors, node, css_selector_list,
+        [](lxb_dom_node_t *node, lxb_css_selector_specificity_t spec,
+           void *ctx) -> lxb_status_t {
+          auto nodes =
+              reinterpret_cast<std::vector<lxb_dom_node_t *> *>(ctx);
+          nodes->push_back(node);
+          return LXB_STATUS_OK;
+        },
+        &matching_nodes);
+    if (status != LXB_STATUS_OK) {
+      throw std::runtime_error("failed to run find");
+    }
+  }
+
+  if (matching_nodes.empty()) {
+    throw std::invalid_argument("no elements found matching selector");
+  }
+  if (matching_nodes.size() > 1) {
+    throw std::invalid_argument(
+        "expected exactly 1 element matching selector, but found " +
+        std::to_string(matching_nodes.size()));
+  }
+
+  auto parent_node = matching_nodes.front();
+
+  // Import the children into the parent's document before touching the
+  // tree. lxb_dom_node_clone() keeps the copy in the source node's owner
+  // document, so the copies would dangle once `ex_child_content` is freed.
+  // Importing everything first also keeps the operation all-or-nothing.
+  auto imported_nodes = std::vector<lxb_dom_node_t *>();
+  for (auto child_node : ex_child_content.resource->nodes) {
+    auto imported_node = lxb_dom_document_import_node(
+        parent_node->owner_document, child_node, true);
+    if (imported_node == NULL) {
+      for (auto imported : imported_nodes) {
+        lxb_dom_node_destroy_deep(imported);
+      }
+      throw std::runtime_error("failed to clone child content node");
+    }
+    imported_nodes.push_back(imported_node);
+  }
+
+  for (auto imported_node : imported_nodes) {
+    lxb_dom_node_insert_child(parent_node, imported_node);
+  }
+
+  return ex_lazy_html;
+}
+
+FINE_NIF(append_child, ERL_NIF_DIRTY_JOB_CPU_BOUND);
+
 } // namespace lazy_html FINE_INIT("Elixir.LazyHTML.NIF"); diff --git a/lib/lazy_html.ex b/lib/lazy_html.ex index 32c5f4a..ea23778 100644 --- a/lib/lazy_html.ex +++ b/lib/lazy_html.ex @@ -513,6 +513,39 @@ defmodule LazyHTML do LazyHTML.NIF.replace(lazy_html, selector, new_content) end + @doc ~S''' + Appends child content to the element matching the given CSS selector. + + The function expects exactly one element to match the selector. If no + element or more than one element matches, it raises an ArgumentError. + The child content is appended as the last child(ren) of the matched element. + + ## Examples + + iex> lazy_html = LazyHTML.from_fragment(~S|

    Existing content

    |) + iex> child_content = LazyHTML.from_fragment(~S|New child|) + iex> LazyHTML.appendChild(lazy_html, "#container", child_content) + #LazyHTML< + 1 node + #1 +

    Existing content

    New child
    + > + + iex> lazy_html = LazyHTML.from_fragment(~S|
    • Item 1
    |) + iex> child_content = LazyHTML.from_fragment(~S|
  • Item 2
  • Item 3
  • |) + iex> LazyHTML.appendChild(lazy_html, "#list", child_content) + #LazyHTML< + 1 node + #1 +
    • Item 1
    • Item 2
    • Item 3
    + > + + ''' + @spec appendChild(t(), String.t(), t()) :: t() + def appendChild(%LazyHTML{} = lazy_html, selector, %LazyHTML{} = child_content) when is_binary(selector) do + LazyHTML.NIF.append_child(lazy_html, selector, child_content) + end + @doc ~S""" Escapes the given string to make a valid HTML text. diff --git a/lib/lazy_html/nif.ex b/lib/lazy_html/nif.ex index acda97d..efcccdf 100644 --- a/lib/lazy_html/nif.ex +++ b/lib/lazy_html/nif.ex @@ -28,6 +28,7 @@ defmodule LazyHTML.NIF do def nodes(_lazy_html), do: err!() def num_nodes(_lazy_html), do: err!() def replace(_lazy_html, _css_selector, _new_content), do: err!() + def append_child(_lazy_html, _css_selector, _child_content), do: err!() defp err!(), do: :erlang.nif_error(:not_loaded) end diff --git a/test/lazy_html_test.exs b/test/lazy_html_test.exs index 1f0c0b7..20c475a 100644 --- a/test/lazy_html_test.exs +++ b/test/lazy_html_test.exs @@ -315,6 +315,89 @@ defmodule LazyHTMLTest do end end + describe "appendChild/3" do + test "appends a single child to container" do + lazy_html = LazyHTML.from_fragment(~S|

    Existing content

    |) + child_content = LazyHTML.from_fragment(~S|New child|) + + result = LazyHTML.appendChild(lazy_html, "#container", child_content) + + assert LazyHTML.to_html(result) == ~S|

    Existing content

    New child
    | + end + + test "appends multiple children to list" do + lazy_html = LazyHTML.from_fragment(~S|
    • Item 1
    |) + child_content = LazyHTML.from_fragment(~S|
  • Item 2
  • Item 3
  • |) + + result = LazyHTML.appendChild(lazy_html, "#list", child_content) + + assert LazyHTML.to_html(result) == ~S|
    • Item 1
    • Item 2
    • Item 3
    | + end + + test "appends to empty element" do + lazy_html = LazyHTML.from_fragment(~S|
    |) + child_content = LazyHTML.from_fragment(~S|

    First content

    |) + + result = LazyHTML.appendChild(lazy_html, "#empty", child_content) + + assert LazyHTML.to_html(result) == ~S|

    First content

    | + end + + test "appends mixed content types" do + lazy_html = LazyHTML.from_fragment(~S|

    Title

    |) + child_content = LazyHTML.from_fragment(~S|

    Paragraph

    • List item
    |) + + result = LazyHTML.appendChild(lazy_html, "#content", child_content) + + assert LazyHTML.to_html(result) == ~S|

    Title

    Paragraph

    • List item
    | + end + + test "preserves existing children order" do + lazy_html = LazyHTML.from_fragment(~S|
    FirstSecond
    |) + child_content = LazyHTML.from_fragment(~S|Third|) + + result = LazyHTML.appendChild(lazy_html, ".parent", child_content) + + assert LazyHTML.to_html(result) == ~S|
    FirstSecondThird
    | + end + + test "raises when no elements match" do + lazy_html = LazyHTML.from_fragment(~S|
    Content
    |) + child_content = LazyHTML.from_fragment(~S|

    Child content

    |) + + assert_raise ArgumentError, "no elements found matching selector", fn -> + LazyHTML.appendChild(lazy_html, "#nonexistent", child_content) + end + end + + test "raises when multiple elements match" do + lazy_html = LazyHTML.from_fragment(~S|
    First
    Second
    |) + child_content = LazyHTML.from_fragment(~S|

    Child content

    |) + + assert_raise ArgumentError, ~r/expected exactly 1 element matching selector.*but found 2/, fn -> + LazyHTML.appendChild(lazy_html, ".target", child_content) + end + end + + test "works with complex selectors" do + lazy_html = LazyHTML.from_fragment(~S|

    Existing

    Other
    |) + child_content = LazyHTML.from_fragment(~S|

    Appended to main

    |) + + result = LazyHTML.appendChild(lazy_html, ".content.main", child_content) + + assert LazyHTML.to_html(result) == ~S|

    Existing

    Appended to main

    Other
    | + end + + test "works with nested elements" do + lazy_html = LazyHTML.from_fragment(~S|

    Section

    |) + child_content = LazyHTML.from_fragment(~S|

    New paragraph

    |) + + result = LazyHTML.appendChild(lazy_html, "#target", child_content) + + assert LazyHTML.to_html(result) == ~S|

    Section

    New paragraph

    | + end + end + describe "query_by_id/2" do test "raises when an empty id is given" do assert_raise ArgumentError, ~r/id cannot be empty/, fn ->