Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions c-api/include/lol_html.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ typedef struct lol_html_HtmlRewriterBuilder lol_html_rewriter_builder_t;
typedef struct lol_html_HtmlRewriter lol_html_rewriter_t;
typedef struct lol_html_Doctype lol_html_doctype_t;
typedef struct lol_html_DocumentEnd lol_html_doc_end_t;
typedef struct lol_html_EndTag lol_html_end_tag_t;
typedef struct lol_html_Comment lol_html_comment_t;
typedef struct lol_html_TextChunk lol_html_text_chunk_t;
typedef struct lol_html_Element lol_html_element_t;
Expand Down Expand Up @@ -110,6 +111,11 @@ typedef lol_html_rewriter_directive_t (*lol_html_doc_end_handler_t)(
void *user_data
);

// Signature of a handler invoked for an element's end tag. It receives the
// end tag and the user data pointer supplied when the handler was registered
// via `lol_html_element_on_end_tag`, and returns a rewriter directive.
typedef lol_html_rewriter_directive_t (*lol_html_end_tag_handler_t)(
lol_html_end_tag_t *end_tag,
void *user_data
);

// Selector
//---------------------------------------------------------------------

Expand Down Expand Up @@ -671,6 +677,69 @@ void lol_html_element_user_data_set(
// Returns user data attached to the element.
void *lol_html_element_user_data_get(const lol_html_element_t *element);

// Registers a handler for the end tag of the given element.
//
// Subsequent calls to the function on the same element replace the previous handler.
//
// The handler can optionally have associated user data which will be
// passed to the handler on each invocation along with the end tag.
//
// If the handler returns the LOL_HTML_STOP directive then rewriting
// stops immediately and the `write()` or `end()` methods of the rewriter
// return an error code.
//
// Returns 0 in case of success and -1 otherwise. The actual error message
// can be obtained using `lol_html_take_last_error` function.
//
// WARNING: Pointers passed to handlers are valid only during the
// handler execution. So they should never be leaked outside of handlers.
int lol_html_element_on_end_tag(lol_html_element_t* element, lol_html_end_tag_handler_t end_tag_handler, void* user_data);

// Inserts the content string before the element's end tag either as raw text or as HTML.
//
// `is_html` selects the mode: when true the content is emitted as HTML markup,
// otherwise it is treated as raw text.
//
// Content should be a valid UTF-8 string of `content_len` bytes.
//
// Returns 0 in case of success and -1 otherwise. The actual error message
// can be obtained using `lol_html_take_last_error` function.
int lol_html_end_tag_before(
lol_html_end_tag_t *end_tag,
const char *content,
size_t content_len,
bool is_html
);

// Inserts the content string right after the element's end tag either as raw text or as HTML.
//
// `is_html` selects the mode: when true the content is emitted as HTML markup,
// otherwise it is treated as raw text.
//
// Content should be a valid UTF-8 string of `content_len` bytes.
//
// Returns 0 in case of success and -1 otherwise. The actual error message
// can be obtained using `lol_html_take_last_error` function.
int lol_html_end_tag_after(
lol_html_end_tag_t *end_tag,
const char *content,
size_t content_len,
bool is_html
);

// Removes the end tag from the output. Only the end tag itself is dropped;
// content preceding it is left in place.
void lol_html_end_tag_remove(lol_html_end_tag_t *end_tag);

// Returns the end tag name.
//
// NOTE(review): the result appears to be a freshly created string on every
// call; presumably the caller must release it with the library's string-free
// function — confirm.
lol_html_str_t lol_html_end_tag_name_get(const lol_html_end_tag_t *end_tag);

// Sets the tag name of the end tag.
//
// Name should be a valid UTF-8 string of `name_len` bytes; it does not need
// to be NUL-terminated because the length is passed explicitly.
//
// Returns 0 in case of success and -1 otherwise. The actual error message
// can be obtained using `lol_html_take_last_error` function.
int lol_html_end_tag_name_set(
lol_html_end_tag_t *end_tag,
const char *name,
size_t name_len
);

// Inserts the content at the end of the document, either as raw text or as HTML.
//
// The content should be a valid UTF-8 string.
Expand Down
61 changes: 61 additions & 0 deletions c-api/src/element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,64 @@ pub extern "C" fn lol_html_element_user_data_set(element: *mut Element, user_dat
// Returns the user data pointer attached to `element`; the lookup itself is
// performed by the `get_user_data!` macro.
pub extern "C" fn lol_html_element_user_data_get(element: *mut Element) -> *mut c_void {
get_user_data!(element)
}

/// C callback invoked with an end tag and the caller-supplied user data pointer.
/// The returned directive decides whether rewriting continues or stops.
type EndTagHandler = unsafe extern "C" fn(*mut EndTag, *mut c_void) -> RewriterDirective;

/// Registers `handler` to be called for the end tag of `element`.
///
/// `user_data` is captured by the registered closure and handed back to
/// `handler` unchanged. A `RewriterDirective::Stop` result from the handler is
/// turned into the error "The rewriter has been stopped.".
///
/// Returns 0 once the handler has been registered. A failure reported by
/// `on_end_tag` is routed through `unwrap_or_ret_err_code!` (the C header
/// documents -1 as the error code).
#[no_mangle]
pub extern "C" fn lol_html_element_on_end_tag(
element: *mut Element,
handler: EndTagHandler,
user_data: *mut c_void,
) -> c_int {
let element = to_ref_mut!(element);
// `let () =` pins the success value of `on_end_tag` to the unit type.
let () = unwrap_or_ret_err_code!(element.on_end_tag(move |end_tag| {
// NOTE(review): soundness of this call relies on the C caller having passed
// a valid function pointer and a `user_data` pointer that is still valid
// when the end tag is reached — confirm this is documented for callers.
match unsafe { handler(end_tag, user_data) } {
RewriterDirective::Continue => Ok(()),
RewriterDirective::Stop => Err("The rewriter has been stopped.".into()),
}
}));
0
}

/// C entry point that inserts `content` (`content_len` bytes) before the end tag.
///
/// `is_html` is forwarded together with the content; argument conversion and
/// the returned status code are produced by `content_insertion_fn_body!`.
#[no_mangle]
pub extern "C" fn lol_html_end_tag_before(
end_tag: *mut EndTag,
content: *const c_char,
content_len: size_t,
is_html: bool,
) -> c_int {
content_insertion_fn_body! { end_tag.before(content, content_len, is_html) }
}

/// C entry point that inserts `content` (`content_len` bytes) after the end tag.
///
/// `is_html` is forwarded together with the content; argument conversion and
/// the returned status code are produced by `content_insertion_fn_body!`.
#[no_mangle]
pub extern "C" fn lol_html_end_tag_after(
end_tag: *mut EndTag,
content: *const c_char,
content_len: size_t,
is_html: bool,
) -> c_int {
content_insertion_fn_body! { end_tag.after(content, content_len, is_html) }
}

/// C entry point that removes the end tag behind `end_tag`.
#[no_mangle]
pub extern "C" fn lol_html_end_tag_remove(end_tag: *mut EndTag) {
    let end_tag = to_ref_mut!(end_tag);
    end_tag.remove();
}

/// C entry point that returns the name of the end tag wrapped in a new `Str`.
#[no_mangle]
pub extern "C" fn lol_html_end_tag_name_get(end_tag: *mut EndTag) -> Str {
    Str::new(to_ref_mut!(end_tag).name())
}

/// C entry point that replaces the name of the end tag.
///
/// `name`/`len` describe the new name; it is converted with `to_str!`, and a
/// conversion failure is routed through `unwrap_or_ret_err_code!`. Returns 0
/// once the name has been set.
#[no_mangle]
pub extern "C" fn lol_html_end_tag_name_set(
    end_tag: *mut EndTag,
    name: *const c_char,
    len: size_t,
) -> c_int {
    // Resolve the tag pointer first, then convert the name, so the order of
    // checks matches what callers have observed so far.
    let end_tag = to_ref_mut!(end_tag);
    let new_name = unwrap_or_ret_err_code!(to_str!(name, len));

    end_tag.set_name_str(new_name.to_string());

    0
}
11 changes: 11 additions & 0 deletions c-api/tests/src/deps/picotest/picotest.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <stdio.h>
#include <string.h>
#include "picotest.h"
#include "../../../../include/lol_html.h"

struct test_t {
int num_tests;
Expand Down Expand Up @@ -75,6 +76,16 @@ void _ok(int cond, const char *fmt, ...)
fflush(stdout);
}

// Test assertion for lol-html calls, which report success as 0.
//
// `cond` is the return code of the call under test; `file` and `line`
// identify the call site and are used as the test description. When the call
// failed, the library's last error message is printed as extra diagnostics.
void _lol_ok(int cond, const char *file, int line) {
    cond = !cond; // lol-html returns 0 on success
    _ok(cond, "%s %d", file, line);
    if (!cond) {
        lol_html_str_t err = lol_html_take_last_error();
        assert(err.data != NULL && err.len != 0);
        // The error string carries an explicit length, so bound the read with a
        // precision instead of relying on NUL termination via a bare `%s`.
        // The trailing newline keeps the message from merging into the next
        // line of test output.
        printf("err: last lol_html err: %.*s\n", (int)err.len, err.data);
        fflush(stdout);
    }
}

int done_testing(void)
{
indent();
Expand Down
4 changes: 4 additions & 0 deletions c-api/tests/src/deps/picotest/picotest.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,17 @@
#ifndef picotest_h
#define picotest_h

#include <assert.h>

#ifdef __cplusplus
extern "C" {
#endif

void note(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
void _ok(int cond, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
// Like `_ok`, but `cond` is a lol-html return code, so 0 counts as success.
void _lol_ok(int cond, const char *file, int line);
#define ok(cond) _ok(cond, "%s %d", __FILE__, __LINE__)
// Checks that a lol-html call returned 0, labelling the check with the call site.
#define lol_ok(cond) _lol_ok(cond, __FILE__, __LINE__)
int done_testing(void);
void subtest(const char *name, void (*cb)(void));

Expand Down
68 changes: 68 additions & 0 deletions c-api/tests/src/test_element_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,48 @@ static void test_stop(lol_html_selector_t *selector, void *user_data) {
ok(!err);
}

// End tag handler used by the "EndTagChange" test.
//
// `user_data` points to an int counter. When it reads 0 the handler inserts
// content around the end tag and renames it from "div" to "div1"; for any
// other value it removes the end tag.
static lol_html_rewriter_directive_t modify_element_end_tag_name_inner(lol_html_end_tag_t *end_tag, void *user_data) {
    const int invocation = *(int *)user_data;

    if (invocation != 0) {
        lol_html_end_tag_remove(end_tag);
        return LOL_HTML_CONTINUE;
    }

    const char *trailing_markup = "<span>extra data</span>";

    lol_ok(lol_html_end_tag_before(end_tag, "!", 1, false));
    lol_ok(lol_html_end_tag_after(end_tag, trailing_markup, strlen(trailing_markup), true));

    // The name must read back as the original one before the rename...
    lol_html_str_t tag_name = lol_html_end_tag_name_get(end_tag);
    str_eq(tag_name, "div");

    // ...and as the new one right after it.
    lol_ok(lol_html_end_tag_name_set(end_tag, "div1", strlen("div1")));
    tag_name = lol_html_end_tag_name_get(end_tag);
    str_eq(tag_name, "div1");

    return LOL_HTML_CONTINUE;
}

// Element handler used by the "EndTagChange" test: registers the end tag
// handler for each matched element and advances the shared counter that the
// end tag handler receives as its user data.
static lol_html_rewriter_directive_t modify_element_end_tag_name_outer(
    lol_html_element_t *element,
    void *user_data
) {
    // Starts at -1 so that the end tag handler sees 0 for the first element.
    static int call_index = -1;

    UNUSED(user_data);

    lol_ok(lol_html_element_on_end_tag(element, modify_element_end_tag_name_inner, &call_index));
    ++call_index;

    return LOL_HTML_CONTINUE;
}

// Expected output for the "EndTagChange" test input
// "<div>42</div><div>some data</div>": the first end tag gets "!" inserted
// before it, is renamed to "div1" and is followed by the inserted <span>;
// the second end tag is removed entirely.
EXPECT_OUTPUT(
modify_element_end_tag,
"<div>42!</div1><span>extra data</span><div>some data",
&EXPECTED_USER_DATA,
sizeof(EXPECTED_USER_DATA)
);

void element_api_test() {
int user_data = 43;

Expand Down Expand Up @@ -718,4 +760,30 @@ void element_api_test() {

lol_html_selector_free(selector);
}

{
    // Exercises the end tag API: the element handler registers an end tag
    // handler that edits the first </div> and removes the second one.
    note("EndTagChange");

    const char *selector_str = "div";
    lol_html_selector_t *selector = lol_html_selector_parse(
        selector_str,
        strlen(selector_str)
    );

    lol_html_rewriter_builder_t *builder = lol_html_rewriter_builder_new();

    lol_ok(lol_html_rewriter_builder_add_element_content_handlers(
        builder,
        selector,
        modify_element_end_tag_name_outer,
        NULL,
        NULL,
        NULL,
        NULL,
        NULL
    ));

    const char *input = "<div>42</div><div>some data</div>";
    run_rewriter(builder, input, modify_element_end_tag, &user_data);

    // Release the parsed selector once the rewriter run is over, as the
    // preceding test scope does; otherwise it is leaked.
    lol_html_selector_free(selector);
}
}
9 changes: 9 additions & 0 deletions src/base/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ impl<'b> Bytes<'b> {
encoding.encode(string).0.into()
}

/// Encodes an owned `String` into owned `Bytes` using `encoding`.
///
/// When the encoder hands back borrowed output (no re-encoding was needed),
/// the `String`'s own buffer is reused instead of being copied; otherwise the
/// newly encoded buffer is used.
pub fn from_string(string: String, encoding: &'static Encoding) -> Bytes<'static> {
    // Only keep the encoder's buffer when it actually allocated one. The borrow
    // of `string` ends with this `match`, so it can be consumed afterwards.
    let reencoded = match encoding.encode(&string).0 {
        Cow::Owned(bytes) => Some(bytes),
        Cow::Borrowed(_) => None,
    };

    let buffer = reencoded.unwrap_or_else(|| string.into_bytes());
    Bytes(Cow::Owned(buffer))
}

#[inline]
pub fn from_str_without_replacements(
string: &'b str,
Expand Down
5 changes: 5 additions & 0 deletions src/rewritable_units/tokens/end_tag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ impl<'i> EndTag<'i> {
self.raw = None;
}

/// Sets the tag name from an owned `String`, converting it to bytes in the
/// end tag's encoding before delegating to `set_name`.
#[inline]
pub fn set_name_str(&mut self, name: String) {
    let encoded_name = Bytes::from_string(name, self.encoding);
    self.set_name(encoded_name);
}

#[inline]
pub fn before(&mut self, content: &str, content_type: ContentType) {
self.mutations.before(content, content_type);
Expand Down