diff --git a/c-api/include/lol_html.h b/c-api/include/lol_html.h index 6b0894f5..237b1b92 100644 --- a/c-api/include/lol_html.h +++ b/c-api/include/lol_html.h @@ -23,6 +23,7 @@ typedef struct lol_html_HtmlRewriterBuilder lol_html_rewriter_builder_t; typedef struct lol_html_HtmlRewriter lol_html_rewriter_t; typedef struct lol_html_Doctype lol_html_doctype_t; typedef struct lol_html_DocumentEnd lol_html_doc_end_t; +typedef struct lol_html_EndTag lol_html_end_tag_t; typedef struct lol_html_Comment lol_html_comment_t; typedef struct lol_html_TextChunk lol_html_text_chunk_t; typedef struct lol_html_Element lol_html_element_t; @@ -110,6 +111,11 @@ typedef lol_html_rewriter_directive_t (*lol_html_doc_end_handler_t)( void *user_data ); +typedef lol_html_rewriter_directive_t (*lol_html_end_tag_handler_t)( + lol_html_end_tag_t *end_tag, + void *user_data +); + // Selector //--------------------------------------------------------------------- @@ -671,6 +677,69 @@ void lol_html_element_user_data_set( // Returns user data attached to the text chunk. void *lol_html_element_user_data_get(const lol_html_element_t *element); +// Adds content handlers to the builder for the end tag of the given element. +// +// Subsequent calls to the method on the same element replace the previous handler. +// +// The handler can optionally have associated user data which will be +// passed to the handler on each invocation along with the rewritable +// unit argument. +// +// If the handler returns LOL_HTML_STOP directive then rewriting +// stops immediately and `write()` or `end()` of the rewriter methods +// return an error code. +// +// Returns 0 in case of success and -1 otherwise. The actual error message +// can be obtained using `lol_html_take_last_error` function. +// +// WARNING: Pointers passed to handlers are valid only during the +// handler execution. So they should never be leaked outside of handlers. +int lol_html_element_on_end_tag(lol_html_element_t* element, lol_html_end_tag_handler_t end_tag_handler, void* user_data); + +// Inserts the content string before the element's end tag either as raw text or as HTML. +// +// Content should be a valid UTF8-string. +// +// Returns 0 in case of success and -1 otherwise. The actual error message +// can be obtained using `lol_html_take_last_error` function. +int lol_html_end_tag_before( + lol_html_end_tag_t *end_tag, + const char *content, + size_t content_len, + bool is_html +); + +// Inserts the content string right after the element's end tag as raw text or as HTML. +// +// Content should be a valid UTF8-string. +// +// Returns 0 in case of success and -1 otherwise. The actual error message +// can be obtained using `lol_html_take_last_error` function. +int lol_html_end_tag_after( + lol_html_end_tag_t *end_tag, + const char *content, + size_t content_len, + bool is_html +); + +// Removes the end tag. +void lol_html_end_tag_remove(lol_html_end_tag_t *end_tag); + +// Returns the end tag name. +lol_html_str_t lol_html_end_tag_name_get(const lol_html_end_tag_t *end_tag); + +// Sets the tag name of the end tag. +// +// Name should be a valid UTF8-string. +// +// Returns 0 in case of success and -1 otherwise. The actual error message +// can be obtained using `lol_html_take_last_error` function. +int lol_html_end_tag_name_set( + lol_html_end_tag_t *end_tag, + const char *name, + size_t name_len +); + // Inserts the content at the end of the document, either as raw text or as HTML. // // The content should be a valid UTF-8 string. diff --git a/c-api/src/element.rs b/c-api/src/element.rs index 178f35e8..c0bc0936 100644 --- a/c-api/src/element.rs +++ b/c-api/src/element.rs @@ -217,3 +217,64 @@ pub extern "C" fn lol_html_element_user_data_set(element: *mut Element, user_dat pub extern "C" fn lol_html_element_user_data_get(element: *mut Element) -> *mut c_void { get_user_data!(element) } + +type EndTagHandler = unsafe extern "C" fn(*mut EndTag, *mut c_void) -> RewriterDirective; + +#[no_mangle] +pub extern "C" fn lol_html_element_on_end_tag( + element: *mut Element, + handler: EndTagHandler, + user_data: *mut c_void, +) -> c_int { + let element = to_ref_mut!(element); + let () = unwrap_or_ret_err_code!(element.on_end_tag(move |end_tag| { + match unsafe { handler(end_tag, user_data) } { + RewriterDirective::Continue => Ok(()), + RewriterDirective::Stop => Err("The rewriter has been stopped.".into()), + } + })); + 0 +} + +#[no_mangle] +pub extern "C" fn lol_html_end_tag_before( + end_tag: *mut EndTag, + content: *const c_char, + content_len: size_t, + is_html: bool, +) -> c_int { + content_insertion_fn_body! { end_tag.before(content, content_len, is_html) } +} + +#[no_mangle] +pub extern "C" fn lol_html_end_tag_after( + end_tag: *mut EndTag, + content: *const c_char, + content_len: size_t, + is_html: bool, +) -> c_int { + content_insertion_fn_body! { end_tag.after(content, content_len, is_html) } +} + +#[no_mangle] +pub extern "C" fn lol_html_end_tag_remove(end_tag: *mut EndTag) { + to_ref_mut!(end_tag).remove(); +} + +#[no_mangle] +pub extern "C" fn lol_html_end_tag_name_get(end_tag: *mut EndTag) -> Str { + let tag = to_ref_mut!(end_tag); + Str::new(tag.name()) +} + +#[no_mangle] +pub extern "C" fn lol_html_end_tag_name_set( + end_tag: *mut EndTag, + name: *const c_char, + len: size_t, +) -> c_int { + let tag = to_ref_mut!(end_tag); + let name = unwrap_or_ret_err_code! { to_str!(name, len) }; + tag.set_name_str(name.to_string()); + 0 +} diff --git a/c-api/tests/src/deps/picotest/picotest.c b/c-api/tests/src/deps/picotest/picotest.c index ddf59de8..4822b421 100644 --- a/c-api/tests/src/deps/picotest/picotest.c +++ b/c-api/tests/src/deps/picotest/picotest.c @@ -26,6 +26,7 @@ #include #include #include "picotest.h" +#include "../../../../include/lol_html.h" struct test_t { int num_tests; @@ -75,6 +76,16 @@ void _ok(int cond, const char *fmt, ...) fflush(stdout); } +void _lol_ok(int cond, const char *file, int line) { + cond = !cond; // lol-html returns 0 on success + _ok(cond, "%s %d", file, line); + if (!cond) { + lol_html_str_t err = lol_html_take_last_error(); + assert(err.data != NULL && err.len != 0); + printf("err: last lol_html err: %s", err.data); + } +} + int done_testing(void) { indent(); diff --git a/c-api/tests/src/deps/picotest/picotest.h b/c-api/tests/src/deps/picotest/picotest.h index 63b4d583..3339f48b 100644 --- a/c-api/tests/src/deps/picotest/picotest.h +++ b/c-api/tests/src/deps/picotest/picotest.h @@ -22,13 +22,17 @@ #ifndef picotest_h #define picotest_h +#include + #ifdef __cplusplus extern "C" { #endif void note(const char *fmt, ...) __attribute__((format (printf, 1, 2))); void _ok(int cond, const char *fmt, ...) __attribute__((format (printf, 2, 3))); +void _lol_ok(int cond, const char *file, int line); #define ok(cond) _ok(cond, "%s %d", __FILE__, __LINE__) +#define lol_ok(cond) _lol_ok(cond, __FILE__, __LINE__) int done_testing(void); void subtest(const char *name, void (*cb)(void)); diff --git a/c-api/tests/src/test_element_api.c b/c-api/tests/src/test_element_api.c index 739dd6bb..e6e34a75 100644 --- a/c-api/tests/src/test_element_api.c +++ b/c-api/tests/src/test_element_api.c @@ -617,6 +617,48 @@ static void test_stop(lol_html_selector_t *selector, void *user_data) { ok(!err); } +static lol_html_rewriter_directive_t modify_element_end_tag_name_inner(lol_html_end_tag_t *end_tag, void *user_data) { + int times_run = *(int*)user_data; + + if (times_run == 0) { + lol_ok(lol_html_end_tag_before(end_tag, "!", 1, false)); + const char *after_html = "extra data"; + lol_ok(lol_html_end_tag_after(end_tag, after_html, strlen(after_html), true)); + + lol_html_str_t name = lol_html_end_tag_name_get(end_tag); + str_eq(name, "div"); + + lol_ok(lol_html_end_tag_name_set(end_tag, "div1", strlen("div1"))); + name = lol_html_end_tag_name_get(end_tag); + str_eq(name, "div1"); + } else { + lol_html_end_tag_remove(end_tag); + } + + return LOL_HTML_CONTINUE; +} + +static lol_html_rewriter_directive_t modify_element_end_tag_name_outer( + lol_html_element_t *element, + void *user_data +) { + UNUSED(user_data); + + static int times_run = -1; // so that it will be 0 on the first call to `inner` + + lol_ok(lol_html_element_on_end_tag(element, modify_element_end_tag_name_inner, ×_run)); + times_run += 1; + + return LOL_HTML_CONTINUE; +} + +EXPECT_OUTPUT( + modify_element_end_tag, + "
42!extra data
some data", + &EXPECTED_USER_DATA, + sizeof(EXPECTED_USER_DATA) +); + void element_api_test() { int user_data = 43; @@ -718,4 +760,30 @@ void element_api_test() { lol_html_selector_free(selector); } + + { + note("EndTagChange"); + + const char *selector_str = "div"; + lol_html_selector_t *selector = lol_html_selector_parse( + selector_str, + strlen(selector_str) + ); + + lol_html_rewriter_builder_t *builder = lol_html_rewriter_builder_new(); + + lol_ok(lol_html_rewriter_builder_add_element_content_handlers( + builder, + selector, + modify_element_end_tag_name_outer, + NULL, + NULL, + NULL, + NULL, + NULL + )); + + const char *input = "
42
some data
"; + run_rewriter(builder, input, modify_element_end_tag, &user_data); + } } diff --git a/src/base/bytes.rs b/src/base/bytes.rs index 5f232654..828811f2 100644 --- a/src/base/bytes.rs +++ b/src/base/bytes.rs @@ -20,6 +20,15 @@ impl<'b> Bytes<'b> { encoding.encode(string).0.into() } + /// Same as `Bytes::from_str(&string).into_owned()`, but avoids copying in the common case where + /// the output and input encodings are the same. + pub fn from_string(string: String, encoding: &'static Encoding) -> Bytes<'static> { + Bytes(Cow::Owned(match encoding.encode(&string).0 { + Cow::Owned(bytes) => bytes, + Cow::Borrowed(_) => string.into_bytes(), + })) + } + #[inline] pub fn from_str_without_replacements( string: &'b str, diff --git a/src/rewritable_units/tokens/end_tag.rs b/src/rewritable_units/tokens/end_tag.rs index c2ac2248..54577de1 100644 --- a/src/rewritable_units/tokens/end_tag.rs +++ b/src/rewritable_units/tokens/end_tag.rs @@ -36,6 +36,11 @@ impl<'i> EndTag<'i> { self.raw = None; } + #[inline] + pub fn set_name_str(&mut self, name: String) { + self.set_name(Bytes::from_string(name, self.encoding)) + } + #[inline] pub fn before(&mut self, content: &str, content_type: ContentType) { self.mutations.before(content, content_type);