diff --git a/src/docc/plugins/html/__init__.py b/src/docc/plugins/html/__init__.py index f9b8c41..88385cf 100644 --- a/src/docc/plugins/html/__init__.py +++ b/src/docc/plugins/html/__init__.py @@ -17,9 +17,9 @@ Plugin that renders to HTML. """ - import html.parser import sys +import warnings import xml.etree.ElementTree as ET from dataclasses import dataclass from io import StringIO, TextIOBase @@ -822,6 +822,241 @@ def references_definition( return None +_NON_TRANSPARENT_ELEMENTS: FrozenSet[str] = frozenset( + { + "tr", + "td", + "th", + "tbody", + "thead", + "tfoot", + "col", + "colgroup", + "caption", + "table", + "form", + "fieldset", + "legend", + "button", + "input", + "select", + "textarea", + "label", + "details", + "summary", + "dialog", + "menu", + "menuitem", + "iframe", + "object", + "embed", + "video", + "audio", + "source", + "track", + "canvas", + "map", + "area", + "svg", + "math", + } +) + + +class _NonTransparentElementVisitor(Visitor): + """ + Visitor that checks if a node or any of its descendants contains HTML + elements that cannot be descendants of an `` tag. + """ + + def __init__(self) -> None: + self.found_non_transparent = False + + def enter(self, node: Node) -> Visit: + """ + Check if the current node is a non-transparent element. + """ + if isinstance(node, HTMLTag): + if node.tag_name.lower() in _NON_TRANSPARENT_ELEMENTS: + self.found_non_transparent = True + return ( + Visit.SkipChildren + ) # No need to check children if we found one + return Visit.TraverseChildren + + def exit(self, node: Node) -> None: + """ + Called after visiting a node and its children. + """ + pass + + +def _contains_non_transparent_elements(node: Node) -> bool: + """ + Check if a node or any of its descendants contains HTML elements + that cannot be descendants of an tag. + """ + visitor = _NonTransparentElementVisitor() + node.visit(visitor) + return visitor.found_non_transparent + + +def _handle_non_transparent_reference( + context: Context, + parent: Union[HTMLRoot, HTMLTag], + reference: references.Reference, +) -> RenderResult: + """ + Handle references that contain non-transparent HTML elements. + + Strategy: Try to invert the structure when possible, otherwise + render without the link and warn the user. + """ + # Try to invert the structure. For example: + # + # ```html + # + # + # foo + # + # + # ``` + # + # Should become: + # + # ```html + # + # + # foo + # + # + inverted_content = _try_invert_reference_structure(context, reference) + if inverted_content is not None: + # Successfully inverted, render the inverted structure + visitor = HTMLVisitor(context) + inverted_content.visit(visitor) + + for child in visitor.root.children: + parent.append(child) + return None + + # If inversion failed, render without the link and warn + + warnings.warn( + f"Reference `{reference.identifier}` contains non-transparent HTML " + "elements that cannot be wrapped in an tag. Rendering without " + "link.", + UserWarning, + stacklevel=2, + ) + + # Render the child content normally without the link + visitor = HTMLVisitor(context) + reference.child.visit(visitor) + + # Append all rendered children to the parent + for child in visitor.root.children: + parent.append(child) + + return None + + +def _find_anchor_insertion_point(node: Node, href: str) -> Optional[Node]: + """ + Find a suitable place to insert an anchor tag within the node structure. + + This function looks for text nodes or simple HTML elements that can + contain an anchor tag, and creates a modified version of the node + with the anchor inserted. + """ + return _find_anchor_insertion_point_recursive(node, href, False) + + +def _find_anchor_insertion_point_recursive( + node: Node, href: str, already_has_anchor: bool +) -> Optional[Node]: + """ + Recursive helper that tracks whether we're already inside an anchor tag. + """ + if isinstance(node, HTMLTag): + # Check if we're already inside an anchor tag + if node.tag_name.lower() == "a": + # Already an anchor tag, don't create nested anchors + return node if already_has_anchor else None + + # For HTML tags, try to find a suitable child to wrap with an anchor + if node.tag_name.lower() in _NON_TRANSPARENT_ELEMENTS: + # This is a non-transparent element, look at its children + for child in node.children: + insertion_point = _find_anchor_insertion_point_recursive( + child, href, already_has_anchor + ) + if insertion_point is not None: + # Found an insertion point, create a modified version. + new_node = HTMLTag(node.tag_name, node.attributes.copy()) + # Copy all children to the new node + for original_child in node.children: + assert isinstance(original_child, (HTMLTag, TextNode)) + new_node.append(original_child) + # Replace the specific child with the modified version + new_node.replace_child(child, insertion_point) + return new_node + return None + else: + # This element can contain an anchor + if already_has_anchor: + # Don't create nested anchors + return None + # Wrap it in a new anchor + anchor = HTMLTag("a", {"href": href}) + anchor.append(node) + return anchor + elif isinstance(node, TextNode): + # Text nodes can be wrapped in an anchor, but only if we're not already + # inside one + if already_has_anchor: + return None + anchor = HTMLTag("a", {"href": href}) + anchor.append(node) + return anchor + elif hasattr(node, "children"): + # For other node types with children, try to find a suitable child + for child in node.children: + insertion_point = _find_anchor_insertion_point_recursive( + child, href, already_has_anchor + ) + if insertion_point is not None: + # Found a suitable insertion point, but we can't create a copy + # of arbitrary node types, so return None to fall back to + # warning + return None + return None + + +def _try_invert_reference_structure( + context: Context, reference: references.Reference +) -> Optional[Node]: + """ + Try to invert the reference structure to make it HTML-valid. + + For example, if we have a reference with a table row containing a cell, + we try to move the link inside the cell instead of wrapping the row. + + Returns the inverted structure if successful, None otherwise. + """ + # Get the anchor tag attributes from render_reference + try: + anchor = render_reference(context, reference) + href = anchor.attributes.get("href") + if not href: + return None + except Exception: + return None + + # Try to find a suitable place to put the anchor tag + return _find_anchor_insertion_point(reference.child, href) + + def references_reference( context: object, parent: object, @@ -834,15 +1069,18 @@ def references_reference( assert isinstance(parent, (HTMLRoot, HTMLTag)) assert isinstance(reference, references.Reference) - anchor = render_reference(context, reference) - parent.append(anchor) - if not reference.child: + anchor = render_reference(context, reference) + parent.append(anchor) anchor.append(TextNode(reference.identifier)) return None - # TODO: handle tr, td, and other elements that can't be wrapped in an . + if _contains_non_transparent_elements(reference.child): + return _handle_non_transparent_reference(context, parent, reference) + # Standard case: child content can be wrapped in an anchor + anchor = render_reference(context, reference) + parent.append(anchor) return anchor diff --git a/test_non_transaparent_tags.py b/test_non_transaparent_tags.py new file mode 100644 index 0000000..812bc15 --- /dev/null +++ b/test_non_transaparent_tags.py @@ -0,0 +1,89 @@ +""" +Test script to demonstrate the fix for non-transparent elements in references. +""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src")) + +from docc.plugins.html import ( + HTMLTag, + TextNode, + _contains_non_transparent_elements, +) +from docc.plugins import references + + +def test_non_transparent_detection(): + """Test that we can detect non-transparent elements.""" + + # Test case 1: Simple text node (should be fine) + text_node = TextNode("Hello World") + assert not _contains_non_transparent_elements(text_node) + print("✓ Text node detection works") + + # Test case 2: Simple HTML tag that can be in anchor + div_tag = HTMLTag("div") + div_tag.append(TextNode("Some content")) + assert not _contains_non_transparent_elements(div_tag) + print("✓ Div tag detection works") + + # Test case 3: Table row (should be detected as non-transparent) + tr_tag = HTMLTag("tr") + td_tag = HTMLTag("td") + td_tag.append(TextNode("Cell content")) + tr_tag.append(td_tag) + assert _contains_non_transparent_elements(tr_tag) + print("✓ Table row detection works") + + # Test case 4: Table cell (should be detected as non-transparent) + assert _contains_non_transparent_elements(td_tag) + print("✓ Table cell detection works") + + # Test case 5: Nested structure with non-transparent element + table_tag = HTMLTag("table") + table_tag.append(tr_tag) + assert _contains_non_transparent_elements(table_tag) + print("✓ Nested non-transparent detection works") + + print( + "\nAll tests passed! The non-transparent element detection is working correctly." + ) + + +def test_reference_creation(): + """Test creating references with different content types.""" + + # Test case 1: Reference with simple text + simple_ref = references.Reference("test_ref", TextNode("Simple text")) + print(f"✓ Created reference with simple text: {simple_ref.identifier}") + + # Test case 2: Reference with table content + tr_tag = HTMLTag("tr") + td_tag = HTMLTag("td") + td_tag.append(TextNode("Table cell")) + tr_tag.append(td_tag) + + table_ref = references.Reference("table_ref", tr_tag) + print(f"✓ Created reference with table content: {table_ref.identifier}") + + print("\nReference creation tests passed!") + + +if __name__ == "__main__": + print("Testing non-transparent element handling in references...\n") + + test_non_transparent_detection() + print() + test_reference_creation() + + print("\n" + "=" * 60) + print("SUMMARY:") + print("The solution successfully detects non-transparent HTML elements") + print("that cannot be descendants of tags. When such elements are") + print("found in a reference, the system will:") + print("1. Try to invert the structure (move inside suitable elements)") + print("2. If inversion fails, render without the link and warn the user") + print("3. Maintain HTML validity in all cases") + print("=" * 60) diff --git a/whitelist.txt b/whitelist.txt index 7cb4694..e37f059 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -54,6 +54,7 @@ rvalue setext setitem src +stacklevel starttag stmt strikethrough