From 83ebf1db399387170d6c224f18a0cbe404d2ea90 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 25 Mar 2016 07:35:09 +0000 Subject: [PATCH 01/31] Removing tree inconsistencies Leftover bad links to detached/moved nodes. A few times I think a link back was needed, to remove it from the list. Some last/first child nodes not getting updated. --- src/html/dom.d | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/html/dom.d b/src/html/dom.d index 068c00a..425c6cc 100644 --- a/src/html/dom.d +++ b/src/html/dom.d @@ -325,7 +325,9 @@ struct Node { assert(!lastChild_); firstChild_ = node; lastChild_ = node; + lastChild_.next_ = null; } + firstChild_.prev_ = null; } void appendChild(Node* node) { @@ -343,8 +345,10 @@ struct Node { } else { assert(!firstChild_); firstChild_ = node; + firstChild_.prev_ = null; lastChild_ = node; } + lastChild_.next_ = null; } void removeChild(Node* node) { @@ -391,19 +395,25 @@ struct Node { firstChild_ = node; lastChild_ = node; } + lastChild_.next_ = null; node.parent_ = &this; } void insertBefore(Node* node) { assert(document_ == node.document_); - + assert(node); + parent_ = node.parent_; prev_ = node.prev_; next_ = node; node.prev_ = &this; - if (parent_ && (parent_.firstChild_ == node)) - parent_.firstChild_ = &this; + if (parent_ && (parent_.firstChild_ == node)) { + assert(!prev_); + parent_.firstChild_ = &this; + } else if(prev_) { + prev_.next_ = &this; + } } void insertAfter(Node* node) { @@ -413,6 +423,12 @@ struct Node { prev_ = node; next_ = node.next_; node.next_ = &this; + if(parent_ && (parent_.lastChild_ == node)) { + assert(!next_); + parent_.lastChild_ = &this; + } else if(next_) { + next_.prev_ = &this; + } } void detach() { @@ -444,7 +460,16 @@ struct Node { prev_ = null; } parent_ = null; + } else { + if(prev_) { + prev_.next_ = next_; + } + if(next_) { + next_.prev_ = prev_; + } } + prev_ = null; + next_ = null; } package void detachFast() { @@ -471,7 +496,16 @@ struct Node { next_.prev_ = prev_; } } + } else { + if(prev_) { + prev_.next_ = next_; + } + if(next_) { + next_.prev_ = prev_; + } } + prev_ = null; + next_ = null; } void destroy() { From d909c7a1a685913773575e4e9c53432e378d631e Mon Sep 17 00:00:00 2001 From: user Date: Sun, 27 Mar 2016 06:10:19 +0000 Subject: [PATCH 02/31] auto (/extra/home/packages/git/dlang/htmld/src/html/dom.d) 50 902 --- src/html/dom.d | 55 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/src/html/dom.d b/src/html/dom.d index 425c6cc..71712ca 100644 --- a/src/html/dom.d +++ b/src/html/dom.d @@ -100,6 +100,8 @@ private struct AncestorsForward(NodeType, alias Condition = null) { private struct DescendantsDFForward(NodeType, alias Condition = null) { this(NodeType* first) { curr_ = first; + import std.stdio : writeln; + writeln("mocukent ",curr_.document_); top_ = (first && first.parent_) ? first.parent_ : null; static if (!is(typeof(Condition) == typeof(null))) { if (!Condition(first)) @@ -331,6 +333,10 @@ struct Node { } void appendChild(Node* node) { + if(document_ != node.document_) { + import std.stdio; + writeln("oyyy ",document_," ",node.document_); + } assert(document_ == node.document_); assert(isElementNode, "cannot append to non-element nodes"); @@ -402,7 +408,7 @@ struct Node { void insertBefore(Node* node) { assert(document_ == node.document_); assert(node); - + parent_ = node.parent_; prev_ = node.prev_; next_ = node; @@ -830,7 +836,7 @@ struct Node { return document_.clone(&this); } -package: +public:/*package:*/ enum TypeMask = 0x7; enum TypeShift = 0; enum FlagsBit = TypeMask + 1; @@ -853,7 +859,7 @@ package: Document* document_; } -auto createDocument(size_t options = DOMCreateOptions.Default)(HTMLString source) { +auto ref createDocument(size_t options = DOMCreateOptions.Default)(HTMLString source) { enum parserOptions = ((options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0); auto document = createDocument(); @@ -913,15 +919,16 @@ unittest { -static auto createDocument() { - auto document = Document(); - document.init(); +static auto ref createDocument() { + auto document = new Document; + document.initialize(); document.root(document.createElement("root")); return document; } struct Document { + @disable this(this); auto createElement(HTMLString tagName, Node* parent = null) { auto node = alloc_.alloc(); *node = Node(&this, tagName); @@ -984,9 +991,9 @@ struct Document { return source.clone(&this); } - Document clone() const { - Document other = Document(); - other.init(); + Document* clone() const { + Document* other = new Document; + other.initialize(); other.root(other.clone(this.root_)); return other; } @@ -1039,8 +1046,9 @@ struct Document { NodeWrapper!(const(Node)) querySelector(Selector selector, const(Node)* context = null) const { auto top = context ? context : root_; - foreach(node; DescendantsDFForward!(const(Node), mixin(OnlyElements))(top)) { + assert(node.document_ == top.document_); + assert(&this == node.document_); if (selector.matches(node)) return node; } @@ -1051,6 +1059,8 @@ struct Document { auto top = context ? context : root_; foreach(node; DescendantsDFForward!(Node, mixin(OnlyElements))(top)) { + assert(node.document_ == top.document_); + assert(&this == node.document_); if (selector.matches(node)) return node; } @@ -1077,6 +1087,8 @@ struct Document { QuerySelectorAllResult querySelectorAll(Selector selector, Node* context = null) { auto top = context ? context : root_; + import std.stdio : writeln; + writeln("dockent ",top.document_,context); return QuerySelectorMatcher!(Node, DescendantsDFForward!Node)(selector, DescendantsDFForward!Node(top)); } @@ -1094,8 +1106,8 @@ struct Document { return alloc_; } -private: - void init() { +public:/*private:*/ + void initialize() { alloc_.init; } @@ -1135,10 +1147,19 @@ unittest { assert(docc.root.html == doc.root.html, docc.root.html); } +unittest { + auto foo() { + return createDocument(); + } + + auto bar = foo(); + bar.root.appendChild(bar.createElement("yay")); +} + struct DOMBuilder(Document) { - this(ref Document document, Node* parent = null) { - document_ = &document; + this(Document* document, Node* parent = null) { + document_ = document; element_ = parent ? parent : document.root; } @@ -1260,7 +1281,7 @@ struct DOMBuilder(Document) { } } -private: +public:/*private:*/ Document* document_; Node* element_; States state_; @@ -1543,7 +1564,7 @@ private struct Rule { return relation_; } -package: +public:/*package:*/ ushort flags_; MatchType match_; Relation relation_; @@ -1922,7 +1943,7 @@ struct Selector { return true; } -private: +public:/*private:*/ HTMLString source_; Rule[] rules_; } From 786a80048749d2852e9cc9481183a9ce690843de Mon Sep 17 00:00:00 2001 From: user Date: Mon, 28 Mar 2016 23:17:19 +0000 Subject: [PATCH 03/31] leave entities to character parsers --- src/html/dom.d | 42 ++++++++++++++++++++++++++---------------- src/html/parser.d | 6 +++++- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/html/dom.d b/src/html/dom.d index 71712ca..d943c84 100644 --- a/src/html/dom.d +++ b/src/html/dom.d @@ -100,8 +100,6 @@ private struct AncestorsForward(NodeType, alias Condition = null) { private struct DescendantsDFForward(NodeType, alias Condition = null) { this(NodeType* first) { curr_ = first; - import std.stdio : writeln; - writeln("mocukent ",curr_.document_); top_ = (first && first.parent_) ? first.parent_ : null; static if (!is(typeof(Condition) == typeof(null))) { if (!Condition(first)) @@ -294,8 +292,7 @@ struct Node { enum parserOptions = ((options & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0); destroyChildren(); - - auto builder = DOMBuilder!(Document)(*document_, &this); + auto builder = DOMBuilder!(Document)(document_, &this); parseHTML!(typeof(builder), parserOptions)(html, builder); } @@ -440,6 +437,7 @@ struct Node { void detach() { if (parent_) { if (parent_.firstChild_ == &this) { + assert(!prev_); parent_.firstChild_ = next_; if (next_) { next_.prev_ = null; @@ -564,7 +562,7 @@ struct Node { } break; case Text: - writeHTMLEscaped(app, tag_); + app.put(tag_); break; case Comment: app.put("