Skip to content
171 changes: 153 additions & 18 deletions src/html/dom.d
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,14 @@ struct Node {
return c;
}

/// Clones `oldnode` through the document that owns this node and returns
/// the new node. Only this node's `document_` is consulted; `this` itself
/// is not copied.
Node* clone(Node* oldnode) {
    Node* copy = document_.clone(oldnode);
    return copy;
}

/// Clones this node through its owning document and returns the new node.
Node* clone() {
    Node* self = &this;
    return document_.clone(self);
}

package:
enum TypeMask = 0x7;
enum TypeShift = 0;
Expand Down Expand Up @@ -883,10 +891,17 @@ static auto createDocument() {


struct Document {
auto clone(Node* source) {
/// Clones `source` into this document, handing the node-level clone this
/// document and its node allocator. Returns the newly created node.
Node* clone(Node* source) {
    Node* copy = source.clone(&this, alloc_);
    return copy;
}

/// Builds a fresh Document, clones this document's root into it, installs
/// that clone as the new document's root, and returns the new document.
///
/// NOTE(review): nodes cloned here are handed `&copy` as their document
/// (see clone(Node*)), and `copy` is a local returned by value. Confirm the
/// stored document pointers remain valid for the caller's instance.
Document clone() {
    Document copy = Document();
    copy.init();
    Node* clonedRoot = copy.clone(this.root_);
    copy.root(clonedRoot);
    return copy;
}

auto createElement(HTMLString tagName, Node* parent = null) {
auto node = alloc_.alloc();
*node = Node(&this, tagName);
Expand Down Expand Up @@ -1053,31 +1068,151 @@ private:
PageAllocator!(Node, 1024) alloc_;
}

version(unittest) {
    /* Tiny test harness: failures raised through assertEqual are reported
       as a single "message at file:line" line instead of a stack trace. */
    import std.stdio: writeln;
    import core.exception: AssertError;

    /// Failure type thrown by assertEqual and recognised by the custom
    /// module unit tester installed below.
    class DUnitIsBetter: AssertError {
        this(string msg, string file, size_t line) {
            super(msg,file,line);
        }
    }

    /// Installs a module unit tester that runs each module's unittests and
    /// stops at the first DUnitIsBetter failure, printing where it happened.
    shared static this() {
        import core.runtime: Runtime, ModuleInfo;
        Runtime.moduleUnitTester = function() {
            foreach(mod; ModuleInfo) {
                if(!mod)
                    continue;

                auto test = mod.unitTest;
                if(!test)
                    continue;

                try {
                    test();
                } catch(DUnitIsBetter failure) {
                    writeln(failure.msg," at ",failure.file,":",failure.line);
                    return false;
                }
            }
            return true;
        };
    }

    /// Compares `actual` against `expected`; on mismatch prints both values
    /// and throws DUnitIsBetter tagged with the caller's file and line.
    void assertEqual(A,B)(A actual, B expected,
                          string file = __FILE__,
                          size_t line = __LINE__) {
        if(actual == expected)
            return;

        writeln("Expected:");
        writeln(expected);
        writeln("We got:");
        writeln(actual);
        throw new DUnitIsBetter("fail",file,line);
    }
}

///
unittest {
// Exercises node and document cloning: Document.clone(Node*), the
// Node.clone overloads and Document.clone().
// NOTE(review): this listing appears to interleave removed and added diff
// lines (e.g. `other` is declared twice below) — confirm against the real file.
import std.stdio;

//import htmld: createDocument;
const(char)[] s = `<parent attr="value"><child/>andsometext</parent>`;
auto doc = createDocument(s);
s = doc.root().html(); // normalize
auto c = doc.clone(doc.root());
assert(s == c.html);
assert(s == doc.root().html());
auto other = createDocument();
c = other.clone(doc.root().children.front);
assert(s == c.outerHTML);
auto me = doc.clone(doc.root());
assertEqual(me.html,s);
assertEqual(doc.root().html(),s);

// Clone a node within its own document and append the clone to the original.
auto other = createDocument("<other/>");
auto them = other.root().children.front;
them.appendChild(other.clone(them));

assertEqual(them.outerHTML,"<other><other></other></other>");

// Helper that strips newlines plus surrounding whitespace so the expected
// HTML below can be written across several lines.
import std.regex: regex, replaceAll;
auto noformat = regex(`\s*\n\s*`); // can't kill spaces between attrs
typeof(s) clean(typeof(s) s) {
return s.replaceAll(noformat,"");
}

me = me.children.front;

// Clone a node that belongs to `doc` into `other`.
me.attr("shoop", "woop");
them.appendChild(other.clone(me));

s = clean(
`<parent attr="value">
<child></child>andsometext
<parent shoop="woop" attr="value">
<child></child>andsometext
</parent>
</parent>`);
s = clean(`<other>
<other></other>
<parent shoop="woop" attr="value">
<child></child>andsometext
</parent>
</other>`);
assertEqual(them.outerHTML,s);

assertEqual(other.root().outerHTML(),
"<root>"~s~"</root>");

// The attribute set on `me` after cloning is not expected to appear in the
// clone (the expected HTML below has no "still" attribute).
other.root().appendChild(other.clone(me));
me.attr("still","here");

assertEqual(other.root.outerHTML,
clean(`<root>
<other>
<other></other>
<parent shoop="woop" attr="value">
<child></child>andsometext
</parent>
</other>
<parent shoop="woop" attr="value">
<child></child>andsometext
</parent>
</root>`));
// Node-level clone(): b clones itself; a clones a node taken from `other`.
Node* a = doc.root().firstChild;
Node* b = other.root().firstChild;
b.attr("jutsu","henge");
b.appendChild(b.clone());
a.appendChild(a.clone(b));

assertEqual(
doc.root.outerHTML,
clean(`<root>
<parent attr="value">
<child></child>andsometext
<other jutsu="henge">
<other></other>
<parent shoop="woop" attr="value">
<child></child>andsometext
</parent>
<other jutsu="henge">
<other></other>
<parent shoop="woop" attr="value">
<child></child>andsometext
</parent>
</other>
</other>
</parent>
</root>`));

// Whole-document clone must serialize identically to its source.
other = doc.clone();
assertEqual(doc.toString(),other.toString());

s = `<parent attr="value"><child></child>andsometext<parent attr="value"><child></child>andsometext</parent></parent>`;
c.appendChild(other.clone(c));
assert(s == c.outerHTML);

s = "<root>"~s~"</root>";
other.root().appendChild(c);

assert(s == other.root().outerHTML());
}



struct DOMBuilder(Document) {
this(ref Document document, Node* parent = null) {
document_ = &document;
Expand Down Expand Up @@ -1202,7 +1337,7 @@ struct DOMBuilder(Document) {
}
}

private:

Document* document_;
Node* element_;
States state_;
Expand Down
163 changes: 163 additions & 0 deletions src/html/pushdom.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
module html.pushdom;
import html.dom: Document, Node, DOMBuilder, HTMLString;
import std.stdio: writeln;

/**
 * Base class for objects that want to be told about nodes as a Builder
 * produces them. Every hook is a no-op by default; subclasses override
 * the ones they care about.
 *
 * NOTE(review): `parent` is stored by value and Builder is a struct, so
 * swap() assigns to this receiver's own copy of the builder — confirm
 * that it reaches the builder actually driving the parse.
 */
class NodeReceiver(Document) {
    /// The builder this receiver was created for (used by swap()).
    Builder!Document parent;

    this(Builder!Document parent) {
        // kept so swap() can change parent.receiver
        this.parent = parent;
    }

    /// Makes `other` the receiver of the stored builder.
    void swap(NodeReceiver!Document other) {
        parent.receiver = other;
    }

    /// Called after an element's opening tag has been completed.
    void onOpenEnd(Node* element) {}

    /// Called when an element is closed.
    void onClose(Node* element) {}

    /// Called on close when the builder has no current element, with the
    /// builder's pending text.
    void onCloseText(HTMLString text) {}

    /// Called for a self-closing element; forwards to onClose when an
    /// element is present.
    void onSelfClosing(Node* element) {
        if(!element)
            return;
        onClose(element);
    }

    /// Called once the whole document has been processed.
    void onDocumentEnd(Document* doc) {}
}

import std.string: replace, strip;
import std.format: format;

/// Expression prefix used to reach the wrapped DOMBuilder from generated code.
immutable string prefix = "souper.";

/**
 * Generates the source text of a `void name(HTMLString a0, ...)` method
 * intended to be mixed into a struct.
 *
 * Params:
 *   name  = name of the generated method and of the forwarded call
 *   nargs = number of HTMLString parameters (named a0, a1, ...)
 *   block = method body; every "@super@" in it is replaced by the call
 *           `souper.name(a0, ...)`. A trailing ';' is added when missing.
 *           An empty (or whitespace-only) block yields an empty body.
 *
 * Returns: D source code for the method, suitable for `mixin(...)`.
 */
string wrapper(string name, int nargs, string block = "@super@") {
    string arg_signature = "";
    string args = "";
    foreach(i; 0 .. nargs) {
        if(i > 0) {
            arg_signature ~= ", ";
            args ~= ", ";
        }
        auto arg = "a%d".format(i);
        arg_signature ~= "HTMLString " ~ arg;
        args ~= arg;
    }
    block = block.replace("@super@",
                          prefix~name~"("~args~")").strip;
    // Guard the index: an empty block used to be a range violation here.
    if(block.length && block[$-1] != ';') {
        block ~= ";";
    }

    return q{
        void @name@(@arg_signature@) {
            @block@
        }
    }.replace("@name@",name)
     .replace("@arg_signature@",arg_signature)
     .replace("@block@",block);
}

/**
 * Generates pass-through methods (each one only forwards to the wrapped
 * builder) for every parser callback that needs no extra handling:
 * the one-argument callbacks first, then onAttrEnd (no arguments),
 * then onEntity (two arguments).
 *
 * Returns: the concatenated source text of all generated methods.
 */
string makeBlankWrappers() {
    string[] singleArgHandlers = [
        "onText",
        "onOpenStart",
        "onAttrName",
        "onAttrValue",
        "onComment",
        "onCDATA",
        "onDeclaration",
        "onProcessingInstruction",
        "onNamedEntity",
        "onNumericEntity",
        "onHexEntity",
    ];

    string generated = "";
    foreach(handler; singleArgHandlers) {
        generated ~= wrapper(handler,1);
    }
    generated ~= wrapper("onAttrEnd",0);
    generated ~= wrapper("onEntity",2);
    return generated;
}


/**
 * Parser handler that wraps a DOMBuilder (`souper`) and additionally
 * notifies a NodeReceiver as elements are opened and closed and when the
 * document ends. All other parser callbacks are forwarded unchanged.
 *
 * `receiver` may be left null; in that case no notifications are sent
 * and the Builder simply forwards to the wrapped DOMBuilder.
 */
struct Builder(Document) {
    /// The wrapped DOM builder that does the actual tree construction.
    DOMBuilder!Document souper;

    this(ref Document document, Node* parent = null) {
        souper = DOMBuilder!Document(document,parent);
    }

    /// Receiver to notify; null disables notifications.
    NodeReceiver!Document receiver;

    // Forward first, then report the element whose opening tag just ended.
    mixin(wrapper("onOpenEnd",1,q{
        @super@;
        if(receiver)
            receiver.onOpenEnd(souper.element_);
    }));

    // Notify before forwarding, while the builder's current element/text
    // is still the one being closed.
    mixin(wrapper("onClose",1,q{
        if(receiver) {
            if(souper.element_) {
                receiver.onClose(souper.element_);
            } else {
                receiver.onCloseText(souper.text_);
            }
        }
        @super@;
    }));

    mixin(wrapper("onSelfClosing",0,q{
        if(receiver && souper.element_)
            receiver.onSelfClosing(souper.element_);
        @super@;
    }));

    mixin(wrapper("onDocumentEnd",0,q{
        @super@;
        if(receiver)
            receiver.onDocumentEnd(souper.document_);
    }));

    // Remaining callbacks are plain pass-throughs to the wrapped builder.
    mixin(makeBlankWrappers());
}

// Parses a small page through Builder and checks that a NodeReceiver
// subclass sees every <img src=...> element, in document order.
unittest {
    import html.dom: createDocument, DOMCreateOptions, ParserOptions;
    import html.parser: parseHTML;
    import std.array: Appender;

    // Collects each closed <img> element that carries a src attribute and
    // publishes the list in `images` when the document ends.
    class ImageCollector(Document): NodeReceiver!Document {
        Node*[] images;
        Appender!(Node*[]) found;

        this(Builder!Document b) {
            super(b);
        }

        override void onClose(Node* e) {
            if(e.tag != "img")
                return;
            if(!e.hasAttr("src"))
                return;
            found.put(e);
        }

        override void onDocumentEnd(Document* d) {
            images = found.data;
        }
    }

    enum parserOptions = ((DOMCreateOptions.Default & DOMCreateOptions.DecodeEntities) ? ParserOptions.DecodeEntities : 0);
    auto document = createDocument();
    auto builder = Builder!Document(document);
    ImageCollector!Document collector = new ImageCollector!Document(builder);
    builder.receiver = collector;
    HTMLString source = `
<html>
<head>
<title>whatever</title>
</head>
<body>
<img src="one.png"/>
<img src="two.png"/>
<p>
<img src="three.png"/>
</p>
</body>
</html>`;
    parseHTML!(typeof(builder), parserOptions)(source, builder);
    writeln(document.root.html);

    string joined = "";
    foreach(img; collector.images) {
        writeln("image: ",img.attr("src"));
        joined ~= img.attr("src");
    }
    assert(joined=="one.pngtwo.pngthree.png");
}