Skip to content

Commit 020cc1f

Browse files
committed
Fix HTML parsing of incomplete and invalid character entity references
1 parent 0f85aa8 commit 020cc1f

File tree

2 files changed

+30
-14
lines changed

2 files changed

+30
-14
lines changed

generic/domhtml.c

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,7 @@ static void TranslateEntityRefs (
598598
value += c-'a' + 10;
599599
} else {
600600
/* error */
601+
break;
601602
}
602603
i++;
603604
}
@@ -608,28 +609,36 @@ static void TranslateEntityRefs (
608609
value += c-'0';
609610
} else {
610611
/* error */
612+
break;
611613
}
612614
i++;
613615
}
614616
}
615-
if (z[i]!=';') {
616-
/* error */
617-
}
618-
from = i+1;
617+
if (z[i] == ';') {
618+
from = i+1;
619619
#if TclOnly8Bits
620-
z[to++] = value;
621-
#else
622-
if (value < 0x80) {
623620
z[to++] = value;
624-
} else if (value <= 0x7FF) {
625-
z[to++] = (char) ((value >> 6) | 0xC0);
626-
z[to++] = (char) ((value | 0x80) & 0xBF);
627-
} else if (value <= 0xFFFF) {
628-
z[to++] = (char) ((value >> 12) | 0xE0);
629-
z[to++] = (char) (((value >> 6) | 0x80) & 0xBF);
630-
z[to++] = (char) ((value | 0x80) & 0xBF);
621+
#else
622+
if (value < 0x80) {
623+
z[to++] = value;
624+
} else if (value <= 0x7FF) {
625+
z[to++] = (char) ((value >> 6) | 0xC0);
626+
z[to++] = (char) ((value | 0x80) & 0xBF);
627+
} else if (value <= 0xFFFF) {
628+
z[to++] = (char) ((value >> 12) | 0xE0);
629+
z[to++] = (char) (((value >> 6) | 0x80) & 0xBF);
630+
z[to++] = (char) ((value | 0x80) & 0xBF);
631+
} else {
632+
/* error */
633+
while (from < i-1) {
634+
z[to++] = z[from++];
635+
}
636+
}
631637
} else {
632638
/* error */
639+
while (from < i-1) {
640+
z[to++] = z[from++];
641+
}
633642
}
634643
#endif
635644
} else {

tests/htmlreader.test

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ test html-2.8 {HTML parsing} {
175175
</select></form></body>
176176
</html>}
177177

178+
test html-2.9 {HTML parsing - incomplete character references} {
179+
set doc [dom parse -html "<html>foo &#12399, foo</html>"]
180+
$doc documentElement root
181+
$doc asHTML
182+
} {<html>foo &amp;#12399, foo</html>}
183+
184+
178185
test html-3.1 {Bad data} {
179186
set data {line 6 column 17 - Warning: <script> lacks "type" attribute
180187
line 10 column 17 - Warning: <script> lacks "type" attribute

0 commit comments

Comments
 (0)