Work around more invalid HTML cases parsed incorrectly by Masterminds/HTML5 (#6713)

pull/6734/head
Aleksander Machniak 5 years ago
parent 07d4996825
commit 55cca61134

@ -782,17 +782,28 @@ class rcube_washtml
*/
protected function fix_html5($html)
{
// There might be content before the html/body tag; we'll move it into the body.
// We wrap it in a div container, since it's invalid HTML anyway.
if (strpos($html, '<')) {
$pos = stripos($html, '<!DOCTYPE') ?: stripos($html, '<html') ?: stripos($html, '<body');
$prefix = '<div>' . substr($html, 0, $pos) . '</div>';
$html = substr($html, $pos);
}
// HTML5 requires <head> or <body> (#6713)
// https://github.com/Masterminds/html5-php/issues/166
if (!preg_match('/<(head|body)/i', $html)) {
$pos = stripos($html, '<html');
if (isset($prefix) || !preg_match('/<(head|body)/i', $html)) {
$body_pos = stripos($html, '<body');
$pos = $body_pos !== false ? $body_pos : stripos($html, '<html');
// No HTML and no BODY tag
if ($pos === false) {
$html = '<html><body>' . $html;
$html = '<html><body>' . $prefix . $html;
}
// Either HTML or BODY tag found
else {
$pos = strpos($html, '>', $pos);
$html = substr_replace($html, '<body>', $pos + 1, 0);
$html = substr_replace($html, ($body_pos === false ? '<body>' : '') . $prefix, $pos + 1, 0);
}
}

@ -471,5 +471,17 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
$washed = $washer->wash($html);
$this->assertContains('First line', $washed);
// Not really valid HTML, but because it's common in the email world
// and because it works with DOMDocument, we make sure it's supported
$html = 'First line<br /><html><body>Second line';
$washed = $washer->wash($html);
$this->assertContains('First line', $washed);
$html = 'First line<br /><html>Second line';
$washed = $washer->wash($html);
$this->assertContains('First line', $washed);
}
}

Loading…
Cancel
Save