Fix bug in HTML parser that could cause missing text fragments when there was no head/body tag (#6713)

pull/6724/head
Aleksander Machniak 6 years ago
parent 1078d8df16
commit 03d56926d8

@ -12,6 +12,7 @@ CHANGELOG Roundcube Webmail
- Elastic: Fix folders list scrolling on touch devices (#6706)
- Elastic: Fix non-working pretty selects in Chrome browser (#6705)
- Managesieve: Fix bug where global includes were requested for vacation (#6716)
- Fix bug in HTML parser that could cause missing text fragments when there was no head/body tag (#6713)
- Fix bug where HTML messages with a xml:namespace tag were not rendered (#6697)
- Fix TinyMCE download location (#6694)
- Fix so "Open in new window" consistently displays "external window" interface (#6659)

@ -562,7 +562,7 @@ class rcube_washtml
if (!$this->is_xml && class_exists('Masterminds\HTML5')) {
try {
$html5 = new Masterminds\HTML5();
$node = $html5->loadHTML($html);
$node = $html5->loadHTML($this->fix_html5($html));
}
catch (Exception $e) {
// ignore, fallback to DOMDocument
@ -778,6 +778,28 @@ class rcube_washtml
}
}
/**
 * Cleanup and workarounds on input to Masterminds/HTML5
 *
 * When the input has neither a <head> nor a <body> tag, an opening <body>
 * is injected (together with <html>, if that one is missing too), because
 * the HTML5 parser could otherwise lose text fragments (#6713).
 *
 * @param string $html HTML content
 *
 * @return string HTML content, unchanged if it already has <head> or <body>
 */
protected function fix_html5($html)
{
    // HTML5 requires <head> or <body> (#6713)
    // https://github.com/Masterminds/html5-php/issues/166
    // The lookahead keeps e.g. <header> from being mistaken for <head>
    if (preg_match('/<(head|body)(?=[\s\/>]|$)/i', $html)) {
        return $html;
    }

    $pos = stripos($html, '<html');

    if ($pos !== false) {
        // Find the '>' that ends the opening <html ...> tag
        $pos = strpos($html, '>', $pos);
    }

    if ($pos === false) {
        // No <html> tag (or an unterminated one): start the document ourselves
        return '<html><body>' . $html;
    }

    // Insert <body> right AFTER the '>' of the <html> tag; inserting at $pos
    // itself would produce a broken "<html<body>>" sequence
    return substr_replace($html, '<body>', $pos + 1, 0);
}
/**
* Explode css style value
*/

@ -444,4 +444,32 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
$this->assertNotContains('&lt;?xml:namespace"', $washed);
$this->assertSame($washed, '<p></p>');
}
/**
 * Washing must keep the text when the main HTML hierarchy
 * tags (html/head/body) are partially or entirely missing (#6713)
 */
function test_missing_tags()
{
    $washer = new rcube_washtml();

    // Inputs with various combinations of missing html/head/body tags
    $samples = array(
        '<head></head>First line<br />Second line',
        'First line<br />Second line',
        '<html>First line<br />Second line</html>',
        '<html><head></head>First line<br />Second line</html>',
    );

    foreach ($samples as $sample) {
        $this->assertContains('First line', $washer->wash($sample));
    }
}
}

Loading…
Cancel
Save