Use Masterminds/HTML5 parser for HTML5 support (#5761)

pull/5850/merge
Aleksander Machniak 6 years ago
parent 2f7aaca342
commit 63d3ad11fb

@ -7,6 +7,7 @@ CHANGELOG Roundcube Webmail
- Added private key listing and generating to identity settings
- Enable encrypt & sign option if Mailvelope supports it
- Update to jQuery-3.3.1
- Use Masterminds/HTML5 parser for HTML5 support (#5761)
- vcard_attachments: Add possibility to send contact vCard from Contacts toolbar (#6080)
- Add More actions button in Contacts toolbar with Copy/Move actions (#6081)
- Display an error when clicking disabled link to register protocol handler (#6079)

@ -19,6 +19,7 @@
"pear/crypt_gpg": "~1.6.2",
"pear/net_sieve": "~1.4.3",
"roundcube/plugin-installer": "~0.1.6",
"masterminds/html5": "~2.3.0",
"endroid/qr-code": "~1.6.5"
},
"require-dev": {

@ -536,8 +536,6 @@ class rcube_washtml
*/
public function wash($html)
{
// Charset seems to be ignored (probably if defined in the HTML document)
$node = new DOMDocument('1.0', $this->config['charset']);
$this->extlinks = false;
$html = $this->cleanup($html);
@ -556,15 +554,22 @@ class rcube_washtml
// SVG needs to be parsed as XML
$this->is_xml = stripos($html, '<html') === false && stripos($html, '<svg') !== false;
$method = $this->is_xml ? 'loadXML' : 'loadHTML';
$options = 0;
// Use optimizations if supported
if (PHP_VERSION_ID >= 50400) {
$options = LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NONET;
@$node->{$method}($html, $options);
// DOMDocument does not support HTML5, try Masterminds parser if available
if (!$this->is_xml && class_exists('Masterminds\HTML5')) {
try {
$html5 = new Masterminds\HTML5();
$node = $html5->loadHTML($html);
}
catch (Exception $e) {
// ignore, fallback to DOMDocument
}
}
else {
@$node->{$method}($html);
if (empty($node)) {
// Charset seems to be ignored (probably if defined in the HTML document)
$node = new DOMDocument('1.0', $this->config['charset']);
@$node->{$method}($html, LIBXML_PARSEHUGE | LIBXML_COMPACT | LIBXML_NONET);
}
return $this->dumpHtml($node);

@ -7,6 +7,14 @@
*/
class Framework_Washtml extends PHPUnit_Framework_TestCase
{
/**
 * Strips the placeholder comments that rcube_washtml adds to its output
 * (e.g. "<!-- html ignored -->"), so that assertions can compare only
 * the markup that is left after washing.
 *
 * Only markers of the form "<!-- name ignored -->" or
 * "<!-- name not allowed -->" (name in lowercase letters) are removed;
 * any other comment is kept as is.
 *
 * @param string $html Washed HTML content
 *
 * @return string HTML content without the washer's marker comments
 */
function cleanupResult($html)
{
    $marker = '/<!-- [a-z]+ (ignored|not allowed) -->/';
    $clean  = preg_replace($marker, '', $html);

    return $clean;
}
/**
* Test the elimination of some XSS vulnerabilities
@ -67,24 +75,24 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
$washer = new rcube_washtml;
$html = "<!--[if gte mso 10]><p>p1</p><!--><p>p2</p>";
$washed = $washer->wash($html);
$washed = $this->cleanupResult($washer->wash($html));
$this->assertEquals('<!-- html ignored --><!-- body ignored --><p>p2</p>', $washed, "HTML conditional comments (#1489004)");
$this->assertEquals('<p>p2</p>', $washed, "HTML conditional comments (#1489004)");
$html = "<!--TestCommentInvalid><p>test</p>";
$washed = $washer->wash($html);
$washed = $this->cleanupResult($washer->wash($html));
$this->assertEquals('<!-- html ignored --><!-- body ignored --><p>test</p>', $washed, "HTML invalid comments (#1487759)");
$this->assertEquals('<p>test</p>', $washed, "HTML invalid comments (#1487759)");
$html = "<p>para1</p><!-- comment --><p>para2</p>";
$washed = $washer->wash($html);
$washed = $this->cleanupResult($washer->wash($html));
$this->assertEquals('<!-- html ignored --><!-- body ignored --><p>para1</p><p>para2</p>', $washed, "HTML comments - simple comment");
$this->assertEquals('<p>para1</p><p>para2</p>', $washed, "HTML comments - simple comment");
$html = "<p>para1</p><!-- <hr> comment --><p>para2</p>";
$washed = $washer->wash($html);
$washed = $this->cleanupResult($washer->wash($html));
$this->assertEquals('<!-- html ignored --><!-- body ignored --><p>para1</p><p>para2</p>', $washed, "HTML comments - tags inside (#1489904)");
$this->assertEquals('<p>para1</p><p>para2</p>', $washed, "HTML comments - tags inside (#1489904)");
}
/**
@ -295,7 +303,7 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
function test_wash_mathml()
{
$mathml = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>
<math xmlns="http://www.w3.org/1998/Math/MathML"><semantics>
<math><semantics>
<mrow>
<msub><mi>I</mi><mi>D</mi></msub>
<mo>=</mo>
@ -312,7 +320,7 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
</body></html>';
$exp = '<!-- html ignored --><!-- head ignored --><!-- meta ignored --><!-- body ignored -->
<math xmlns="http://www.w3.org/1998/Math/MathML"><semantics>
<math><semantics>
<mrow>
<msub><mi>I</mi><mi>D</mi></msub>
<mo>=</mo>

Loading…
Cancel
Save