Followup fix on handling HTML content w/o html/head/body tag (#6713)

pull/6724/head
Aleksander Machniak 6 years ago
parent 03d56926d8
commit 92ed0154d5

@ -793,7 +793,7 @@ class rcube_washtml
}
else {
$pos = strpos($html, '>', $pos);
$html = substr_replace($html, '<body>', $pos, 0);
$html = substr_replace($html, '<body>', $pos + 1, 0);
}
}

@ -838,15 +838,24 @@ function rcmail_wash_html($html, $p, $cid_replaces = array())
// charset was converted to UTF-8 in rcube_storage::get_message_part(),
// change/add charset specification in HTML accordingly,
// washtml cannot work without that
$meta = '<meta http-equiv="Content-Type" content="text/html; charset='.RCUBE_CHARSET.'" />';
// washtml's DOMDocument methods cannot work without that
$meta = '<meta charset="'.RCUBE_CHARSET.'" />';
// remove old meta tag and add the new one, making sure
// that it is placed in the head (#1488093)
$html = preg_replace('/<meta[^>]+charset=[a-z0-9-_]+[^>]*>/Ui', '', $html);
$html = preg_replace('/(<head[^>]*>)/Ui', '\\1'.$meta, $html, -1, $rcount);
$html = preg_replace('/<meta[^>]+charset=[a-z0-9_"-]+[^>]*>/Ui', $meta, $html, -1, $rcount);
if (!$rcount) {
$html = '<head>' . $meta . '</head>' . $html;
$html = preg_replace('/(<head[^>]*>)/Ui', '\\1'.$meta, $html, -1, $rcount);
}
if (!$rcount) {
// Note: HTML without <html> tag may still be a valid input (#6713)
if (($pos = stripos($html, '<html')) === false) {
$html = '<html><head>' . $meta . '</head>' . $html;
}
else {
$pos = strpos($html, '>', $pos);
$html = substr_replace($html, '<head>' . $meta . '</head>', $pos + 1, 0);
}
}
// clean HTML with washhtml by Frederic Motte

@ -465,7 +465,7 @@ class Framework_Washtml extends PHPUnit_Framework_TestCase
$html = '<html>First line<br />Second line</html>';
$washed = $washer->wash($html);
$this->assertContains('First line', $washed);
$this->assertContains('>First line', $washed);
$html = '<html><head></head>First line<br />Second line</html>';
$washed = $washer->wash($html);

@ -123,6 +123,38 @@ class MailFunc extends PHPUnit_Framework_TestCase
$this->assertRegExp('/<p>(символ|симол)<\/p>/', $washed, "Remove non-unicode characters from HTML message body");
}
/**
 * Test inserting meta tag with required charset definition
 *
 * Runs rcmail_wash_html() over inputs with different combinations of
 * html/head/meta tags and verifies that the washed output carries the
 * expected <meta charset> element inside <head>.
 */
function test_meta_insertion()
{
    // The meta element every washed document is expected to contain
    $meta = '<meta charset="'.RCUBE_CHARSET.'" />';
    $args = array(
        'html_elements' => array('html', 'body', 'meta', 'head'),
        'html_attribs'  => array('charset'),
    );

    // (1) Input already has a short-form <meta charset="..."> with another charset
    $body   = '<html><head><meta charset="iso-8859-1_X"></head><body>Test1<br>Test2';
    $washed = rcmail_wash_html($body, $args);
    $this->assertContains("<html><head>$meta</head><body>Test1", $washed, "Meta tag insertion (1)");

    // (2) Input has an http-equiv style meta tag with another charset
    $body   = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /></head><body>Test1<br>Test2';
    $washed = rcmail_wash_html($body, $args);
    $this->assertContains("<html><head>$meta</head><body>Test1", $washed, "Meta tag insertion (2)");

    // (3) Input has no html/head/body tags at all
    $body   = 'Test1<br>Test2';
    $washed = rcmail_wash_html($body, $args);
    $this->assertStringStartsWith("<html><head>$meta</head>", $washed, "Meta tag insertion (3)");

    // (4) Input has an <html> tag, but no <head>
    $body   = '<html>Test1<br>Test2';
    $washed = rcmail_wash_html($body, $args);
    $this->assertStringStartsWith("<html><head>$meta</head>", $washed, "Meta tag insertion (4)");

    // (5) Input has an empty <head> and no <body>
    $body   = '<html><head></head>Test1<br>Test2';
    $washed = rcmail_wash_html($body, $args);
    $this->assertStringStartsWith("<html><head>$meta</head>", $washed, "Meta tag insertion (5)");
}
/**
* Test links pattern replacements in plaintext messages
*/

Loading…
Cancel
Save