Replace our crappy html sanitization with the dom-based washtml script + fix inline message parts + remove old code + add some doc comments

release-0.6
thomascube 17 years ago
parent 06c1652d7f
commit 45f56c1c40

@ -1,7 +1,7 @@
<form action="index.php" method="get">
<?php
$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session');
$required_php_exts = array('PCRE' => 'pcre', 'Session' => 'session', 'DOM XML' => 'dom');
$optional_php_exts = array('FileInfo' => 'fileinfo', 'Libiconv' => 'iconv',
'Multibyte' => 'mbstring', 'OpenSSL' => 'openssl', 'Mcrypt' => 'mcrypt', 'GD' => 'gd');

@ -21,9 +21,8 @@
/**
* Interface class for accessing an IMAP server
*
* This is a wrapper that implements the Iloha IMAP Library (IIL)
* Logical representation of a mail message with all its data
* and related functions
*
* @package Mail
* @author Thomas Bruederli <roundcube@gmail.com>
@ -65,8 +64,8 @@ class rcube_message
);
if ($this->structure = $this->imap->get_structure($uid)) {
$this->parse_structure($this->structure);
$this->get_mime_numbers($this->structure);
$this->parse_structure($this->structure);
}
else {
$this->body = $this->imap->get_body($uid);
@ -356,18 +355,18 @@ class rcube_message
}
// if this was a related part try to resolve references
if ($message_ctype_secondary == 'related' && sizeof($this->inline_objects)) {
if ($message_ctype_secondary == 'related' && sizeof($this->inline_parts)) {
$a_replaces = array();
foreach ($this->inline_parts as $inline_object) {
$a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($this->opt['get_url'], $inline_object->mime_id));
$a_replaces['cid:'.$inline_object->content_id] = $this->get_part_url($inline_object->mime_id);
}
// add replace array to each content part
// (will be applied later when part body is available)
for ($i=0; $i<count($a_return_parts); $i++) {
if ($a_return_parts[$i]->type=='content')
$a_return_parts[$i]->replaces = $a_replaces;
foreach ($this->parts as $i => $part) {
if ($part->type == 'content')
$this->parts[$i]->replaces = $a_replaces;
}
}
}

@ -24,7 +24,7 @@
/**
* Class representing a system user
*
* @package core
* @package Core
* @author Thomas Bruederli <roundcube@gmail.com>
*/
class rcube_user

@ -0,0 +1,196 @@
<?php
/* Washtml, an HTML sanitizer.
*
* Copyright (c) 2007 Frederic Motte <fmotte@ubixis.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Please send me your comments about this code if you have some, thanks, Fred. */
/* OVERVIEW:
*
* Washtml takes untrusted HTML and returns a safe HTML string.
*
* SYNOPSIS:
*
* washtml::wash($html, $config, $full);
* It returns a sanitized string of the $html parameter without html and head tags.
* $html is a string containing the html code to wash.
* $config is an array containing options:
* $config['allow_remote'] is a boolean to allow link to remote images.
* $config['blocked_src'] string with image-src to be used for blocked remote images
* $config['show_washed'] is a boolean to include washed out attributes as x-washed
* $config['cid_map'] is an array where cid urls index urls to replace them.
* $config['charset'] is a string containing the charset of the HTML document if it is not defined in it.
* $full is a reference to a boolean that is set to true if no remote images are removed. (FE: show remote images link)
*
* INTERNALS:
*
* Only tags and attributes in the globals $html_elements and $html_attributes
* are kept, inline styles are also filtered: all style identifiers matching
* /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and safe
* urls if allowed and cid urls if mapped are kept.
*
* BUGS: It MUST be safe !
* - Check regexp
* - urlencode URLs instead of htmlspecials
* - Check if a 3-byte UTF-8 first char can eat '">'
* - Update PCRE: CVE-2007-1659 - CVE-2007-1660 - CVE-2007-1661 - CVE-2007-1662
* CVE-2007-4766 - CVE-2007-4767 - CVE-2007-4768
* http://lists.debian.org/debian-security-announce/debian-security-announce-2007/msg00177.html
* - ...
*
* MISSING:
* - relative links, can be implemented by prefixing an absolute path, ask me
* if you need it...
* - ...
*
* Dont be a fool:
* - Dont alter data on a GET: '<img src="http://yourhost/mail?action=delete&uid=3267" />'
* - ...
*/
/**
 * DOM-based HTML sanitizer.
 *
 * The input is parsed with DOMDocument and re-serialized by hand: only
 * elements, attributes and inline style values found on the white lists
 * below are written back, everything else is dropped.
 */
class washtml
{
    /* Allowed HTML elements */
    static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt', 'u', 'ul', 'var', 'img');

    /* Allowed HTML attributes */
    static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir');

    /* Allowed elements that never have content and may be written as <tag /> */
    static $void_elements = array('area', 'basefont', 'br', 'col', 'hr', 'img');

    /**
     * Filter an inline style string.
     *
     * @param string $style  Raw value of a style attribute
     * @param array  $config Options; 'allow_remote' and 'cid_map' are read here
     * @param bool   $full   Set to false when a remote url() was removed
     * @return string Washed declarations ("name: value;" separated by spaces),
     *                or an empty string if nothing was kept
     */
    static function wash_style($style, $config, &$full) {
        $s = '';
        foreach (explode(';', $style) as $declaration) {
            if (!preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match))
                continue;

            $cssid = $match[1];
            $str = $match[2];
            $value = '';

            // consume the value token by token; stop at the first unknown token
            while (strlen($str) > 0 &&
                preg_match('/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'./*1,2*/
                    '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'.
                    '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'.
                    '|#[0-9a-f]{3,6}|[a-z0-9\-]+'.
                    ')\s*/i', $str, $match)) {
                if (stripos($match[1], 'url(') === 0) {
                    // url(...) token: only remote (if allowed) or mapped cid: targets
                    // are written back; any other target (including "0" or an
                    // empty one) is dropped instead of being echoed verbatim
                    $target = isset($match[2]) ? $match[2] : '';
                    if (preg_match('/^(http|https|ftp):.*$/i', $target, $url)) {
                        if (!empty($config['allow_remote']))
                            $value .= ' url(\''.htmlspecialchars($url[0], ENT_QUOTES).'\')';
                        else
                            $full = false;
                    } else if (preg_match('/^cid:(.*)$/i', $target, $cid)) {
                        $cid_key = 'cid:'.$cid[1];
                        if (isset($config['cid_map'][$cid_key]))
                            $value .= ' url(\''.htmlspecialchars($config['cid_map'][$cid_key], ENT_QUOTES) . '\')';
                    }
                } else {
                    // a bare "url"/"rgb" word is the start of a function call that
                    // did not match the strict patterns above: do not keep it
                    $word = strtolower($match[1]);
                    if ($word != 'url' && $word != 'rgb')
                        $value .= ' ' . $match[0];
                }
                // substr() returns false at end of string on old PHP versions
                $str = (string) substr($str, strlen($match[0]));
            }

            if ($value)
                $s .= ($s?' ':'') . $cssid . ':' . $value . ';';
        }
        return $s;
    }

    /**
     * Serialize the allowed attributes of an element node.
     *
     * @param DOMElement $node   Element whose attributes are checked
     * @param array      $config Options; 'allow_remote', 'blocked_src',
     *                           'cid_map' and 'show_washed' are read here
     * @param bool       $full   Set to false when a remote image was blocked
     * @return string Attribute string starting with a space, or ''
     */
    static function wash_attribs($node, $config, &$full) {
        $t = '';
        $washed = '';

        foreach ($node->attributes as $key => $plop) {
            $key = strtolower($key);
            $value = $node->getAttribute($key);

            if (in_array($key, self::$html_attribs, true) ||
                ($key == 'href' && preg_match('/^(http|https|ftp|mailto):.*/i', $value)))
                $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
            else if ($key == 'style' && ($style = self::wash_style($value, $config, $full)))
                $t .= ' style="' . $style . '"';
            else if ($key == 'src' && strtolower($node->tagName) == 'img') { //check tagName anyway
                if (preg_match('/^(http|https|ftp):.*/i', $value)) {
                    if (!empty($config['allow_remote']))
                        $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
                    else {
                        $full = false;
                        if (!empty($config['blocked_src']))
                            $t .= ' src="' . htmlspecialchars($config['blocked_src'], ENT_QUOTES) . '"';
                    }
                } else if (preg_match('/^cid:(.*)$/i', $value, $cid)) {
                    // only write the attribute if the cid is actually mapped
                    $cid_key = 'cid:'.$cid[1];
                    if (isset($config['cid_map'][$cid_key]))
                        $t .= ' ' . $key . '="' . htmlspecialchars($config['cid_map'][$cid_key], ENT_QUOTES) . '"';
                }
            } else
                $washed .= ($washed?' ':'') . $key;
        }

        return $t . ($washed && !empty($config['show_washed'])?' x-washed="'.$washed.'"':'');
    }

    /**
     * Recursively serialize the children of a node.
     * Only allowed tags with allowed attributes and allowed inline styles
     * are written; html/body wrappers are unwrapped, other elements are
     * replaced by an HTML comment and their content is dropped.
     *
     * @param DOMNode $node   Node whose children are dumped
     * @param array   $config Options, passed through to wash_attribs()
     * @param bool    $full   Passed through to wash_attribs()
     * @return string Washed markup
     */
    static function dumpHtml($node, $config, &$full) {
        if (!$node->hasChildNodes())
            return '';

        $node = $node->firstChild;
        $dump = '';

        do {
            switch ($node->nodeType) {
            case XML_ELEMENT_NODE: //Check element
                $tagName = strtolower($node->tagName);
                if (in_array($tagName, self::$html_elements, true)) {
                    $content = self::dumpHtml($node, $config, $full);
                    $dump .= '<' . $tagName . self::wash_attribs($node, $config, $full);
                    // compare against '' so that text like "0" is not lost, and
                    // self-close only elements that cannot have content
                    if ($content !== '' || !in_array($tagName, self::$void_elements, true))
                        $dump .= ">$content</$tagName>";
                    else
                        $dump .= ' />';
                } else if ($tagName == 'html' || $tagName == 'body') {
                    $dump .= self::dumpHtml($node, $config, $full); //Just ignored
                } else
                    $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' not allowed -->';
                break;
            case XML_TEXT_NODE:
                $dump .= htmlspecialchars($node->nodeValue);
                break;
            case XML_HTML_DOCUMENT_NODE:
                $dump .= self::dumpHtml($node, $config, $full);
                break;
            case XML_DOCUMENT_TYPE_NODE:
                break;
            default:
                // every other node type (comments, CDATA, ...) is dropped
            }
        } while ($node = $node->nextSibling);

        return $dump;
    }

    /**
     * Main function: wash untrusted HTML.
     *
     * @param string $html   Untrusted HTML source
     * @param array  $config Options: 'allow_remote', 'blocked_src',
     *                       'show_washed', 'cid_map', 'charset'
     * @param bool   $full   Reset to true on entry; false on return if a
     *                       remote image/url was removed
     * @return string Washed HTML without html and body wrappers
     */
    static function wash($html, $config=array(), &$full=true) {
        $config += array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array(), 'blocked_src'=>'', 'charset'=>'');
        // callers may pass null when a message part has no cid replacements
        if (!is_array($config['cid_map']))
            $config['cid_map'] = array();

        $full = true;

        // nothing to parse; DOMDocument::loadHTML() rejects an empty source
        if ($html === null || $html === '')
            return '';

        //Charset seems to be ignored (probably if defined in the HTML document)
        $node = new DOMDocument('1.0', (string) $config['charset']);

        // collect parser complaints about broken markup instead of emitting warnings
        $previous = libxml_use_internal_errors(true);
        $node->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return self::dumpHtml($node, $config, $full);
    }
}
?>

@ -74,7 +74,9 @@ if (empty($RCMAIL->action) || $RCMAIL->action == 'list')
// return the message list as HTML table
/**
* return the message list as HTML table
*/
function rcmail_message_list($attrib)
{
global $IMAP, $CONFIG, $COMM_PATH, $OUTPUT;
@ -295,7 +297,9 @@ function rcmail_message_list($attrib)
}
// return javascript commands to add rows to the message list
/**
* return javascript commands to add rows to the message list
*/
function rcmail_js_message_list($a_headers, $insert_top=FALSE)
{
global $CONFIG, $IMAP, $OUTPUT;
@ -358,7 +362,9 @@ function rcmail_js_message_list($a_headers, $insert_top=FALSE)
}
// return an HTML iframe for loading mail content
/**
* return an HTML iframe for loading mail content
*/
function rcmail_messagecontent_frame($attrib)
{
global $OUTPUT;
@ -381,6 +387,9 @@ function rcmail_messagecontent_frame($attrib)
}
/**
*
*/
function rcmail_messagecount_display($attrib)
{
global $IMAP, $OUTPUT;
@ -401,6 +410,9 @@ function rcmail_messagecount_display($attrib)
}
/**
*
*/
function rcmail_quota_display($attrib)
{
global $OUTPUT, $COMM_PATH;
@ -423,6 +435,9 @@ function rcmail_quota_display($attrib)
}
/**
*
*/
function rcmail_quota_content($quota=NULL)
{
global $IMAP, $COMM_PATH;
@ -466,6 +481,9 @@ function rcmail_quota_content($quota=NULL)
}
/**
*
*/
function rcmail_get_messagecount_text($count=NULL, $page=NULL)
{
global $IMAP, $MESSAGE;
@ -495,188 +513,44 @@ function rcmail_get_messagecount_text($count=NULL, $page=NULL)
}
/* Stolen from Squirrelmail */
/**
 * Decode numeric escapes found by $regex inside $attvalue (in place).
 *
 * Every full match of $regex is replaced by the single character whose
 * code is given by the first capture group.
 *
 * @param string $attvalue String to decode; modified by reference
 * @param string $regex    PCRE pattern whose group 1 captures the number
 * @param bool   $hex      True if group 1 is hexadecimal, false for decimal
 * @return bool True if at least one escape was replaced, false otherwise
 */
function sq_deent(&$attvalue, $regex, $hex=false)
{
  preg_match_all($regex, $attvalue, $matches);

  if (!is_array($matches) || empty($matches[0]))
    return false;

  $repl = array();
  foreach ($matches[0] as $i => $found)
  {
    $numval = $matches[1][$i];
    // chr() wraps values outside 0..255, as the original code relied on
    $numval = $hex ? (int) hexdec($numval) : (int) $numval;
    $repl[$found] = chr($numval);
  }

  $attvalue = strtr($attvalue, $repl);
  return true;
}
/* Stolen verbatim from Squirrelmail */
/**
 * Repeatedly decode numeric entities and backslash escapes in an attribute
 * value (in place) until nothing is left to decode, then strip slashes.
 * Values containing neither '&' nor '\' are left untouched.
 *
 * @param string $attvalue Attribute value; modified by reference
 */
function sq_defang(&$attvalue)
{
  $has_amp = strpos($attvalue, '&') !== false;
  $has_backslash = strpos($attvalue, '\\') !== false;

  // nothing that could be an escape sequence
  if (!$has_amp && !$has_backslash)
    return;

  // pattern => whether the captured number is hexadecimal
  $decoders = array(
    array('/\&#0*(\d+);*/s', false),
    array('/\&#x0*((\d|[a-f])+);*/si', true),
    array('/\\\\(\d+)/s', true),
  );

  do
  {
    $decoded = false;
    // only the first decoder that finds something runs in each pass
    foreach ($decoders as $decoder)
    {
      if (sq_deent($attvalue, $decoder[0], $decoder[1]))
      {
        $decoded = true;
        break;
      }
    }
  } while ($decoded);

  $attvalue = stripslashes($attvalue);
}
/**
 * Regex-based filter that defangs attribute values of HTML tags.
 *
 * All tags are collected with one pattern; for every tag the text captured
 * by the pattern's third group (an attribute's "=value" portion) is cleaned
 * and spliced back into the tag and then into the document.
 *
 * @param string $html HTML source
 * @return string HTML source with the rewritten tags
 */
function rcmail_html_filter($html)
{
preg_match_all('/<\/?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)\/?>/', $html, $tags);
/* From Squirrelmail: Translate all dangerous Unicode or Shift_JIS characters which are accepted by
* IE as regular characters. */
// $replace[0] holds the look-alike sequences, $replace[1] the ASCII letters they are mapped to
$replace = array(array('&#x029F;', '&#0671;', /* L UNICODE IPA Extension */
'&#x0280;', '&#0640;', /* R UNICODE IPA Extension */
'&#x0274;', '&#0628;', /* N UNICODE IPA Extension */
'&#xFF25;', '&#65317;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER E */
'&#xFF45;', '&#65349;', /* Unicode FULLWIDTH LATIN SMALL LETTER E */
'&#xFF38;', '&#65336;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER X */
'&#xFF58;', '&#65368;', /* Unicode FULLWIDTH LATIN SMALL LETTER X */
'&#xFF30;', '&#65328;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER P */
'&#xFF50;', '&#65360;', /* Unicode FULLWIDTH LATIN SMALL LETTER P */
'&#xFF32;', '&#65330;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER R */
'&#xFF52;', '&#65362;', /* Unicode FULLWIDTH LATIN SMALL LETTER R */
'&#xFF33;', '&#65331;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER S */
'&#xFF53;', '&#65363;', /* Unicode FULLWIDTH LATIN SMALL LETTER S */
'&#xFF29;', '&#65321;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER I */
'&#xFF49;', '&#65353;', /* Unicode FULLWIDTH LATIN SMALL LETTER I */
'&#xFF2F;', '&#65327;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER O */
'&#xFF4F;', '&#65359;', /* Unicode FULLWIDTH LATIN SMALL LETTER O */
'&#xFF2E;', '&#65326;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER N */
'&#xFF4E;', '&#65358;', /* Unicode FULLWIDTH LATIN SMALL LETTER N */
'&#xFF2C;', '&#65324;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER L */
'&#xFF4C;', '&#65356;', /* Unicode FULLWIDTH LATIN SMALL LETTER L */
'&#xFF35;', '&#65333;', /* Unicode FULLWIDTH LATIN CAPITAL LETTER U */
'&#xFF55;', '&#65365;', /* Unicode FULLWIDTH LATIN SMALL LETTER U */
'&#x207F;', '&#8319;' , /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
"\xEF\xBC\xA5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER E */
/* in unicode this is some Chinese char range */
"\xEF\xBD\x85", /* Shift JIS FULLWIDTH LATIN SMALL LETTER E */
"\xEF\xBC\xB8", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER X */
"\xEF\xBD\x98", /* Shift JIS FULLWIDTH LATIN SMALL LETTER X */
"\xEF\xBC\xB0", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER P */
"\xEF\xBD\x90", /* Shift JIS FULLWIDTH LATIN SMALL LETTER P */
"\xEF\xBC\xB2", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER R */
"\xEF\xBD\x92", /* Shift JIS FULLWIDTH LATIN SMALL LETTER R */
"\xEF\xBC\xB3", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER S */
"\xEF\xBD\x93", /* Shift JIS FULLWIDTH LATIN SMALL LETTER S */
"\xEF\xBC\xA9", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER I */
"\xEF\xBD\x89", /* Shift JIS FULLWIDTH LATIN SMALL LETTER I */
"\xEF\xBC\xAF", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER O */
"\xEF\xBD\x8F", /* Shift JIS FULLWIDTH LATIN SMALL LETTER O */
"\xEF\xBC\xAE", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER N */
"\xEF\xBD\x8E", /* Shift JIS FULLWIDTH LATIN SMALL LETTER N */
"\xEF\xBC\xAC", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER L */
"\xEF\xBD\x8C", /* Shift JIS FULLWIDTH LATIN SMALL LETTER L */
"\xEF\xBC\xB5", /* Shift JIS FULLWIDTH LATIN CAPITAL LETTER U */
"\xEF\xBD\x95", /* Shift JIS FULLWIDTH LATIN SMALL LETTER U */
"\xE2\x81\xBF", /* Shift JIS FULLWIDTH SUPERSCRIPT N */
"\xCA\x9F", /* L UNICODE IPA Extension */
"\xCA\x80", /* R UNICODE IPA Extension */
"\xC9\xB4"), /* N UNICODE IPA Extension */
array('l', 'l', 'r', 'r', 'n', 'n', 'E', 'E', 'e', 'e', 'X', 'X', 'x', 'x',
'P', 'P', 'p', 'p', 'R', 'R', 'r', 'r', 'S', 'S', 's', 's', 'I', 'I',
'i', 'i', 'O', 'O', 'o', 'o', 'N', 'N', 'n', 'n', 'L', 'L', 'l', 'l',
'U', 'U', 'u', 'u', 'n', 'n', 'E', 'e', 'X', 'x', 'P', 'p', 'R', 'r',
'S', 's', 'I', 'i', 'O', 'o', 'N', 'n', 'L', 'l', 'U', 'u', 'n', 'l', 'r', 'n'));
// $tags[0] = whole tags, $tags[1] = attribute section, $tags[3] = captured "=value" text
if ((count($tags)>3) && (count($tags[3])>0))
foreach ($tags[3] as $nr=>$value)
{
/* Remove comments */
// the pattern defines only one group, so the '$2' replacement is empty and the comment is removed
$newvalue = preg_replace('/(\/\*.*\*\/)/','$2',$value);
/* Translate dangerous characters */
$newvalue = str_replace($replace[0], $replace[1], $newvalue);
// decode numeric entities / backslash escapes in place
sq_defang($newvalue);
/* Rename dangerous CSS */
$newvalue = preg_replace('/expression/i', 'idiocy', $newvalue);
$newvalue = preg_replace('/url/i', 'idiocy', $newvalue);
// splice the cleaned value back: value -> attribute section -> tag -> document
$newattrs = preg_replace('/'.preg_quote($value, '/').'$/', $newvalue, $tags[1][$nr]);
$newtag = preg_replace('/'.preg_quote($tags[1][$nr], '/').'/', $newattrs, $tags[0][$nr]);
$html = preg_replace('/'.preg_quote($tags[0][$nr], '/').'/', $newtag, $html);
}
return $html;
}
/**
*
*/
function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
{
global $IMAP, $REMOTE_OBJECTS;
$body = is_array($part->replaces) ? strtr($part->body, $part->replaces) : $part->body;
global $REMOTE_OBJECTS;
// convert html to text/plain
if ($part->ctype_secondary=='html' && $plain)
{
$txt = new html2text($body, false, true);
if ($part->ctype_secondary == 'html' && $plain) {
$txt = new html2text($part->body, false, true);
$body = $txt->get_text();
$part->ctype_secondary = 'plain';
}
// text/html
if ($part->ctype_secondary=='html')
{
// remove charset specification in HTML message
$body = preg_replace('/charset=[a-z0-9\-]+/i', '', $body);
if (!$safe) // remove remote images and scripts
{
$remote_patterns = array('/<img\s+(.*)src=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui',
'/(src|background)=(["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)(\2|\s|>)/Ui',
'/(<base.*href=["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)([^<]*>)/i',
'/(<link.*href=["\']?)([hftps]{3,5}:\/{2}[^"\'\s]+)([^<]*>)/i',
'/url\s*\(["\']?([hftps]{3,5}:\/{2}[^"\'\s]+)["\']?\)/i',
'/url\s*\(["\']?([\.\/]+[^"\'\s]+)["\']?\)/i',
'/<script.+<\/script>/Umis');
$remote_replaces = array('<img \\1src=\\2./program/blocked.gif\\4',
'',
'',
'',
'none',
'none',
'');
// set flag if message containes remote obejcts that where blocked
foreach ($remote_patterns as $pattern)
{
if (preg_match($pattern, $body))
{
$REMOTE_OBJECTS = TRUE;
break;
}
}
else if ($part->ctype_secondary == 'html') {
// clean HTML with washhtml by Frederic Motte
$body = washtml::wash($part->body, array(
'show_washed' => false,
'allow_remote' => $safe,
'blocked_src' => "./program/blocked.gif",
'charset' => 'UTF-8',
'cid_map' => $part->replaces,
), $full_inline);
$body = preg_replace($remote_patterns, $remote_replaces, $body);
}
$REMOTE_OBJECTS = !$full_inline;
return Q(rcmail_html_filter($body), 'show', FALSE);
return $body;
}
// text/enriched
if ($part->ctype_secondary=='enriched')
{
else if ($part->ctype_secondary=='enriched') {
return Q(enriched_to_html($body), 'show');
}
else
{
$body = $part->body;
/**** assert plaintext ****/
// make links and email-addresses clickable
$convert_patterns = $convert_replaces = $replace_strings = array();
@ -695,6 +569,7 @@ function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
// if ($part->ctype_parameters['format'] != 'flowed')
// $body = wordwrap(trim($body), 80);
// search for patterns like links and e-mail addresses
$body = preg_replace($convert_patterns, $convert_replaces, $body);
// split body into single lines
@ -702,14 +577,12 @@ function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
$quote_level = 0;
// colorize quoted parts
for($n=0; $n<sizeof($a_lines); $n++)
{
for ($n=0; $n < sizeof($a_lines); $n++) {
$line = $a_lines[$n];
$quotation = '';
$q = 0;
if (preg_match('/^(>+\s*)+/', $line, $regs))
{
if (preg_match('/^(>+\s*)+/', $line, $regs)) {
$q = strlen(preg_replace('/\s/', '', $regs[0]));
$line = substr($line, strlen($regs[0]));
@ -722,7 +595,7 @@ function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
$quotation = str_repeat("</blockquote>", $quote_level);
$quote_level = $q;
$a_lines[$n] = $quotation . Q($line, 'replace', FALSE);
$a_lines[$n] = $quotation . Q($line, 'replace', false); // htmlquote plaintext
}
// insert the links for urls and mailtos
@ -730,11 +603,12 @@ function rcmail_print_body($part, $safe=FALSE, $plain=FALSE)
return "<div class=\"pre\">".$body."\n</div>";
}
}
// add a string to the replacement array and return a replacement string
/**
* add a string to the replacement array and return a replacement string
*/
function rcmail_str_replacement($str, &$rep)
{
static $count = 0;
@ -743,200 +617,10 @@ function rcmail_str_replacement($str, &$rep)
}
/**
 * Walk a message structure recursively and sort its parts into parts to
 * display and parts to list as attachments.
 *
 * @param object $structure Message (sub-)structure; displayed parts get their
 *                          ->type property set
 * @param array  $arg       Options imported with extract(); per the inline note
 *                          below: (bool)prefer_html and (string)get_url
 * @param bool   $recursive True when called for a nested part; enables the
 *                          'headers' pseudo part
 * @return array array($a_return_parts, $a_attachments)
 */
function rcmail_parse_message(&$structure, $arg=array(), $recursive=FALSE)
{
global $IMAP;
// static: inline objects accumulate across all calls of this function
static $sa_inline_objects = array();
// arguments are: (bool)$prefer_html, (string)$get_url
extract($arg);
$a_attachments = array();
$a_return_parts = array();
$out = '';
$message_ctype_primary = strtolower($structure->ctype_primary);
$message_ctype_secondary = strtolower($structure->ctype_secondary);
// show message headers
if ($recursive && is_array($structure->headers) && isset($structure->headers['subject']))
{
$c = new stdClass;
$c->type = 'headers';
$c->headers = &$structure->headers;
$a_return_parts[] = $c;
}
// print body if message doesn't have multiple parts
if ($message_ctype_primary=='text')
{
$structure->type = 'content';
$a_return_parts[] = &$structure;
}
// message contains alternative parts
else if ($message_ctype_primary=='multipart' && $message_ctype_secondary=='alternative' && is_array($structure->parts))
{
// get html/plaintext parts
$plain_part = $html_part = $print_part = $related_part = NULL;
foreach ($structure->parts as $p => $sub_part)
{
$rel_parts = $attachmnts = null;
$sub_ctype_primary = strtolower($sub_part->ctype_primary);
$sub_ctype_secondary = strtolower($sub_part->ctype_secondary);
// check if sub part is
if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='plain')
$plain_part = $p;
else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='html')
$html_part = $p;
else if ($sub_ctype_primary=='text' && $sub_ctype_secondary=='enriched')
$enriched_part = $p;
else if ($sub_ctype_primary=='multipart' && ($sub_ctype_secondary=='related' || $sub_ctype_secondary=='mixed'))
$related_part = $p;
}
// parse related part (alternative part could be in here)
if ($related_part!==NULL)
{
list($rel_parts, $attachmnts) = rcmail_parse_message($structure->parts[$related_part], $arg, TRUE);
$a_attachments = array_merge($a_attachments, $attachmnts);
}
// merge related parts if any
if ($rel_parts && $prefer_html && !$html_part)
$a_return_parts = array_merge($a_return_parts, $rel_parts);
// choose html/plain part to print
else if ($html_part!==NULL && $prefer_html)
$print_part = &$structure->parts[$html_part];
else if ($enriched_part!==NULL)
$print_part = &$structure->parts[$enriched_part];
else if ($plain_part!==NULL)
$print_part = &$structure->parts[$plain_part];
// show message body
if (is_object($print_part))
{
$print_part->type = 'content';
$a_return_parts[] = $print_part;
}
// show plaintext warning
else if ($html_part!==NULL && empty($a_return_parts))
{
$c = new stdClass;
$c->type = 'content';
$c->body = rcube_label('htmlmessage');
$c->ctype_primary = 'text';
$c->ctype_secondary = 'plain';
$a_return_parts[] = $c;
}
// add html part as attachment
if ($html_part!==NULL && $structure->parts[$html_part]!==$print_part)
{
$html_part = &$structure->parts[$html_part];
$html_part->filename = rcube_label('htmlmessage');
$html_part->mimetype = 'text/html';
$a_attachments[] = $html_part;
}
}
// message contains multiple parts
else if (is_array($structure->parts) && !empty($structure->parts))
{
for ($i=0; $i<count($structure->parts); $i++)
{
$mail_part = &$structure->parts[$i];
$primary_type = strtolower($mail_part->ctype_primary);
$secondary_type = strtolower($mail_part->ctype_secondary);
// multipart/alternative
if ($primary_type=='multipart')
{
list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE);
$a_return_parts = array_merge($a_return_parts, $parts);
$a_attachments = array_merge($a_attachments, $attachmnts);
}
// part text/[plain|html] OR message/delivery-status
else if (($primary_type=='text' && ($secondary_type=='plain' || $secondary_type=='html') && $mail_part->disposition!='attachment') ||
($primary_type=='message' && ($secondary_type=='delivery-status' || $secondary_type=='disposition-notification')))
{
$mail_part->type = 'content';
$a_return_parts[] = $mail_part;
}
// part message/*
else if ($primary_type=='message')
{
list($parts, $attachmnts) = rcmail_parse_message($mail_part, $arg, TRUE);
$a_return_parts = array_merge($a_return_parts, $parts);
$a_attachments = array_merge($a_attachments, $attachmnts);
}
// ignore "virtual" protocol parts
else if ($primary_type=='protocol')
continue;
// part is file/attachment
else if ($mail_part->disposition=='attachment' || $mail_part->disposition=='inline' || $mail_part->headers['content-id'] ||
(empty($mail_part->disposition) && $mail_part->filename))
{
// skip apple resource forks
if ($message_ctype_secondary=='appledouble' && $secondary_type=='applefile')
continue;
// part belongs to a related message
if ($message_ctype_secondary=='related' && $mail_part->headers['content-id'])
{
// strip the surrounding angle brackets from the Content-ID header
$mail_part->content_id = preg_replace(array('/^</', '/>$/'), '', $mail_part->headers['content-id']);
$sa_inline_objects[] = $mail_part;
}
// is regular attachment
else
{
if (!$mail_part->filename)
$mail_part->filename = 'Part '.$mail_part->mime_id;
$a_attachments[] = $mail_part;
}
}
}
// if this was a related part try to resolve references
if ($message_ctype_secondary=='related' && sizeof($sa_inline_objects))
{
// map "cid:<content-id>" to the HTML-escaped URL built from $get_url and the part's mime_id
$a_replaces = array();
foreach ($sa_inline_objects as $inline_object)
$a_replaces['cid:'.$inline_object->content_id] = htmlspecialchars(sprintf($get_url, $inline_object->mime_id));
// add replace array to each content part
// (will be applied later when part body is available)
for ($i=0; $i<count($a_return_parts); $i++)
{
if ($a_return_parts[$i]->type=='content')
$a_return_parts[$i]->replaces = $a_replaces;
}
}
}
// message is single part non-text
else if ($structure->filename)
$a_attachments[] = $structure;
return array($a_return_parts, $a_attachments);
}
// return table with message headers
/**
* return table with message headers
*/
function rcmail_message_headers($attrib, $headers=NULL)
{
global $IMAP, $OUTPUT, $MESSAGE;
@ -989,7 +673,9 @@ function rcmail_message_headers($attrib, $headers=NULL)
}
/**
*
*/
function rcmail_message_body($attrib)
{
global $CONFIG, $OUTPUT, $MESSAGE, $IMAP, $REMOTE_OBJECTS;
@ -1028,7 +714,7 @@ function rcmail_message_body($attrib)
$out .= '<div class="message-part">';
if ($part->ctype_secondary != 'plain')
$out .= rcmail_sanitize_html($body, $attrib['id']);
$out .= rcmail_html4inline($body, $attrib['id']);
else
$out .= $body;
@ -1068,12 +754,11 @@ function rcmail_message_body($attrib)
// modify a HTML message that it can be displayed inside a HTML page
function rcmail_sanitize_html($body, $container_id)
/**
* modify a HTML message that it can be displayed inside a HTML page
*/
function rcmail_html4inline($body, $container_id)
{
// remove any null-byte characters before parsing
$body = preg_replace('/\x00/', '', $body);
$base_url = "";
$last_style_pos = 0;
$body_lc = strtolower($body);
@ -1095,26 +780,6 @@ function rcmail_sanitize_html($body, $container_id)
$last_style_pos = $pos2;
}
// remove SCRIPT tags
foreach (array('script', 'applet', 'object', 'embed', 'iframe') as $tag)
{
while (($pos = strpos($body_lc, '<'.$tag)) && (($pos2 = strpos($body_lc, '</'.$tag.'>', $pos)) || ($pos3 = strpos($body_lc, '>', $pos))))
{
$end = $pos2 ? $pos2 + strlen('</'.$tag.'>') : $pos3 + 1;
$body = substr($body, 0, $pos) . substr($body, $end, strlen($body)-$end);
$body_lc = strtolower($body);
}
}
// replace event handlers on any object
while ($body != $prev_body)
{
$prev_body = $body;
$body = preg_replace('/(<[^!][^>]*\s)on(?:load|unload|click|dblclick|mousedown|mouseup|mouseover|mousemove|mouseout|focus|blur|keypress|keydown|keyup|submit|reset|select|change)=([^>]+>)/im', '$1__removed=$2', $body);
$body = preg_replace('/(<[^!][^>]*\shref=["\']?)(javascript:)([^>]*?>)/im', '$1null:$3', $body);
}
// resolve <base href>
if ($base_url)
{
@ -1138,14 +803,8 @@ function rcmail_sanitize_html($body, $container_id)
$body);
$out = preg_replace(
array(
'/<body([^>]*)>/i',
'/<\/body>/i',
),
array(
'<div class="rcmBody"\\1>',
'</div>',
),
array('/<body([^>]*)>/i', '/<\/body>/i'),
array('<div class="rcmBody"\\1>', '</div>'),
$out);
// quote <? of php and xml files that are specified as text/html
@ -1155,7 +814,9 @@ function rcmail_sanitize_html($body, $container_id)
}
// parse link attributes and set correct target
/**
* parse link attributes and set correct target
*/
function rcmail_alter_html_link($tag, $attrs, $container_id)
{
$attrib = parse_attrib_string($attrs);
@ -1176,7 +837,9 @@ function rcmail_alter_html_link($tag, $attrs, $container_id)
}
// decode address string and re-format it as HTML links
/**
* decode address string and re-format it as HTML links
*/
function rcmail_address_string($input, $max=NULL, $addicon=NULL)
{
global $IMAP, $PRINT_MODE, $CONFIG, $OUTPUT, $EMAIL_ADDRESS_PATTERN;
@ -1277,7 +940,9 @@ function rcmail_message_part_frame($attrib)
}
// clear message composing settings
/**
* clear message composing settings
*/
function rcmail_compose_cleanup()
{
if (!isset($_SESSION['compose']))

Loading…
Cancel
Save