Merge branch 'master' of github.com:roundcube/roundcubemail

pull/54/merge
Thomas Bruederli 12 years ago
commit c942e4a2e9

@ -13,7 +13,7 @@ class rcube_pam_password
{ {
$user = $_SESSION['username']; $user = $_SESSION['username'];
if (extension_loaded('pam')) { if (extension_loaded('pam') || extension_loaded('pam_auth')) {
if (pam_auth($user, $currpass, $error, false)) { if (pam_auth($user, $currpass, $error, false)) {
if (pam_chpass($user, $currpass, $newpass)) { if (pam_chpass($user, $currpass, $newpass)) {
return PASSWORD_SUCCESS; return PASSWORD_SUCCESS;

@ -408,3 +408,11 @@ function enriched_to_html($data)
class rcube_html_page extends rcmail_html_page class rcube_html_page extends rcmail_html_page
{ {
} }
class washtml extends rcube_washtml
{
}
class html2text extends rcube_html2text
{
}

@ -1,35 +1,23 @@
<?php <?php
/************************************************************************* /**
* * +-----------------------------------------------------------------------+
* class.html2text.inc * | This file is part of the Roundcube Webmail client |
* * | Copyright (C) 2008-2012, The Roundcube Dev Team |
************************************************************************* | Copyright (c) 2005-2007, Jon Abernathy <jon@chuggnutt.com> |
* * | |
* Converts HTML to formatted plain text * | Licensed under the GNU General Public License version 3 or |
* * | any later version with exceptions for skins & plugins. |
* Copyright (c) 2005-2007 Jon Abernathy <jon@chuggnutt.com> * | See the README file for a full license statement. |
* All rights reserved. * | |
* * | PURPOSE: |
* This script is free software; you can redistribute it and/or modify * | Converts HTML to formatted plain text (based on html2text class) |
* it under the terms of the GNU General Public License as published by * +-----------------------------------------------------------------------+
* the Free Software Foundation; either version 2 of the License, or * | Author: Thomas Bruederli <roundcube@gmail.com> |
* (at your option) any later version. * | Author: Aleksander Machniak <alec@alec.pl> |
* * | Author: Jon Abernathy <jon@chuggnutt.com> |
* The GNU General Public License can be found at * +-----------------------------------------------------------------------+
* http://www.gnu.org/copyleft/gpl.html. * */
* *
* This script is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* Author(s): Jon Abernathy <jon@chuggnutt.com> *
* *
* Last modified: 08/08/07 *
* *
*************************************************************************/
/** /**
* Takes HTML and converts it to formatted, plain text. * Takes HTML and converts it to formatted, plain text.
@ -99,58 +87,55 @@
* future time. * future time.
* *
* *** End of the housecleaning updates. Updated 08/08/07. * *** End of the housecleaning updates. Updated 08/08/07.
*/
/**
* Converts HTML to formatted plain text
* *
* @author Jon Abernathy <jon@chuggnutt.com> * @package Framework
* @version 1.0.0 * @subpackage Utils
* @since PHP 4.0.2
*/ */
class html2text class rcube_html2text
{ {
/** /**
* Contains the HTML content to convert. * Contains the HTML content to convert.
* *
* @var string $html * @var string $html
* @access public
*/ */
var $html; protected $html;
/** /**
* Contains the converted, formatted text. * Contains the converted, formatted text.
* *
* @var string $text * @var string $text
* @access public
*/ */
var $text; protected $text;
/** /**
* Maximum width of the formatted text, in columns. * Maximum width of the formatted text, in columns.
* *
* Set this value to 0 (or less) to ignore word wrapping * Set this value to 0 (or less) to ignore word wrapping
* and not constrain text to a fixed-width column. * and not constrain text to a fixed-width column.
* *
* @var integer $width * @var integer $width
* @access public
*/ */
var $width = 70; protected $width = 70;
/** /**
* Target character encoding for output text * Target character encoding for output text
* *
* @var string $charset * @var string $charset
* @access public
*/ */
var $charset = 'UTF-8'; protected $charset = 'UTF-8';
/** /**
* List of preg* regular expression patterns to search for, * List of preg* regular expression patterns to search for,
* used in conjunction with $replace. * used in conjunction with $replace.
* *
* @var array $search * @var array $search
* @access public * @see $replace
* @see $replace
*/ */
var $search = array( protected $search = array(
"/\r/", // Non-legal carriage return "/\r/", // Non-legal carriage return
"/[\n\t]+/", // Newlines and tabs "/[\n\t]+/", // Newlines and tabs
'/<head[^>]*>.*?<\/head>/i', // <head> '/<head[^>]*>.*?<\/head>/i', // <head>
@ -172,13 +157,12 @@ class html2text
); );
/** /**
* List of pattern replacements corresponding to patterns searched. * List of pattern replacements corresponding to patterns searched.
* *
* @var array $replace * @var array $replace
* @access public * @see $search
* @see $search
*/ */
var $replace = array( protected $replace = array(
'', // Non-legal carriage return '', // Non-legal carriage return
' ', // Newlines and tabs ' ', // Newlines and tabs
'', // <head> '', // <head>
@ -200,14 +184,13 @@ class html2text
); );
/** /**
* List of preg* regular expression patterns to search for, * List of preg* regular expression patterns to search for,
* used in conjunction with $ent_replace. * used in conjunction with $ent_replace.
* *
* @var array $ent_search * @var array $ent_search
* @access public * @see $ent_replace
* @see $ent_replace
*/ */
var $ent_search = array( protected $ent_search = array(
'/&(nbsp|#160);/i', // Non-breaking space '/&(nbsp|#160);/i', // Non-breaking space
'/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i', '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
// Double quotes // Double quotes
@ -227,13 +210,12 @@ class html2text
); );
/** /**
* List of pattern replacements corresponding to patterns searched. * List of pattern replacements corresponding to patterns searched.
* *
* @var array $ent_replace * @var array $ent_replace
* @access public * @see $ent_search
* @see $ent_search
*/ */
var $ent_replace = array( protected $ent_replace = array(
' ', // Non-breaking space ' ', // Non-breaking space
'"', // Double quotes '"', // Double quotes
"'", // Single quotes "'", // Single quotes
@ -252,13 +234,12 @@ class html2text
); );
/** /**
* List of preg* regular expression patterns to search for * List of preg* regular expression patterns to search for
* and replace using callback function. * and replace using callback function.
* *
* @var array $callback_search * @var array $callback_search
* @access public
*/ */
var $callback_search = array( protected $callback_search = array(
'/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href=""> '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href="">
'/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6 '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6
'/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b> '/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
@ -267,14 +248,13 @@ class html2text
); );
/** /**
* List of preg* regular expression patterns to search for in PRE body, * List of preg* regular expression patterns to search for in PRE body,
* used in conjunction with $pre_replace. * used in conjunction with $pre_replace.
* *
* @var array $pre_search * @var array $pre_search
* @access public * @see $pre_replace
* @see $pre_replace
*/ */
var $pre_search = array( protected $pre_search = array(
"/\n/", "/\n/",
"/\t/", "/\t/",
'/ /', '/ /',
@ -283,13 +263,12 @@ class html2text
); );
/** /**
* List of pattern replacements corresponding to patterns searched for PRE body. * List of pattern replacements corresponding to patterns searched for PRE body.
* *
* @var array $pre_replace * @var array $pre_replace
* @access public * @see $pre_search
* @see $pre_search
*/ */
var $pre_replace = array( protected $pre_replace = array(
'<br>', '<br>',
'&nbsp;&nbsp;&nbsp;&nbsp;', '&nbsp;&nbsp;&nbsp;&nbsp;',
'&nbsp;', '&nbsp;',
@ -298,103 +277,95 @@ class html2text
); );
/** /**
* Contains a list of HTML tags to allow in the resulting text. * Contains a list of HTML tags to allow in the resulting text.
* *
* @var string $allowed_tags * @var string $allowed_tags
* @access public * @see set_allowed_tags()
* @see set_allowed_tags()
*/ */
var $allowed_tags = ''; protected $allowed_tags = '';
/** /**
* Contains the base URL that relative links should resolve to. * Contains the base URL that relative links should resolve to.
* *
* @var string $url * @var string $url
* @access public
*/ */
var $url; protected $url;
/** /**
* Indicates whether content in the $html variable has been converted yet. * Indicates whether content in the $html variable has been converted yet.
* *
* @var boolean $_converted * @var boolean $_converted
* @access private * @see $html, $text
* @see $html, $text
*/ */
var $_converted = false; protected $_converted = false;
/** /**
* Contains URL addresses from links to be rendered in plain text. * Contains URL addresses from links to be rendered in plain text.
* *
* @var array $_link_list * @var array $_link_list
* @access private * @see _build_link_list()
* @see _build_link_list()
*/ */
var $_link_list = array(); protected $_link_list = array();
/** /**
* Boolean flag, true if a table of link URLs should be listed after the text. * Boolean flag, true if a table of link URLs should be listed after the text.
* *
* @var boolean $_do_links * @var boolean $_do_links
* @access private * @see __construct()
* @see html2text()
*/ */
var $_do_links = true; protected $_do_links = true;
/** /**
* Constructor. * Constructor.
* *
* If the HTML source string (or file) is supplied, the class * If the HTML source string (or file) is supplied, the class
* will instantiate with that source propagated, all that has * will instantiate with that source propagated, all that has
* to be done it to call get_text(). * to be done it to call get_text().
* *
* @param string $source HTML content * @param string $source HTML content
* @param boolean $from_file Indicates $source is a file to pull content from * @param boolean $from_file Indicates $source is a file to pull content from
* @param boolean $do_links Indicate whether a table of link URLs is desired * @param boolean $do_links Indicate whether a table of link URLs is desired
* @param integer $width Maximum width of the formatted text, 0 for no limit * @param integer $width Maximum width of the formatted text, 0 for no limit
* @access public
* @return void
*/ */
function html2text( $source = '', $from_file = false, $do_links = true, $width = 75, $charset = 'UTF-8' ) function __construct($source = '', $from_file = false, $do_links = true, $width = 75, $charset = 'UTF-8')
{ {
if ( !empty($source) ) { if (!empty($source)) {
$this->set_html($source, $from_file); $this->set_html($source, $from_file);
} }
$this->set_base_url(); $this->set_base_url();
$this->_do_links = $do_links; $this->_do_links = $do_links;
$this->width = $width; $this->width = $width;
$this->charset = $charset; $this->charset = $charset;
} }
/** /**
* Loads source HTML into memory, either from $source string or a file. * Loads source HTML into memory, either from $source string or a file.
* *
* @param string $source HTML content * @param string $source HTML content
* @param boolean $from_file Indicates $source is a file to pull content from * @param boolean $from_file Indicates $source is a file to pull content from
* @access public
* @return void
*/ */
function set_html( $source, $from_file = false ) function set_html($source, $from_file = false)
{ {
if ( $from_file && file_exists($source) ) { if ($from_file && file_exists($source)) {
$this->html = file_get_contents($source); $this->html = file_get_contents($source);
} }
else else {
$this->html = $source; $this->html = $source;
}
$this->_converted = false; $this->_converted = false;
} }
/** /**
* Returns the text, converted from HTML. * Returns the text, converted from HTML.
* *
* @access public * @return string Plain text
* @return string
*/ */
function get_text() function get_text()
{ {
if ( !$this->_converted ) { if (!$this->_converted) {
$this->_convert(); $this->_convert();
} }
@ -402,10 +373,7 @@ class html2text
} }
/** /**
* Prints the text, converted from HTML. * Prints the text, converted from HTML.
*
* @access public
* @return void
*/ */
function print_text() function print_text()
{ {
@ -413,50 +381,34 @@ class html2text
} }
/** /**
* Alias to print_text(), operates identically. * Sets the allowed HTML tags to pass through to the resulting text.
*
* @access public
* @return void
* @see print_text()
*/
function p()
{
print $this->get_text();
}
/**
* Sets the allowed HTML tags to pass through to the resulting text.
* *
* Tags should be in the form "<p>", with no corresponding closing tag. * Tags should be in the form "<p>", with no corresponding closing tag.
*
* @access public
* @return void
*/ */
function set_allowed_tags( $allowed_tags = '' ) function set_allowed_tags($allowed_tags = '')
{ {
if ( !empty($allowed_tags) ) { if (!empty($allowed_tags)) {
$this->allowed_tags = $allowed_tags; $this->allowed_tags = $allowed_tags;
} }
} }
/** /**
* Sets a base URL to handle relative links. * Sets a base URL to handle relative links.
*
* @access public
* @return void
*/ */
function set_base_url( $url = '' ) function set_base_url($url = '')
{ {
if ( empty($url) ) { if (empty($url)) {
if ( !empty($_SERVER['HTTP_HOST']) ) { if (!empty($_SERVER['HTTP_HOST'])) {
$this->url = 'http://' . $_SERVER['HTTP_HOST']; $this->url = 'http://' . $_SERVER['HTTP_HOST'];
} else { }
else {
$this->url = ''; $this->url = '';
} }
} else { }
else {
// Strip any trailing slashes for consistency (relative // Strip any trailing slashes for consistency (relative
// URLs may already start with a slash like "/file.html") // URLs may already start with a slash like "/file.html")
if ( substr($url, -1) == '/' ) { if (substr($url, -1) == '/') {
$url = substr($url, 0, -1); $url = substr($url, 0, -1);
} }
$this->url = $url; $this->url = $url;
@ -464,12 +416,9 @@ class html2text
} }
/** /**
* Workhorse function that does actual conversion (calls _converter() method). * Workhorse function that does actual conversion (calls _converter() method).
*
* @access private
* @return void
*/ */
function _convert() protected function _convert()
{ {
// Variables used for building the link list // Variables used for building the link list
$this->_link_list = array(); $this->_link_list = array();
@ -487,25 +436,21 @@ class html2text
} }
} }
$this->text = $text; $this->text = $text;
$this->_converted = true; $this->_converted = true;
} }
/** /**
* Workhorse function that does actual conversion. * Workhorse function that does actual conversion.
* *
* First performs custom tag replacement specified by $search and * First performs custom tag replacement specified by $search and
* $replace arrays. Then strips any remaining HTML tags, reduces whitespace * $replace arrays. Then strips any remaining HTML tags, reduces whitespace
* and newlines to a readable format, and word wraps the text to * and newlines to a readable format, and word wraps the text to
* $width characters. * $width characters.
* *
* @param string Reference to HTML content string * @param string Reference to HTML content string
*
* @access private
* @return void
*/ */
function _converter(&$text) protected function _converter(&$text)
{ {
// Convert <BLOCKQUOTE> (before PRE!) // Convert <BLOCKQUOTE> (before PRE!)
$this->_convert_blockquotes($text); $this->_convert_blockquotes($text);
@ -517,7 +462,7 @@ class html2text
$text = preg_replace($this->search, $this->replace, $text); $text = preg_replace($this->search, $this->replace, $text);
// Run our defined tags search-and-replace with callback // Run our defined tags search-and-replace with callback
$text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text); $text = preg_replace_callback($this->callback_search, array($this, 'tags_preg_callback'), $text);
// Strip any other HTML tags // Strip any other HTML tags
$text = strip_tags($text, $this->allowed_tags); $text = strip_tags($text, $this->allowed_tags);
@ -551,19 +496,17 @@ class html2text
} }
/** /**
* Helper function called by preg_replace() on link replacement. * Helper function called by preg_replace() on link replacement.
* *
* Maintains an internal list of links to be displayed at the end of the * Maintains an internal list of links to be displayed at the end of the
* text, with numeric indices to the original point in the text they * text, with numeric indices to the original point in the text they
* appeared. Also makes an effort at identifying and handling absolute * appeared. Also makes an effort at identifying and handling absolute
* and relative links. * and relative links.
* *
* @param string $link URL of the link * @param string $link URL of the link
* @param string $display Part of the text to associate number with * @param string $display Part of the text to associate number with
* @access private
* @return string
*/ */
function _build_link_list( $link, $display ) protected function _build_link_list( $link, $display )
{ {
if (!$this->_do_links || empty($link)) { if (!$this->_do_links || empty($link)) {
return $display; return $display;
@ -594,12 +537,11 @@ class html2text
} }
/** /**
* Helper function for PRE body conversion. * Helper function for PRE body conversion.
* *
* @param string HTML content * @param string HTML content
* @access private
*/ */
function _convert_pre(&$text) protected function _convert_pre(&$text)
{ {
// get the content of PRE element // get the content of PRE element
while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) { while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
@ -607,7 +549,7 @@ class html2text
// Run our defined tags search-and-replace with callback // Run our defined tags search-and-replace with callback
$this->pre_content = preg_replace_callback($this->callback_search, $this->pre_content = preg_replace_callback($this->callback_search,
array('html2text', '_preg_callback'), $this->pre_content); array($this, 'tags_preg_callback'), $this->pre_content);
// convert the content // convert the content
$this->pre_content = sprintf('<div><br>%s<br></div>', $this->pre_content = sprintf('<div><br>%s<br></div>',
@ -615,7 +557,7 @@ class html2text
// replace the content (use callback because content can contain $0 variable) // replace the content (use callback because content can contain $0 variable)
$text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU', $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU',
array('html2text', '_preg_pre_callback'), $text, 1); array($this, 'pre_preg_callback'), $text, 1);
// free memory // free memory
$this->pre_content = ''; $this->pre_content = '';
@ -623,12 +565,11 @@ class html2text
} }
/** /**
* Helper function for BLOCKQUOTE body conversion. * Helper function for BLOCKQUOTE body conversion.
* *
* @param string HTML content * @param string HTML content
* @access private
*/ */
function _convert_blockquotes(&$text) protected function _convert_blockquotes(&$text)
{ {
if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) { if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
$level = 0; $level = 0;
@ -679,12 +620,12 @@ class html2text
} }
/** /**
* Callback function for preg_replace_callback use. * Callback function for preg_replace_callback use.
* *
* @param array PREG matches * @param array PREG matches
* @return string * @return string
*/ */
private function _preg_callback($matches) public function tags_preg_callback($matches)
{ {
switch (strtolower($matches[1])) { switch (strtolower($matches[1])) {
case 'b': case 'b':
@ -702,12 +643,12 @@ class html2text
} }
/** /**
* Callback function for preg_replace_callback use in PRE content handler. * Callback function for preg_replace_callback use in PRE content handler.
* *
* @param array PREG matches * @param array PREG matches
* @return string * @return string
*/ */
private function _preg_pre_callback($matches) public function pre_preg_callback($matches)
{ {
return $this->pre_content; return $this->pre_content;
} }
@ -742,12 +683,7 @@ class html2text
private function _strtoupper($str) private function _strtoupper($str)
{ {
$str = html_entity_decode($str, ENT_COMPAT, $this->charset); $str = html_entity_decode($str, ENT_COMPAT, $this->charset);
$str = mb_strtoupper($str);
if (function_exists('mb_strtoupper'))
$str = mb_strtoupper($str);
else
$str = strtoupper($str);
$str = htmlspecialchars($str, ENT_COMPAT, $this->charset); $str = htmlspecialchars($str, ENT_COMPAT, $this->charset);
return $str; return $str;

@ -272,7 +272,7 @@ class rcube_message
$out = $this->get_part_content($mime_id); $out = $this->get_part_content($mime_id);
// create instance of html2text class // create instance of html2text class
$txt = new html2text($out); $txt = new rcube_html2text($out);
return $txt->get_text(); return $txt->get_text();
} }
} }

@ -443,7 +443,7 @@ class rcube_spellchecker
private function html2text($text) private function html2text($text)
{ {
$h2t = new html2text($text, false, true, 0); $h2t = new rcube_html2text($text, false, true, 0);
return $h2t->get_text(); return $h2t->get_text();
} }

@ -36,7 +36,7 @@ class rcube_string_replacer
// Support unicode/punycode in top-level domain part // Support unicode/punycode in top-level domain part
$utf_domain = '[^?&@"\'\\/()\s\r\t\n]+\\.?([^\\x00-\\x2f\\x3b-\\x40\\x5b-\\x60\\x7b-\\x7f]{2,}|xn--[a-zA-Z0-9]{2,})'; $utf_domain = '[^?&@"\'\\/()\s\r\t\n]+\\.?([^\\x00-\\x2f\\x3b-\\x40\\x5b-\\x60\\x7b-\\x7f]{2,}|xn--[a-zA-Z0-9]{2,})';
$url1 = '.:;,'; $url1 = '.:;,';
$url2 = 'a-zA-Z0-9%=#$@+?!&\\/_~\\[\\]{}\*-'; $url2 = 'a-zA-Z0-9%=#$@+?!&\\/_~\\[\\]\\(\\){}\*-';
$this->link_pattern = "/([\w]+:\/\/|\W[Ww][Ww][Ww]\.|^[Ww][Ww][Ww]\.)($utf_domain([$url1]?[$url2]+)*)/"; $this->link_pattern = "/([\w]+:\/\/|\W[Ww][Ww][Ww]\.|^[Ww][Ww][Ww]\.)($utf_domain([$url1]?[$url2]+)*)/";
$this->mailto_pattern = "/(" $this->mailto_pattern = "/("
@ -161,6 +161,9 @@ class rcube_string_replacer
// "http://example.com/?a[b]=c". However we need to handle // "http://example.com/?a[b]=c". However we need to handle
// properly situation when a bracket is placed at the end // properly situation when a bracket is placed at the end
// of the link e.g. "[http://example.com]" // of the link e.g. "[http://example.com]"
// Yes, this is not perfect: it correctly handles only paired characters,
// but it should work for common cases
if (preg_match('/(\\[|\\])/', $url)) { if (preg_match('/(\\[|\\])/', $url)) {
$in = false; $in = false;
for ($i=0, $len=strlen($url); $i<$len; $i++) { for ($i=0, $len=strlen($url); $i<$len; $i++) {
@ -182,6 +185,28 @@ class rcube_string_replacer
} }
} }
// Do the same for parentheses
if (preg_match('/(\\(|\\))/', $url)) {
$in = false;
for ($i=0, $len=strlen($url); $i<$len; $i++) {
if ($url[$i] == '(') {
if ($in)
break;
$in = true;
}
else if ($url[$i] == ')') {
if (!$in)
break;
$in = false;
}
}
if ($i < $len) {
$suffix = substr($url, $i);
$url = substr($url, 0, $i);
}
}
return $suffix; return $suffix;
} }
} }

@ -0,0 +1,451 @@
<?php
/**
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2008-2012, The Roundcube Dev Team |
| |
| Licensed under the GNU General Public License version 3 or |
| any later version with exceptions for skins & plugins. |
| See the README file for a full license statement. |
| |
| PURPOSE: |
| Utility class providing HTML sanitizer (based on Washtml class) |
+-----------------------------------------------------------------------+
| Author: Thomas Bruederli <roundcube@gmail.com> |
| Author: Aleksander Machniak <alec@alec.pl> |
| Author: Frederic Motte <fmotte@ubixis.com> |
+-----------------------------------------------------------------------+
*/
/**
* Washtml, an HTML sanitizer.
*
* Copyright (c) 2007 Frederic Motte <fmotte@ubixis.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* OVERVIEW:
*
* Washtml takes untrusted HTML and returns a safe HTML string.
*
* SYNOPSIS:
*
* $washer = new washtml($config);
* $washer->wash($html);
* It returns a sanitized string of the $html parameter without html and head tags.
* $html is a string containing the html code to wash.
* $config is an array containing options:
* $config['allow_remote'] is a boolean to allow link to remote images.
* $config['blocked_src'] string with image-src to be used for blocked remote images
* $config['show_washed'] is a boolean to include washed out attributes as x-washed
* $config['cid_map'] is an array where cid urls index urls to replace them.
* $config['charset'] is a string containing the charset of the HTML document if it is not defined in it.
* $washer->extlinks is a reference to a boolean that is set to true if remote images were removed. (FE: show remote images link)
*
* INTERNALS:
*
* Only tags and attributes in the static lists $html_elements and $html_attribs
* are kept, inline styles are also filtered: all style identifiers matching
* /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and safe
* urls if allowed and cid urls if mapped are kept.
*
* Roundcube Changes:
* - added $block_elements
* - changed $ignore_elements behaviour
* - added RFC2397 support
* - base URL support
* - invalid HTML comments removal before parsing
* - "fixing" unitless CSS values for XHTML output
* - base url resolving
*/
/**
* Utility class providing HTML sanitizer
*
* @package Framework
* @subpackage Utils
*/
class rcube_washtml
{
/* Allowed HTML elements (default).
   The constructor merges this list with any 'html_elements' passed in its
   argument and stores the result in $_html_elements. */
static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b',
'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center',
'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
'ins', 'label', 'legend', 'li', 'map', 'menu', 'nobr', 'ol', 'p', 'pre', 'q',
's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', 'img',
// form elements
'button', 'input', 'textarea', 'select', 'option', 'optgroup'
);
/* Ignore these HTML tags and their content (default).
   NOTE(review): the comment on $_ignore_elements below says the content IS
   processed; confirm the actual behaviour against dumpHtml(). */
static $ignore_elements = array('script', 'applet', 'embed', 'object', 'style');
/* Allowed HTML attributes (default).
   wash_attribs() copies attributes named here to the output; 'href', 'style',
   'background' and 'src' are not listed and get dedicated checks there. */
static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height',
'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing',
'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight',
'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border',
'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace',
'cellborder', 'size', 'lang', 'dir', 'usemap', 'shape', 'media',
// attributes of form elements
'type', 'rows', 'cols', 'disabled', 'readonly', 'checked', 'multiple', 'value'
);
/* Block elements which could be empty but cannot be returned in short form (<tag />) (default) */
static $block_elements = array('div', 'p', 'pre', 'blockquote', 'a', 'font', 'center',
'table', 'ul', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'dl', 'strong',
'i', 'b', 'u', 'span',
);
/* State for linked objects in HTML.
   Set to true by wash_style()/wash_attribs() when a remote URL was dropped
   because the 'allow_remote' option is off. */
public $extlinks = false;
/* Current settings: constructor argument merged with the defaults
   for 'show_washed', 'allow_remote' and 'cid_map' */
private $config = array();
/* Registered callback functions for tags (tag name => callback), see add_callback() */
private $handlers = array();
/* Allowed HTML elements, stored as keys (flipped list) for isset() lookups */
private $_html_elements = array();
/* Ignore these HTML tags but process their content, stored as keys (flipped list).
   NOTE(review): contradicts the comment on the static $ignore_elements above. */
private $_ignore_elements = array();
/* Block elements which could be empty but cannot be returned in short form (<tag />),
   stored as keys (flipped list) */
private $_block_elements = array();
/* Allowed HTML attributes, stored as keys (flipped list) for isset() lookups */
private $_html_attribs = array();
/**
 * Class constructor
 *
 * Builds the per-instance element/attribute lists and stores the remaining
 * options as the current configuration.
 *
 * @param array $p Options. The keys 'html_elements', 'html_attribs',
 *                 'ignore_elements' and 'block_elements' (each a name or a
 *                 list of names) extend the corresponding static default
 *                 list. Every other key is kept in the configuration, where
 *                 'show_washed' defaults to true, 'allow_remote' to false
 *                 and 'cid_map' to an empty array.
 */
public function __construct($p = array())
{
    // Lists are stored flipped (name => index) so that membership can be
    // tested with isset(). Missing options are treated as empty lists
    // instead of being read blindly, which raised undefined-index notices
    // whenever the constructor was called with its default argument.
    $custom_elements = isset($p['html_elements']) ? (array) $p['html_elements'] : array();
    $custom_attribs  = isset($p['html_attribs']) ? (array) $p['html_attribs'] : array();
    $custom_ignore   = isset($p['ignore_elements']) ? (array) $p['ignore_elements'] : array();
    $custom_block    = isset($p['block_elements']) ? (array) $p['block_elements'] : array();

    $this->_html_elements   = array_flip($custom_elements) + array_flip(self::$html_elements);
    $this->_html_attribs    = array_flip($custom_attribs) + array_flip(self::$html_attribs);
    $this->_ignore_elements = array_flip($custom_ignore) + array_flip(self::$ignore_elements);
    $this->_block_elements  = array_flip($custom_block) + array_flip(self::$block_elements);

    // The list options are consumed above; everything else is configuration
    unset($p['html_elements'], $p['html_attribs'], $p['ignore_elements'], $p['block_elements']);

    // Caller-supplied values win over the defaults (array union keeps the left side)
    $this->config = $p + array('show_washed' => true, 'allow_remote' => false, 'cid_map' => array());
}
/**
 * Registers a handler for a tag.
 *
 * The callback is stored under the given tag name; a handler registered
 * earlier for the same name is overwritten.
 *
 * @param string   $tagName  Tag name used as the lookup key
 * @param callback $callback Function to call for that tag
 */
public function add_callback($tagName, $callback)
{
    $registered = $this->handlers;
    $registered[$tagName] = $callback;

    $this->handlers = $registered;
}
/**
 * Check CSS style
 *
 * Splits the style string on ';' and, for every "property: value"
 * declaration, keeps only the leading value tokens that match the
 * whitelist pattern (url(), rgb(), numbers with optional unit, hex colors,
 * plain words). url() values are kept only when they are mapped in
 * 'cid_map', are http(s)/protocol-relative URLs with 'allow_remote'
 * enabled, or are data: URIs. A dropped remote URL sets $this->extlinks.
 *
 * @param string $style Content of a style attribute
 *
 * @return string Washed declarations, empty string when nothing is left
 */
private function wash_style($style)
{
    $s = '';

    // 'base_url' is optional, do not read it blindly
    $base_url = isset($this->config['base_url']) ? $this->config['base_url'] : '';

    // Capture groups: 1 = whole token, 2 = url() content, 3 = unit
    $token = '/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'.
        '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'.
        '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'.
        '|#[0-9a-f]{3,6}'.
        '|[a-z0-9", -]+'.
        ')\s*/i';

    foreach (explode(';', $style) as $declaration) {
        if (preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match)) {
            $cssid = $match[1];
            $str   = $match[2];
            $value = '';

            // strlen(), not sizeof(): $str is a string, and count()/sizeof()
            // on a string throws TypeError on PHP 8
            while (strlen($str) > 0 && preg_match($token, $str, $match)) {
                // Group 2 is absent from $match when the url() alternative
                // did not take part in the match
                if (!empty($match[2])) {
                    // Look the URL up in cid_map, as-is and relative to base_url
                    $src = null;
                    foreach (array($match[2], $base_url . $match[2]) as $cid) {
                        if (!empty($this->config['cid_map'][$cid])) {
                            $src = $this->config['cid_map'][$cid];
                            break;
                        }
                    }

                    if ($src) {
                        $value .= ' url('.htmlspecialchars($src, ENT_QUOTES) . ')';
                    }
                    else if (preg_match('!^(https?:)?//[a-z0-9/._+-]+$!i', $match[2], $url)) {
                        if (!empty($this->config['allow_remote'])) {
                            $value .= ' url('.htmlspecialchars($url[0], ENT_QUOTES).')';
                        }
                        else {
                            // remember that remote content was removed
                            $this->extlinks = true;
                        }
                    }
                    else if (preg_match('/^data:.+/i', $match[2])) { // RFC2397
                        $value .= ' url('.htmlspecialchars($match[2], ENT_QUOTES).')';
                    }
                }
                else {
                    // whitelist ?
                    $value .= ' ' . $match[0];

                    // #1488535: Fix size units, so width:800 would be changed to width:800px
                    if (preg_match('/(left|right|top|bottom|width|height)/i', $cssid)
                        && preg_match('/^[0-9]+$/', $match[0])
                    ) {
                        $value .= 'px';
                    }
                }

                // Consume the token; substr() returns false instead of ''
                // on older PHP versions, hence the cast
                $str = (string) substr($str, strlen($match[0]));
            }

            if (isset($value[0])) {
                $s .= ($s?' ':'') . $cssid . ':' . $value . ';';
            }
        }
    }

    return $s;
}
/**
 * Take a node and return allowed attributes and check values
 *
 * - attributes listed in $_html_attribs are copied (value escaped);
 * - 'href' is copied unless it starts with javascript, vbscript or
 *   data:text, and only if it looks like "scheme:...", "//..." or "#...";
 * - 'style' is passed through wash_style();
 * - 'background' and <img> 'src' are kept when mapped in 'cid_map', when
 *   remote (http/https/ftp) and 'allow_remote' is on, or when they are
 *   data: URIs; a blocked remote URL sets $this->extlinks and is replaced
 *   with 'blocked_src' if that option is set;
 * - everything else is dropped and, with 'show_washed' on, its name is
 *   listed in an x-washed attribute.
 *
 * @param DOMNode $node Element node whose attributes are checked
 *
 * @return string Attribute string (with a leading space per attribute)
 */
private function wash_attribs($node)
{
    $t = '';
    $washed = '';

    // 'base_url' is optional, do not read it blindly
    $base_url = isset($this->config['base_url']) ? $this->config['base_url'] : '';

    foreach ($node->attributes as $key => $plop) {
        $key = strtolower($key);
        $value = $node->getAttribute($key);

        if (isset($this->_html_attribs[$key]) ||
            ($key == 'href' && !preg_match('!^(javascript|vbscript|data:text)!i', $value)
            && preg_match('!^([a-z][a-z0-9.+-]+:|//|#).+!i', $value))
        ) {
            $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
        }
        else if ($key == 'style' && ($style = $this->wash_style($value))) {
            // use the quote character that does not occur in the washed style
            $quot = strpos($style, '"') !== false ? "'" : '"';
            $t .= ' style=' . $quot . $style . $quot;
        }
        else if ($key == 'background' || ($key == 'src' && strtolower($node->tagName) == 'img')) { //check tagName anyway
            // Look the URL up in cid_map, as-is and relative to base_url,
            // without touching entries that do not exist
            $src = null;
            foreach (array($value, $base_url . $value) as $cid) {
                if (!empty($this->config['cid_map'][$cid])) {
                    $src = $this->config['cid_map'][$cid];
                    break;
                }
            }

            if ($src) {
                $t .= ' ' . $key . '="' . htmlspecialchars($src, ENT_QUOTES) . '"';
            }
            else if (preg_match('/^(http|https|ftp):.+/i', $value)) {
                if (!empty($this->config['allow_remote'])) {
                    $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
                }
                else {
                    // remember that remote content was removed
                    $this->extlinks = true;
                    if (!empty($this->config['blocked_src'])) {
                        $t .= ' ' . $key . '="' . htmlspecialchars($this->config['blocked_src'], ENT_QUOTES) . '"';
                    }
                }
            }
            else if (preg_match('/^data:.+/i', $value)) { // RFC2397
                $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
            }
        }
        else {
            // The name comes from the untrusted document and ends up inside
            // an attribute value, so escape it like every other value here
            $washed .= ($washed ? ' ' : '') . htmlspecialchars($key, ENT_QUOTES);
        }
    }

    return $t . ($washed && !empty($this->config['show_washed']) ? ' x-washed="'.$washed.'"' : '');
}
/**
 * Recursively serialize the children of a node.
 *
 * Only allowed tags with allowed attributes and allowed inline styles are
 * written out:
 *  - elements with a registered handler are delegated to that handler,
 *  - allowed elements are printed with washed attributes and content,
 *  - elements from the ignore list are replaced by a "not allowed" comment
 *    and their content is dropped,
 *  - any other element is replaced by an "ignored" comment followed by its
 *    washed content.
 * Text is HTML-escaped, CDATA is copied verbatim, the doctype is dropped and
 * every other node type is replaced by a "node type N" comment.
 *
 * @param DOMNode $node Node whose children are dumped
 *
 * @return string Washed markup
 */
private function dumpHtml($node)
{
    if (!$node->hasChildNodes()) {
        return '';
    }

    $node = $node->firstChild;
    $dump = '';

    do {
        switch ($node->nodeType) {
        case XML_ELEMENT_NODE: //Check element
            $tagName  = strtolower($node->tagName);
            // most tags have no handler; do not read an undefined index
            $callback = isset($this->handlers[$tagName]) ? $this->handlers[$tagName] : null;

            if ($callback) {
                $dump .= call_user_func($callback, $tagName,
                    $this->wash_attribs($node), $this->dumpHtml($node), $this);
            }
            else if (isset($this->_html_elements[$tagName])) {
                $content = $this->dumpHtml($node);
                // block elements must not be collapsed into the short <tag /> form
                $dump .= '<' . $tagName . $this->wash_attribs($node) .
                    ($content != '' || isset($this->_block_elements[$tagName]) ? ">$content</$tagName>" : ' />');
            }
            else if (isset($this->_ignore_elements[$tagName])) {
                $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' not allowed -->';
            }
            else {
                $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' ignored -->';
                $dump .= $this->dumpHtml($node); // ignore tags not its content
            }
            break;

        case XML_CDATA_SECTION_NODE:
            // NOTE(review): CDATA content is emitted unescaped - confirm this is intended
            $dump .= $node->nodeValue;
            break;

        case XML_TEXT_NODE:
            $dump .= htmlspecialchars($node->nodeValue);
            break;

        case XML_HTML_DOCUMENT_NODE:
            $dump .= $this->dumpHtml($node);
            break;

        case XML_DOCUMENT_TYPE_NODE:
            break;

        default:
            // was "$dump . ...", an expression without effect: the marker never reached the output
            $dump .= '<!-- node type ' . $node->nodeType . ' -->';
        }
    } while ($node = $node->nextSibling);

    return $dump;
}
/**
 * Main entry point: sanitize untrusted HTML.
 *
 * Whether remote images are allowed and how "cid:" urls are mapped is taken
 * from the configuration passed to the constructor. The input is run through
 * cleanup(), the href of a <base> tag (if any) is stored as base_url, and the
 * parsed document is dumped with only the allowed markup. $this->extlinks is
 * reset before washing.
 *
 * @param string $html Untrusted HTML
 *
 * @return string Washed HTML; empty string if there is nothing left to parse
 */
public function wash($html)
{
    // Charset seems to be ignored (probably if defined in the HTML document)
    $charset = isset($this->config['charset']) ? (string) $this->config['charset'] : '';
    $node    = new DOMDocument('1.0', $charset);

    $this->extlinks = false;

    // cleanup() hands back '' after a PCRE failure
    $html = (string) $this->cleanup($html);

    // Find base URL for images
    if (preg_match('/<base\s+href=[\'"]*([^\'"]+)/is', $html, $matches)) {
        $this->config['base_url'] = $matches[1];
    }
    else {
        $this->config['base_url'] = '';
    }

    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8,
    // which the @ operator does not silence); an empty document dumps to ''
    if ($html === '') {
        return '';
    }

    // markup in mail is routinely malformed; parser warnings are deliberately muted
    @$node->loadHTML($html);

    return $this->dumpHtml($node);
}
/**
 * Getter for config parameters
 *
 * @param string $prop Option name
 *
 * @return mixed Option value; null when the option has not been set
 */
public function get_config($prop)
{
    // options without a default (e.g. charset, blocked_src) may be missing:
    // return null for them without triggering an undefined-index warning
    return isset($this->config[$prop]) ? $this->config[$prop] : null;
}
/**
 * Prepare raw HTML for the DOM parser.
 *
 * Applies a set of textual fixes, repairs malformed tag names, strips invalid
 * comments and resolves relative URLs against a <base> tag. A PCRE failure in
 * the first step is reported through rcube::raise_error() and yields ''.
 *
 * @param string $html Raw HTML
 *
 * @return string Cleaned HTML
 */
private function cleanup($html)
{
    // special replacements (not properly handled by washtml class),
    // written as pattern => replacement
    $fixes = array(
        // space(s) between <NOBR>
        '/(<\/nobr>)(\s+)(<nobr>)/i' => '\\1 &nbsp; \\3',
        // PHP bug #32547 workaround: remove title tag
        '/<title[^>]*>[^<]*<\/title>/i' => '',
        // byte-order mark (only outlook?)
        '/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/' => '',
        // washtml/DOMDocument cannot handle xml namespaces
        '/<html\s[^>]+>/i' => '<html>',
    );

    $html = preg_replace(array_keys($fixes), array_values($fixes), trim($html));

    // PCRE errors handling (#1486856), should we use something like for every preg_* use?
    if ($html === null) {
        $preg_error = preg_last_error();

        if ($preg_error != PREG_NO_ERROR) {
            $errstr = "Could not clean up HTML message! PCRE Error: $preg_error.";

            if ($preg_error == PREG_BACKTRACK_LIMIT_ERROR) {
                $errstr .= " Consider raising pcre.backtrack_limit!";
            }
            else if ($preg_error == PREG_RECURSION_LIMIT_ERROR) {
                $errstr .= " Consider raising pcre.recursion_limit!";
            }

            rcube::raise_error(array('code' => 620, 'type' => 'php',
                'line' => __LINE__, 'file' => __FILE__,
                'message' => $errstr), true, false);

            return '';
        }
    }

    // fix (unknown/malformed) HTML tags before "wash"
    $fixed = preg_replace_callback('/(<[\/]*)([^\s>]+)/', array($this, 'html_tag_callback'), $html);

    // Remove invalid HTML comments (#1487759)
    // Don't remove valid conditional comments
    $fixed = preg_replace('/<!--[^->[\n]*>/', '', $fixed);

    // turn relative into absolute urls
    return self::resolve_base($fixed);
}
/**
 * preg_replace_callback() handler that repairs a tag name.
 *
 * @param array $matches Match data: [1] is the tag opener ("<" or "</"),
 *                       [2] the raw tag name
 *
 * @return string Opener followed by the repaired tag name
 */
public static function html_tag_callback($matches)
{
    // Microsoft's Smart Tags <st1:xxxx>: cut everything from the first colon
    $name = preg_replace('/:.*$/', '', $matches[2]);

    // forbidden characters
    $name = preg_replace('/[^a-z0-9_\[\]\!-]/i', '', $name);

    return $matches[1] . $name;
}
/**
 * Rewrite relative URLs in a document that declares a <base href>.
 *
 * When no <base> tag with an absolute URL is found the body is returned
 * untouched; otherwise the rewriting is delegated to rcube_base_replacer.
 *
 * @param string $body HTML content
 *
 * @return string HTML content, as returned by the replacer when a base was found
 */
public static function resolve_base($body)
{
    // check for <base href=...>
    $has_base = preg_match('!(<base.*href=["\']?)([hftps]{3,5}://[a-z0-9/.%-]+)!i', $body, $found);

    if (!$has_base) {
        return $body;
    }

    $replacer = new rcube_base_replacer($found[2]);

    return $replacer->replace($body);
}
}

@ -1,330 +0,0 @@
<?php
/* Washtml, an HTML sanitizer.
*
* Copyright (c) 2007 Frederic Motte <fmotte@ubixis.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Please send me your comments about this code if you have some, thanks, Fred. */
/* OVERVIEW:
*
* Washtml takes untrusted HTML and returns a safe HTML string.
*
* SYNOPSIS:
*
* $washer = new washtml($config);
* $washer->wash($html);
* It returns a sanitized string of the $html parameter without html and head tags.
* $html is a string containing the html code to wash.
* $config is an array containing options:
* $config['allow_remote'] is a boolean to allow link to remote images.
* $config['blocked_src'] string with image-src to be used for blocked remote images
* $config['show_washed'] is a boolean to include washed out attributes as x-washed
* $config['cid_map'] is an array where cid urls index urls to replace them.
* $config['charset'] is a string containing the charset of the HTML document if it is not defined in it.
* $washer->extlinks is a reference to a boolean that is set to true if remote images were removed. (FE: show remote images link)
*
* INTERNALS:
*
* Only tags and attributes in the static lists $html_elements and $html_attribs
* are kept, inline styles are also filtered: all style identifiers matching
* /[a-z\-]/i are allowed. Values matching colors, sizes, /[a-z\-]/i and safe
* urls if allowed and cid urls if mapped are kept.
*
* BUGS: It MUST be safe !
* - Check regexp
* - urlencode URLs instead of htmlspecials
* - Check if a 3-byte UTF-8 first char can eat '">'
* - Update PCRE: CVE-2007-1659 - CVE-2007-1660 - CVE-2007-1661 - CVE-2007-1662
* CVE-2007-4766 - CVE-2007-4767 - CVE-2007-4768
* http://lists.debian.org/debian-security-announce/debian-security-announce-2007/msg00177.html
* - ...
*
* MISSING:
* - relative links, can be implemented by prefixing an absolute path, ask me
* if you need it...
* - ...
*
* Don't be a fool:
* - Don't alter data on a GET: '<img src="http://yourhost/mail?action=delete&uid=3267" />'
* - ...
*
* Roundcube Changes:
* - added $block_elements
* - changed $ignore_elements behaviour
* - added RFC2397 support
* - base URL support
* - invalid HTML comments removal before parsing
* - "fixing" unitless CSS values for XHTML output
*/
/**
 * HTML sanitizer: takes untrusted HTML and returns markup reduced to the
 * allowed elements, attributes and inline styles.
 */
class washtml
{
    /* Allowed HTML elements (default) */
    static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b',
        'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center',
        'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl',
        'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i',
        'ins', 'label', 'legend', 'li', 'map', 'menu', 'nobr', 'ol', 'p', 'pre', 'q',
        's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', 'img',
        // form elements
        'button', 'input', 'textarea', 'select', 'option', 'optgroup'
    );

    /* Ignore these HTML tags and their content */
    static $ignore_elements = array('script', 'applet', 'embed', 'object', 'style');

    /* Allowed HTML attributes */
    static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height',
        'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing',
        'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight',
        'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border',
        'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace',
        'cellborder', 'size', 'lang', 'dir', 'usemap', 'shape', 'media',
        // attributes of form elements
        'type', 'rows', 'cols', 'disabled', 'readonly', 'checked', 'multiple', 'value'
    );

    /* Block elements which could be empty but cannot be returned in short form (<tag />) */
    static $block_elements = array('div', 'p', 'pre', 'blockquote', 'a', 'font', 'center',
        'table', 'ul', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'dl', 'strong', 'i', 'b', 'u', 'span');

    /* State for linked objects in HTML */
    public $extlinks = false;

    /* Current settings */
    private $config = array();

    /* Registered callback functions for tags */
    private $handlers = array();

    /* Allowed HTML elements */
    private $_html_elements = array();

    /* HTML tags which are dropped together with their content */
    private $_ignore_elements = array();

    /* Block elements which could be empty but cannot be returned in short form (<tag />) */
    private $_block_elements = array();

    /* Allowed HTML attributes */
    private $_html_attribs = array();

    /**
     * Constructor
     *
     * @param array $p Options. html_elements, html_attribs, ignore_elements and
     *                 block_elements extend the static default lists; all other
     *                 keys are stored as configuration (defaults: show_washed
     *                 true, allow_remote false, cid_map empty array)
     */
    public function __construct($p = array())
    {
        $this->_html_elements   = self::lookup_table($p, 'html_elements', self::$html_elements);
        $this->_html_attribs    = self::lookup_table($p, 'html_attribs', self::$html_attribs);
        $this->_ignore_elements = self::lookup_table($p, 'ignore_elements', self::$ignore_elements);
        $this->_block_elements  = self::lookup_table($p, 'block_elements', self::$block_elements);

        unset($p['html_elements'], $p['html_attribs'], $p['ignore_elements'], $p['block_elements']);

        $this->config = $p + array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array());
    }

    /**
     * Merge an optional user-supplied list with a default list into a
     * name => index table that can be queried with isset().
     */
    private static function lookup_table($p, $key, $defaults)
    {
        // a missing option simply contributes nothing
        $custom = isset($p[$key]) ? (array) $p[$key] : array();

        return array_flip($custom) + array_flip($defaults);
    }

    /**
     * Register a callback function for a certain tag
     *
     * @param string   $tagName  Tag name (lookups use the lower-cased element name)
     * @param callable $callback Called with tag name, washed attributes,
     *                           washed content and this object
     */
    public function add_callback($tagName, $callback)
    {
        $this->handlers[$tagName] = $callback;
    }

    /**
     * Look a URL up in cid_map, first verbatim, then prefixed with base_url.
     *
     * @return string|null Replacement URL or null if the URL is not mapped
     */
    private function map_cid($url)
    {
        $base = isset($this->config['base_url']) ? $this->config['base_url'] : '';

        foreach (array($url, $base . $url) as $candidate) {
            if (!empty($this->config['cid_map'][$candidate])) {
                return $this->config['cid_map'][$candidate];
            }
        }

        return null;
    }

    /**
     * Check CSS style: keep only whitelisted value tokens of each declaration.
     *
     * @param string $style Raw content of a style attribute
     *
     * @return string Washed declarations; empty string if nothing survived
     */
    private function wash_style($style)
    {
        $s = '';

        foreach (explode(';', $style) as $declaration) {
            if (!preg_match('/^\s*([a-z\-]+)\s*:\s*(.*)\s*$/i', $declaration, $match)) {
                continue;
            }

            $cssid = $match[1];
            $str   = $match[2];
            $value = '';

            // $str is a string, so test it for emptiness directly: sizeof() counts
            // array elements and raises a TypeError for strings on PHP 8
            while ($str !== '' &&
                preg_match('/^(url\(\s*[\'"]?([^\'"\)]*)[\'"]?\s*\)'./*1,2*/
                    '|rgb\(\s*[0-9]+\s*,\s*[0-9]+\s*,\s*[0-9]+\s*\)'.
                    '|-?[0-9.]+\s*(em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)?'.
                    '|#[0-9a-f]{3,6}'.
                    '|[a-z0-9", -]+'.
                    ')\s*/i', $str, $match)
            ) {
                // group 2 (the url() argument) is absent from $match for non-url tokens
                if (!empty($match[2])) {
                    if ($src = $this->map_cid($match[2])) {
                        $value .= ' url('.htmlspecialchars($src, ENT_QUOTES) . ')';
                    }
                    else if (preg_match('!^(https?:)?//[a-z0-9/._+-]+$!i', $match[2], $url)) {
                        if ($this->config['allow_remote']) {
                            $value .= ' url('.htmlspecialchars($url[0], ENT_QUOTES).')';
                        }
                        else {
                            // remember that remote content was blocked
                            $this->extlinks = true;
                        }
                    }
                    else if (preg_match('/^data:.+/i', $match[2])) { // RFC2397
                        $value .= ' url('.htmlspecialchars($match[2], ENT_QUOTES).')';
                    }
                }
                else { //whitelist ?
                    $value .= ' ' . $match[0];

                    // #1488535: Fix size units, so width:800 would be changed to width:800px
                    if (preg_match('/(left|right|top|bottom|width|height)/i', $cssid) && preg_match('/^[0-9]+$/', $match[0])) {
                        $value .= 'px';
                    }
                }

                // substr() returns false instead of '' on PHP < 8 once the input is used up
                $str = (string) substr($str, strlen($match[0]));
            }

            if (isset($value[0])) {
                $s .= ($s?' ':'') . $cssid . ':' . $value . ';';
            }
        }

        return $s;
    }

    /**
     * Take a node and return allowed attributes and check values
     *
     * @param DOMNode $node Element whose attributes are inspected
     *
     * @return string Attribute string (with leading space) or empty string
     */
    private function wash_attribs($node)
    {
        $t = '';
        // names of removed attributes; must start out as a string for the appends below
        $washed = '';

        foreach ($node->attributes as $key => $plop) {
            $key = strtolower($key);
            $value = $node->getAttribute($key);

            if (isset($this->_html_attribs[$key]) ||
                ($key == 'href' && !preg_match('!^(javascript|vbscript|data:text)!i', $value)
                    && preg_match('!^([a-z][a-z0-9.+-]+:|//|#).+!i', $value))
            ) {
                $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
            }
            else if ($key == 'style' && ($style = $this->wash_style($value))) {
                // pick the quote character that does not occur in the washed style
                $quot = strpos($style, '"') !== false ? "'" : '"';
                $t .= ' style=' . $quot . $style . $quot;
            }
            else if ($key == 'background' || ($key == 'src' && strtolower($node->tagName) == 'img')) { //check tagName anyway
                if ($src = $this->map_cid($value)) {
                    $t .= ' ' . $key . '="' . htmlspecialchars($src, ENT_QUOTES) . '"';
                }
                else if (preg_match('/^(http|https|ftp):.+/i', $value)) {
                    if ($this->config['allow_remote']) {
                        $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
                    }
                    else {
                        // remote source blocked: flag it and optionally show a placeholder
                        $this->extlinks = true;
                        if (!empty($this->config['blocked_src'])) {
                            $t .= ' ' . $key . '="' . htmlspecialchars($this->config['blocked_src'], ENT_QUOTES) . '"';
                        }
                    }
                }
                else if (preg_match('/^data:.+/i', $value)) { // RFC2397
                    $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
                }
            }
            else {
                $washed .= ($washed?' ':'') . $key;
            }
        }

        return $t . ($washed && $this->config['show_washed']?' x-washed="'.$washed.'"':'');
    }

    /**
     * The main loop that recurses on a node tree.
     * It outputs only allowed tags with allowed attributes
     * and allowed inline styles
     *
     * @param DOMNode $node Node whose children are dumped
     *
     * @return string Washed markup
     */
    private function dumpHtml($node)
    {
        if (!$node->hasChildNodes()) {
            return '';
        }

        $node = $node->firstChild;
        $dump = '';

        do {
            switch ($node->nodeType) {
            case XML_ELEMENT_NODE: //Check element
                $tagName  = strtolower($node->tagName);
                // most tags have no handler; do not read an undefined index
                $callback = isset($this->handlers[$tagName]) ? $this->handlers[$tagName] : null;

                if ($callback) {
                    $dump .= call_user_func($callback, $tagName, $this->wash_attribs($node), $this->dumpHtml($node), $this);
                }
                else if (isset($this->_html_elements[$tagName])) {
                    $content = $this->dumpHtml($node);
                    // block elements must not be collapsed into the short <tag /> form
                    $dump .= '<' . $tagName . $this->wash_attribs($node) .
                        ($content != '' || isset($this->_block_elements[$tagName]) ? ">$content</$tagName>" : ' />');
                }
                else if (isset($this->_ignore_elements[$tagName])) {
                    $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' not allowed -->';
                }
                else {
                    $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' ignored -->';
                    $dump .= $this->dumpHtml($node); // ignore tags not its content
                }
                break;

            case XML_CDATA_SECTION_NODE:
                // NOTE(review): CDATA content is emitted unescaped - confirm this is intended
                $dump .= $node->nodeValue;
                break;

            case XML_TEXT_NODE:
                $dump .= htmlspecialchars($node->nodeValue);
                break;

            case XML_HTML_DOCUMENT_NODE:
                $dump .= $this->dumpHtml($node);
                break;

            case XML_DOCUMENT_TYPE_NODE:
                break;

            default:
                // was "$dump . ...", an expression without effect: the marker never reached the output
                $dump .= '<!-- node type ' . $node->nodeType . ' -->';
            }
        } while ($node = $node->nextSibling);

        return $dump;
    }

    /**
     * Main function, give it untrusted HTML, tell it if you allow loading
     * remote images and give it a map to convert "cid:" urls.
     *
     * @param string $html Untrusted HTML
     *
     * @return string Washed HTML; empty string if there is nothing to parse
     */
    public function wash($html)
    {
        // Charset seems to be ignored (probably if defined in the HTML document)
        $charset = isset($this->config['charset']) ? (string) $this->config['charset'] : '';
        $node    = new DOMDocument('1.0', $charset);

        $this->extlinks = false;

        // Find base URL for images
        if (preg_match('/<base\s+href=[\'"]*([^\'"]+)/is', $html, $matches)) {
            $this->config['base_url'] = $matches[1];
        }
        else {
            $this->config['base_url'] = '';
        }

        // Remove invalid HTML comments (#1487759)
        // Don't remove valid conditional comments
        $html = (string) preg_replace('/<!--[^->[\n]*>/', '', $html);

        // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8,
        // which the @ operator does not silence); an empty document dumps to ''
        if ($html === '') {
            return '';
        }

        // markup in mail is routinely malformed; parser warnings are deliberately muted
        @$node->loadHTML($html);

        return $this->dumpHtml($node);
    }

    /**
     * Getter for config parameters
     *
     * @param string $prop Option name
     *
     * @return mixed Option value; null when the option has not been set
     */
    public function get_config($prop)
    {
        return isset($this->config[$prop]) ? $this->config[$prop] : null;
    }
}

@ -470,7 +470,7 @@ function rcmail_compose_header_from($attrib)
$text = $html = $sql_arr['signature']; $text = $html = $sql_arr['signature'];
if ($sql_arr['html_signature']) { if ($sql_arr['html_signature']) {
$h2t = new html2text($sql_arr['signature'], false, false); $h2t = new rcube_html2text($sql_arr['signature'], false, false);
$text = trim($h2t->get_text()); $text = trim($h2t->get_text());
} }
else { else {
@ -667,7 +667,7 @@ function rcmail_compose_part_body($part, $isHtml = false)
// use html part if it has been used for message (pre)viewing // use html part if it has been used for message (pre)viewing
// decrease line length for quoting // decrease line length for quoting
$len = $compose_mode == RCUBE_COMPOSE_REPLY ? $LINE_LENGTH-2 : $LINE_LENGTH; $len = $compose_mode == RCUBE_COMPOSE_REPLY ? $LINE_LENGTH-2 : $LINE_LENGTH;
$txt = new html2text($body, false, true, $len); $txt = new rcube_html2text($body, false, true, $len);
$body = $txt->get_text(); $body = $txt->get_text();
} }
else if ($part->ctype_secondary == 'enriched') { else if ($part->ctype_secondary == 'enriched') {

@ -628,39 +628,6 @@ function rcmail_wash_html($html, $p, $cid_replaces)
$p += array('safe' => false, 'inline_html' => true); $p += array('safe' => false, 'inline_html' => true);
// special replacements (not properly handled by washtml class)
$html_search = array(
'/(<\/nobr>)(\s+)(<nobr>)/i', // space(s) between <NOBR>
'/<title[^>]*>[^<]*<\/title>/i', // PHP bug #32547 workaround: remove title tag
'/^(\0\0\xFE\xFF|\xFF\xFE\0\0|\xFE\xFF|\xFF\xFE|\xEF\xBB\xBF)/', // byte-order mark (only outlook?)
'/<html\s[^>]+>/i', // washtml/DOMDocument cannot handle xml namespaces
);
$html_replace = array(
'\\1'.' &nbsp; '.'\\3',
'',
'',
'<html>',
);
$html = preg_replace($html_search, $html_replace, trim($html));
// PCRE errors handling (#1486856), should we use something like for every preg_* use?
if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) {
$errstr = "Could not clean up HTML message! PCRE Error: $preg_error.";
if ($preg_error == PREG_BACKTRACK_LIMIT_ERROR)
$errstr .= " Consider raising pcre.backtrack_limit!";
if ($preg_error == PREG_RECURSION_LIMIT_ERROR)
$errstr .= " Consider raising pcre.recursion_limit!";
raise_error(array('code' => 620, 'type' => 'php',
'line' => __LINE__, 'file' => __FILE__,
'message' => $errstr), true, false);
return '';
}
// fix (unknown/malformed) HTML tags before "wash"
$html = preg_replace_callback('/(<[\/]*)([^\s>]+)/', 'rcmail_html_tag_callback', $html);
// charset was converted to UTF-8 in rcube_storage::get_message_part(), // charset was converted to UTF-8 in rcube_storage::get_message_part(),
// change/add charset specification in HTML accordingly, // change/add charset specification in HTML accordingly,
// washtml cannot work without that // washtml cannot work without that
@ -674,9 +641,6 @@ function rcmail_wash_html($html, $p, $cid_replaces)
$html = '<head>' . $meta . '</head>' . $html; $html = '<head>' . $meta . '</head>' . $html;
} }
// turn relative into absolute urls
$html = rcmail_resolve_base($html);
// clean HTML with washhtml by Frederic Motte // clean HTML with washhtml by Frederic Motte
$wash_opts = array( $wash_opts = array(
'show_washed' => false, 'show_washed' => false,
@ -702,7 +666,7 @@ function rcmail_wash_html($html, $p, $cid_replaces)
$wash_opts['html_attribs'] = $p['html_attribs']; $wash_opts['html_attribs'] = $p['html_attribs'];
// initialize HTML washer // initialize HTML washer
$washer = new washtml($wash_opts); $washer = new rcube_washtml($wash_opts);
if (!$p['skip_washer_form_callback']) if (!$p['skip_washer_form_callback'])
$washer->add_callback('form', 'rcmail_washtml_callback'); $washer->add_callback('form', 'rcmail_washtml_callback');
@ -740,7 +704,7 @@ function rcmail_print_body($part, $p = array())
// convert html to text/plain // convert html to text/plain
if ($data['type'] == 'html' && $data['plain']) { if ($data['type'] == 'html' && $data['plain']) {
$txt = new html2text($data['body'], false, true); $txt = new rcube_html2text($data['body'], false, true);
$body = $txt->get_text(); $body = $txt->get_text();
$part->ctype_secondary = 'plain'; $part->ctype_secondary = 'plain';
} }
@ -920,22 +884,6 @@ function rcmail_washtml_callback($tagname, $attrib, $content, $washtml)
} }
/**
* Callback function for HTML tags fixing
*/
function rcmail_html_tag_callback($matches)
{
$tagname = $matches[2];
$tagname = preg_replace(array(
'/:.*$/', // Microsoft's Smart Tags <st1:xxxx>
'/[^a-z0-9_\[\]\!-]/i', // forbidden characters
), '', $tagname);
return $matches[1].$tagname;
}
/** /**
* return table with message headers * return table with message headers
*/ */
@ -1319,20 +1267,6 @@ function rcmail_part_image_type($part)
} }
} }
/**
* Convert all relative URLs according to a <base> in HTML
*/
function rcmail_resolve_base($body)
{
// check for <base href=...>
if (preg_match('!(<base.*href=["\']?)([hftps]{3,5}://[a-z0-9/.%-]+)!i', $body, $regs)) {
$replacer = new rcube_base_replacer($regs[2]);
$body = $replacer->replace($body);
}
return $body;
}
/** /**
* modify a HTML message that it can be displayed inside a HTML page * modify a HTML message that it can be displayed inside a HTML page

@ -559,7 +559,7 @@ if ($isHtml) {
$plugin['body'] = rcmail_replace_emoticons($plugin['body']); $plugin['body'] = rcmail_replace_emoticons($plugin['body']);
// add a plain text version of the e-mail as an alternative part. // add a plain text version of the e-mail as an alternative part.
$h2t = new html2text($plugin['body'], false, true, 0, $message_charset); $h2t = new rcube_html2text($plugin['body'], false, true, 0, $message_charset);
$plainTextPart = rc_wordwrap($h2t->get_text(), $LINE_LENGTH, "\r\n", false, $message_charset); $plainTextPart = rc_wordwrap($h2t->get_text(), $LINE_LENGTH, "\r\n", false, $message_charset);
$plainTextPart = wordwrap($plainTextPart, 998, "\r\n", true); $plainTextPart = wordwrap($plainTextPart, 998, "\r\n", true);

@ -24,10 +24,8 @@ $html = $HTTP_RAW_POST_DATA;
// Replace emoticon images with its text representation // Replace emoticon images with its text representation
$html = rcmail_replace_emoticons($html); $html = rcmail_replace_emoticons($html);
$converter = new html2text($html, false, true, 0); $converter = new rcube_html2text($html, false, true, 0);
header('Content-Type: text/plain; charset=UTF-8'); header('Content-Type: text/plain; charset=UTF-8');
print rtrim($converter->get_text()); print rtrim($converter->get_text());
exit; exit;

@ -1,11 +1,11 @@
<?php <?php
/** /**
* Test class to test html2text class * Test class to test rcube_html2text class
* *
* @package Tests * @package Tests
*/ */
class HtmlToText extends PHPUnit_Framework_TestCase class rc_html2text extends PHPUnit_Framework_TestCase
{ {
function data_html2text() function data_html2text()
@ -49,7 +49,7 @@ class HtmlToText extends PHPUnit_Framework_TestCase
*/ */
function test_html2text($title, $in, $out) function test_html2text($title, $in, $out)
{ {
$ht = new html2text(null, false, false); $ht = new rcube_html2text(null, false, false);
$ht->set_html($in); $ht->set_html($in);
$res = $ht->get_text(); $res = $ht->get_text();

@ -29,6 +29,12 @@ class Framework_StringReplacer extends PHPUnit_Framework_TestCase
array('Start http://localhost/?foo End', 'Start <a href="http://localhost/?foo" target="_blank">http://localhost/?foo</a> End'), array('Start http://localhost/?foo End', 'Start <a href="http://localhost/?foo" target="_blank">http://localhost/?foo</a> End'),
array('www.domain.tld', '<a href="http://www.domain.tld" target="_blank">www.domain.tld</a>'), array('www.domain.tld', '<a href="http://www.domain.tld" target="_blank">www.domain.tld</a>'),
array('WWW.DOMAIN.TLD', '<a href="http://WWW.DOMAIN.TLD" target="_blank">WWW.DOMAIN.TLD</a>'), array('WWW.DOMAIN.TLD', '<a href="http://WWW.DOMAIN.TLD" target="_blank">WWW.DOMAIN.TLD</a>'),
array('[http://link.com]', '[<a href="http://link.com" target="_blank">http://link.com</a>]'),
array('http://link.com?a[]=1', '<a href="http://link.com?a[]=1" target="_blank">http://link.com?a[]=1</a>'),
array('http://link.com?a[]', '<a href="http://link.com?a[]" target="_blank">http://link.com?a[]</a>'),
array('(http://link.com)', '(<a href="http://link.com" target="_blank">http://link.com</a>)'),
array('http://link.com?a(b)c', '<a href="http://link.com?a(b)c" target="_blank">http://link.com?a(b)c</a>'),
array('http://link.com?(link)', '<a href="http://link.com?(link)" target="_blank">http://link.com?(link)</a>'),
); );
} }

@ -0,0 +1,28 @@
<?php
/**
 * Unit tests for the rcube_washtml class
 *
 * @package Tests
 */
class Framework_Washtml extends PHPUnit_Framework_TestCase
{
    /**
     * Links using the data:text/html or vbscript: scheme must not survive washing
     */
    function test_html_xss3()
    {
        // #1488850
        $links = array(
            '<a href="data:text/html,&lt;script&gt;alert(document.cookie)&lt;/script&gt;">Firefox</a>',
            '<a href="vbscript:alert(document.cookie)">Internet Explorer</a>',
        );
        $input = '<p>' . implode('', $links) . '</p>';

        $washer = new rcube_washtml;
        $result = $washer->wash($input);

        $this->assertNotRegExp('/data:text/', $result, "Remove data:text/html links");
        $this->assertNotRegExp('/vbscript:/', $result, "Remove vbscript: links");
    }
}

@ -173,7 +173,7 @@ class MailFunc extends PHPUnit_Framework_TestCase
function test_resolve_base() function test_resolve_base()
{ {
$html = file_get_contents(TESTS_DIR . 'src/htmlbase.txt'); $html = file_get_contents(TESTS_DIR . 'src/htmlbase.txt');
$html = rcmail_resolve_base($html); $html = rcube_washtml::resolve_base($html);
$this->assertRegExp('|src="http://alec\.pl/dir/img1\.gif"|', $html, "URI base resolving [1]"); $this->assertRegExp('|src="http://alec\.pl/dir/img1\.gif"|', $html, "URI base resolving [1]");
$this->assertRegExp('|src="http://alec\.pl/dir/img2\.gif"|', $html, "URI base resolving [2]"); $this->assertRegExp('|src="http://alec\.pl/dir/img2\.gif"|', $html, "URI base resolving [2]");

@ -12,6 +12,7 @@
<file>Framework/Csv2vcard.php</file> <file>Framework/Csv2vcard.php</file>
<file>Framework/Enriched.php</file> <file>Framework/Enriched.php</file>
<file>Framework/Html.php</file> <file>Framework/Html.php</file>
<file>Framework/Html2text.php</file>
<file>Framework/Imap.php</file> <file>Framework/Imap.php</file>
<file>Framework/ImapGeneric.php</file> <file>Framework/ImapGeneric.php</file>
<file>Framework/Image.php</file> <file>Framework/Image.php</file>
@ -28,7 +29,7 @@
<file>Framework/User.php</file> <file>Framework/User.php</file>
<file>Framework/Utils.php</file> <file>Framework/Utils.php</file>
<file>Framework/VCard.php</file> <file>Framework/VCard.php</file>
<file>HtmlToText.php</file> <file>Framework/Washtml.php</file>
<file>MailFunc.php</file> <file>MailFunc.php</file>
</testsuite> </testsuite>
<testsuite name="Managesieve Tests"> <testsuite name="Managesieve Tests">

Loading…
Cancel
Save