Managesieve: Refactored script parser to be 100x faster

pull/5193/head
Aleksander Machniak 8 years ago
parent 800235f682
commit 80cb362b80

@ -1,6 +1,7 @@
CHANGELOG Roundcube Webmail
===========================
- Managesieve: Refactored script parser to be 100x faster
- Enigma: Added option to attach public keys to sent mail (#5152)
- Enigma: Handle messages with text before an encrypted block (#5149)
- Enigma: Handle encrypted/signed content inside message/rfc822 attachments

@ -1,3 +1,7 @@
* version 8.6 [2016-04-06]
-----------------------------------------------------------
- Refactored script parser to be 100x faster
* version 8.5 [2015-11-15]
-----------------------------------------------------------
- Add support for :from parameter in vacation action (patch from Michele Locati)

@ -3,7 +3,7 @@
"type": "roundcube-plugin",
"description": "Adds a possibility to manage Sieve scripts (incoming mail filters). It's clickable interface which operates on text scripts and communicates with server using managesieve protocol. Adds Filters tab in Settings.",
"license": "GPLv3+",
"version": "8.5",
"version": "8.6",
"authors": [
{
"name": "Aleksander Machniak",
@ -14,16 +14,20 @@
"repositories": [
{
"type": "composer",
"url": "http://plugins.roundcube.net"
"url": "https://plugins.roundcube.net"
},
{
"type": "pear",
"url": "http://pear.php.net/"
"url": "https://pear.php.net/"
},
{
"type": "vcs",
"url": "https://github.com/roundcube/Net_Sieve.git"
}
],
"require": {
"php": ">=5.3.0",
"roundcube/plugin-installer": ">=0.1.3",
"pear-pear/Net_Sieve": ">=1.3.2"
"roundcube/net_sieve": "~1.5.0"
}
}

@ -63,7 +63,7 @@ class rcube_sieve_engine
1 => 'notifyimportancehigh'
);
const VERSION = '8.5';
const VERSION = '8.6';
const PROGNAME = 'Roundcube (Managesieve)';
const PORT = 4190;

@ -529,17 +529,20 @@ class rcube_sieve_script
*/
private function _parse_text($script)
{
$prefix = '';
$options = array();
while ($script) {
$script = trim($script);
$rule = array();
$prefix = '';
$options = array();
$position = 0;
$length = strlen($script);
while ($position < $length) {
// skip whitespace chars
$position = self::ltrim_position($script, $position);
$rulename = '';
// Comments
while (!empty($script) && $script[0] == '#') {
$endl = strpos($script, "\n");
$line = $endl ? substr($script, 0, $endl) : $script;
while ($script[$position] === '#') {
$endl = strpos($script, "\n", $position) ?: $length;
$line = substr($script, $position, $endl - $position);
// Roundcube format
if (preg_match('/^# rule:\[(.*)\]/', $line, $matches)) {
@ -559,7 +562,7 @@ class rcube_sieve_script
$prefix .= $line . "\n";
}
$script = ltrim(substr($script, strlen($line) + 1));
$position = $endl + 1;
}
// handle script header
@ -571,15 +574,15 @@ class rcube_sieve_script
}
// Control structures/blocks
if (preg_match('/^(if|else|elsif)/i', $script)) {
$rule = $this->_tokenize_rule($script);
if (preg_match('/^(if|else|elsif)/i', substr($script, $position, 5))) {
$rule = $this->_tokenize_rule($script, $position);
if (strlen($rulename) && !empty($rule)) {
$rule['name'] = $rulename;
}
}
// Simple commands
else {
$rule = $this->_parse_actions($script, ';');
$rule = $this->_parse_actions($script, $position, ';');
if (!empty($rule[0]) && is_array($rule)) {
// set "global" variables
if ($rule[0]['type'] == 'set') {
@ -593,8 +596,6 @@ class rcube_sieve_script
}
}
$rulename = '';
if (!empty($rule)) {
$this->content[] = $rule;
}
@ -608,14 +609,14 @@ class rcube_sieve_script
/**
* Convert text script fragment to rule object
*
* @param string Text rule
* @param string $content The whole script content
* @param int &$position Start position in the script
*
* @return array Rule data
*/
private function _tokenize_rule(&$content)
private function _tokenize_rule($content, &$position)
{
$cond = strtolower(self::tokenize($content, 1));
$cond = strtolower(self::tokenize($content, 1, $position));
if ($cond != 'if' && $cond != 'elsif' && $cond != 'else') {
return null;
}
@ -623,15 +624,16 @@ class rcube_sieve_script
$disabled = false;
$join = false;
$join_not = false;
$length = strlen($content);
// disabled rule (false + comment): if false # .....
if (preg_match('/^\s*false\s+#/i', $content)) {
$content = preg_replace('/^\s*false\s+#\s*/i', '', $content);
if (preg_match('/^\s*false\s+#\s*/i', substr($content, $position, 20), $m)) {
$position += strlen($m[0]);
$disabled = true;
}
while (strlen($content)) {
$tokens = self::tokenize($content, true);
while ($position < $length) {
$tokens = self::tokenize($content, true, $position);
$separator = array_pop($tokens);
if (!empty($tokens)) {
@ -768,7 +770,7 @@ class rcube_sieve_script
}
// ...and actions block
$actions = $this->_parse_actions($content);
$actions = $this->_parse_actions($content, $position);
if ($tests && $actions) {
$result = array(
@ -786,17 +788,19 @@ class rcube_sieve_script
/**
* Parse body of actions section
*
* @param string $content Text body
* @param string $end End of text separator
* @param string $content The whole script content
* @param int &$position Start position in the script
* @param string $end End of text separator
*
* @return array Array of parsed action type/target pairs
*/
private function _parse_actions(&$content, $end = '}')
private function _parse_actions($content, &$position, $end = '}')
{
$result = null;
$length = strlen($content);
while (strlen($content)) {
$tokens = self::tokenize($content, true);
while ($position < $length) {
$tokens = self::tokenize($content, true, $position);
$separator = array_pop($tokens);
$token = !empty($tokens) ? array_shift($tokens) : $separator;
@ -1074,28 +1078,29 @@ class rcube_sieve_script
/**
* Splits script into string tokens
*
* @param string &$str The script
* @param mixed $num Number of tokens to return, 0 for all
* or True for all tokens until separator is found.
* Separator will be returned as last token.
* @param string $str The script
* @param mixed $num Number of tokens to return, 0 for all
* or True for all tokens until separator is found.
* Separator will be returned as last token.
* @param int &$position Parsing start position
*
* @return mixed Tokens array or string if $num=1
*/
static function tokenize(&$str, $num=0)
static function tokenize($str, $num = 0, &$position = 0)
{
$result = array();
$length = strlen($str);
// remove spaces from the beginning of the string
while (($str = ltrim($str)) !== ''
&& (!$num || $num === true || count($result) < $num)
) {
switch ($str[0]) {
while ($position < $length && (!$num || $num === true || count($result) < $num)) {
// skip whitespace chars
$position = self::ltrim_position($str, $position);
switch ($str[$position]) {
// Quoted string
case '"':
$len = strlen($str);
for ($pos=1; $pos<$len; $pos++) {
for ($pos = $position + 1; $pos < $length; $pos++) {
if ($str[$pos] == '"') {
break;
}
@ -1108,18 +1113,19 @@ class rcube_sieve_script
if ($str[$pos] != '"') {
// error
}
// we need to strip slashes for a quoted string
$result[] = stripslashes(substr($str, 1, $pos - 1));
$str = substr($str, $pos + 1);
$result[] = stripslashes(substr($str, $position + 1, $pos - $position - 1));
$position = $pos + 1;
break;
// Parenthesized list
case '[':
$str = substr($str, 1);
$result[] = self::tokenize($str, 0);
$position++;
$result[] = self::tokenize($str, 0, $position);
break;
case ']':
$str = substr($str, 1);
$position++;
return $result;
break;
@ -1132,8 +1138,8 @@ class rcube_sieve_script
case ')':
case '{':
case '}':
$sep = $str[0];
$str = substr($str, 1);
$sep = $str[$position];
$position++;
if ($num === true) {
$result[] = $sep;
break 2;
@ -1142,69 +1148,97 @@ class rcube_sieve_script
// bracket-comment
case '/':
if ($str[1] == '*') {
if ($end_pos = strpos($str, '*/')) {
$str = substr($str, $end_pos + 2);
if ($str[$position + 1] == '*') {
if ($end_pos = strpos($str, '*/', $position + 2)) {
$position = $end_pos + 2;
}
else {
// error
$str = '';
$position = $length;
}
}
break;
// hash-comment
case '#':
if ($lf_pos = strpos($str, "\n")) {
$str = substr($str, $lf_pos);
if ($lf_pos = strpos($str, "\n", $position)) {
$position = $lf_pos + 1;
break;
}
else {
$str = '';
$position = $length;
}
// String atom
default:
// empty or one character
if ($str === '' || $str === null) {
if ($position == $length) {
break 2;
}
if (strlen($str) < 2) {
$result[] = $str;
$str = '';
if ($length - $position < 2) {
$result[] = substr($str, $position);
$position = $length;
break;
}
// tag/identifier/number
if (preg_match('/^([a-z0-9:_]+)/i', $str, $m)) {
$str = substr($str, strlen($m[1]));
if (preg_match('/[a-zA-Z0-9:_]+/', $str, $m, PREG_OFFSET_CAPTURE, $position)
&& $m[0][1] == $position
) {
$atom = $m[0][0];
$position += strlen($atom);
if ($m[1] != 'text:') {
$result[] = $m[1];
if ($atom != 'text:') {
$result[] = $atom;
}
// multiline string
else {
// skip whitespace chars (except \r\n)
$position = self::ltrim_position($str, $position, false);
// possible hash-comment after "text:"
if (preg_match('/^( |\t)*(#[^\n]+)?\n/', $str, $m)) {
$str = substr($str, strlen($m[0]));
if ($str[$position] === '#') {
$endl = strpos($str, "\n", $position);
$position = $endl ?: $length;
}
// get text until alone dot in a line
if (preg_match('/^(.*)\r?\n\.\r?\n/sU', $str, $m)) {
$text = $m[1];
// remove dot-stuffing
$text = str_replace("\n..", "\n.", $text);
$str = substr($str, strlen($m[0]));
// skip \n or \r\n
if ($str[$position] == "\n") {
$position++;
}
else if ($str[$position] == "\r" && $str[$position] == "\n") {
$position += 2;
}
else {
$text = '';
$text = '';
// get text until alone dot in a line
while ($position < $length) {
$pos = strpos($str, "\n.", $position);
if ($pos === false) {
break;
}
$text .= substr($str, $position, $pos - $position);
$position = $pos + 2;
if ($str[$pos] == "\n"
|| ($str[$pos] == "\r" && $str[$pos + 1] == "\n")
) {
break;
}
}
// remove dot-stuffing
$text = str_replace("\n..", "\n.", $text);
$result[] = $text;
$position++;
}
}
// fallback, skip one character as infinite loop prevention
else {
$str = substr($str, 1);
$position++;
}
break;
@ -1214,4 +1248,24 @@ class rcube_sieve_script
return $num === 1 ? (isset($result[0]) ? $result[0] : null) : $result;
}
/**
* Skip whitespace characters in a string from specified position.
*/
static function ltrim_position($content, $position, $br = true)
{
$blanks = array("\t", "\0", "\x0B", " ");
if ($br) {
$blanks[] = "\r";
$blanks[] = "\n";
}
while (isset($content[$position]) && isset($content[$position + 1])
&& in_array($content[$position], $blanks, true)
) {
$position++;
}
return $position;
}
}

Loading…
Cancel
Save