From 80cb362b8001ead4289c22dffca2ef0a1bd5b3f2 Mon Sep 17 00:00:00 2001 From: Aleksander Machniak <alec@alec.pl> Date: Wed, 06 Apr 2016 06:34:37 -0400 Subject: [PATCH] Managesieve: Refactored script parser to be 100x faster --- CHANGELOG | 1 plugins/managesieve/composer.json | 12 ++- plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php | 2 plugins/managesieve/Changelog | 4 + plugins/managesieve/lib/Roundcube/rcube_sieve_script.php | 204 ++++++++++++++++++++++++++++++++------------------ 5 files changed, 143 insertions(+), 80 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index da091e7..46aa975 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,7 @@ CHANGELOG Roundcube Webmail =========================== +- Managesieve: Refactored script parser to be 100x faster - Enigma: Added option to attach public keys to sent mail (#5152) - Enigma: Handle messages with text before an encrypted block (#5149) - Enigma: Handle encrypted/signed content inside message/rfc822 attachments diff --git a/plugins/managesieve/Changelog b/plugins/managesieve/Changelog index 490819f..be39611 100644 --- a/plugins/managesieve/Changelog +++ b/plugins/managesieve/Changelog @@ -1,3 +1,7 @@ +* version 8.6 [2016-04-06] +----------------------------------------------------------- +- Refactored script parser to be 100x faster + * version 8.5 [2015-11-15] ----------------------------------------------------------- - Add support for :from parameter in vacation action (patch from Michele Locati) diff --git a/plugins/managesieve/composer.json b/plugins/managesieve/composer.json index fcf5ba3..b1fd1b5 100644 --- a/plugins/managesieve/composer.json +++ b/plugins/managesieve/composer.json @@ -3,7 +3,7 @@ "type": "roundcube-plugin", "description": "Adds a possibility to manage Sieve scripts (incoming mail filters). It's clickable interface which operates on text scripts and communicates with server using managesieve protocol. Adds Filters tab in Settings.", "license": "GPLv3+", - "version": "8.5", + "version": "8.6", "authors": [ { "name": "Aleksander Machniak", @@ -14,16 +14,20 @@ "repositories": [ { "type": "composer", - "url": "http://plugins.roundcube.net" + "url": "https://plugins.roundcube.net" }, { "type": "pear", - "url": "http://pear.php.net/" + "url": "https://pear.php.net/" + }, + { + "type": "vcs", + "url": "https://github.com/roundcube/Net_Sieve.git" } ], "require": { "php": ">=5.3.0", "roundcube/plugin-installer": ">=0.1.3", - "pear-pear/Net_Sieve": ">=1.3.2" + "roundcube/net_sieve": "~1.5.0" } } diff --git a/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php b/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php index 50f4c08..b013351 100644 --- a/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php +++ b/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php @@ -63,7 +63,7 @@ 1 => 'notifyimportancehigh' ); - const VERSION = '8.5'; + const VERSION = '8.6'; const PROGNAME = 'Roundcube (Managesieve)'; const PORT = 4190; diff --git a/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php b/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php index 518d79d..0b11de1 100644 --- a/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php +++ b/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php @@ -529,17 +529,20 @@ */ private function _parse_text($script) { - $prefix = ''; - $options = array(); + $prefix = ''; + $options = array(); + $position = 0; + $length = strlen($script); - while ($script) { - $script = trim($script); - $rule = array(); + while ($position < $length) { + // skip whitespace chars + $position = self::ltrim_position($script, $position); + $rulename = ''; // Comments - while (!empty($script) && $script[0] == '#') { - $endl = strpos($script, "\n"); - $line = $endl ? substr($script, 0, $endl) : $script; + while ($script[$position] === '#') { + $endl = strpos($script, "\n", $position) ?: $length; + $line = substr($script, $position, $endl - $position); // Roundcube format if (preg_match('/^# rule:\[(.*)\]/', $line, $matches)) { @@ -559,7 +562,7 @@ $prefix .= $line . "\n"; } - $script = ltrim(substr($script, strlen($line) + 1)); + $position = $endl + 1; } // handle script header @@ -571,15 +574,15 @@ } // Control structures/blocks - if (preg_match('/^(if|else|elsif)/i', $script)) { - $rule = $this->_tokenize_rule($script); + if (preg_match('/^(if|else|elsif)/i', substr($script, $position, 5))) { + $rule = $this->_tokenize_rule($script, $position); if (strlen($rulename) && !empty($rule)) { $rule['name'] = $rulename; } } // Simple commands else { - $rule = $this->_parse_actions($script, ';'); + $rule = $this->_parse_actions($script, $position, ';'); if (!empty($rule[0]) && is_array($rule)) { // set "global" variables if ($rule[0]['type'] == 'set') { @@ -592,8 +595,6 @@ } } } - - $rulename = ''; if (!empty($rule)) { $this->content[] = $rule; @@ -608,14 +609,14 @@ /** * Convert text script fragment to rule object * - * @param string Text rule + * @param string $content The whole script content + * @param int &$position Start position in the script * * @return array Rule data */ - private function _tokenize_rule(&$content) + private function _tokenize_rule($content, &$position) { - $cond = strtolower(self::tokenize($content, 1)); - + $cond = strtolower(self::tokenize($content, 1, $position)); if ($cond != 'if' && $cond != 'elsif' && $cond != 'else') { return null; } @@ -623,15 +624,16 @@ $disabled = false; $join = false; $join_not = false; + $length = strlen($content); // disabled rule (false + comment): if false # ..... - if (preg_match('/^\s*false\s+#/i', $content)) { - $content = preg_replace('/^\s*false\s+#\s*/i', '', $content); + if (preg_match('/^\s*false\s+#\s*/i', substr($content, $position, 20), $m)) { + $position += strlen($m[0]); $disabled = true; } - while (strlen($content)) { - $tokens = self::tokenize($content, true); + while ($position < $length) { + $tokens = self::tokenize($content, true, $position); $separator = array_pop($tokens); if (!empty($tokens)) { @@ -768,7 +770,7 @@ } // ...and actions block - $actions = $this->_parse_actions($content); + $actions = $this->_parse_actions($content, $position); if ($tests && $actions) { $result = array( @@ -786,17 +788,19 @@ /** * Parse body of actions section * - * @param string $content Text body - * @param string $end End of text separator + * @param string $content The whole script content + * @param int &$position Start position in the script + * @param string $end End of text separator * * @return array Array of parsed action type/target pairs */ - private function _parse_actions(&$content, $end = '}') + private function _parse_actions($content, &$position, $end = '}') { $result = null; + $length = strlen($content); - while (strlen($content)) { - $tokens = self::tokenize($content, true); + while ($position < $length) { + $tokens = self::tokenize($content, true, $position); $separator = array_pop($tokens); $token = !empty($tokens) ? array_shift($tokens) : $separator; @@ -1074,28 +1078,29 @@ /** * Splits script into string tokens * - * @param string &$str The script - * @param mixed $num Number of tokens to return, 0 for all - * or True for all tokens until separator is found. - * Separator will be returned as last token. + * @param string $str The script + * @param mixed $num Number of tokens to return, 0 for all + * or True for all tokens until separator is found. + * Separator will be returned as last token. + * @param int &$position Parsing start position * * @return mixed Tokens array or string if $num=1 */ - static function tokenize(&$str, $num=0) + static function tokenize($str, $num = 0, &$position = 0) { $result = array(); + $length = strlen($str); // remove spaces from the beginning of the string - while (($str = ltrim($str)) !== '' - && (!$num || $num === true || count($result) < $num) - ) { - switch ($str[0]) { + while ($position < $length && (!$num || $num === true || count($result) < $num)) { + // skip whitespace chars + $position = self::ltrim_position($str, $position); + + switch ($str[$position]) { // Quoted string case '"': - $len = strlen($str); - - for ($pos=1; $pos<$len; $pos++) { + for ($pos = $position + 1; $pos < $length; $pos++) { if ($str[$pos] == '"') { break; } @@ -1108,18 +1113,19 @@ if ($str[$pos] != '"') { // error } + // we need to strip slashes for a quoted string - $result[] = stripslashes(substr($str, 1, $pos - 1)); - $str = substr($str, $pos + 1); + $result[] = stripslashes(substr($str, $position + 1, $pos - $position - 1)); + $position = $pos + 1; break; // Parenthesized list case '[': - $str = substr($str, 1); - $result[] = self::tokenize($str, 0); + $position++; + $result[] = self::tokenize($str, 0, $position); break; case ']': - $str = substr($str, 1); + $position++; return $result; break; @@ -1132,8 +1138,8 @@ case ')': case '{': case '}': - $sep = $str[0]; - $str = substr($str, 1); + $sep = $str[$position]; + $position++; if ($num === true) { $result[] = $sep; break 2; @@ -1142,69 +1148,97 @@ // bracket-comment case '/': - if ($str[1] == '*') { - if ($end_pos = strpos($str, '*/')) { - $str = substr($str, $end_pos + 2); + if ($str[$position + 1] == '*') { + if ($end_pos = strpos($str, '*/', $position + 2)) { + $position = $end_pos + 2; } else { // error - $str = ''; + $position = $length; } } break; // hash-comment case '#': - if ($lf_pos = strpos($str, "\n")) { - $str = substr($str, $lf_pos); + if ($lf_pos = strpos($str, "\n", $position)) { + $position = $lf_pos + 1; break; } else { - $str = ''; + $position = $length; } // String atom default: // empty or one character - if ($str === '' || $str === null) { + if ($position == $length) { break 2; } - if (strlen($str) < 2) { - $result[] = $str; - $str = ''; + if ($length - $position < 2) { + $result[] = substr($str, $position); + $position = $length; break; } // tag/identifier/number - if (preg_match('/^([a-z0-9:_]+)/i', $str, $m)) { - $str = substr($str, strlen($m[1])); + if (preg_match('/[a-zA-Z0-9:_]+/', $str, $m, PREG_OFFSET_CAPTURE, $position) + && $m[0][1] == $position + ) { + $atom = $m[0][0]; + $position += strlen($atom); - if ($m[1] != 'text:') { - $result[] = $m[1]; + if ($atom != 'text:') { + $result[] = $atom; } // multiline string else { + // skip whitespace chars (except \r\n) + $position = self::ltrim_position($str, $position, false); + // possible hash-comment after "text:" - if (preg_match('/^( |\t)*(#[^\n]+)?\n/', $str, $m)) { - $str = substr($str, strlen($m[0])); - } - // get text until alone dot in a line - if (preg_match('/^(.*)\r?\n\.\r?\n/sU', $str, $m)) { - $text = $m[1]; - // remove dot-stuffing - $text = str_replace("\n..", "\n.", $text); - $str = substr($str, strlen($m[0])); - } - else { - $text = ''; + if ($str[$position] === '#') { + $endl = strpos($str, "\n", $position); + $position = $endl ?: $length; } + // skip \n or \r\n + if ($str[$position] == "\n") { + $position++; + } + else if ($str[$position] == "\r" && $str[$position] == "\n") { + $position += 2; + } + + $text = ''; + + // get text until alone dot in a line + while ($position < $length) { + $pos = strpos($str, "\n.", $position); + if ($pos === false) { + break; + } + + $text .= substr($str, $position, $pos - $position); + $position = $pos + 2; + + if ($str[$pos] == "\n" + || ($str[$pos] == "\r" && $str[$pos + 1] == "\n") + ) { + break; + } + } + + // remove dot-stuffing + $text = str_replace("\n..", "\n.", $text); + $result[] = $text; + $position++; } } // fallback, skip one character as infinite loop prevention else { - $str = substr($str, 1); + $position++; } break; @@ -1214,4 +1248,24 @@ return $num === 1 ? (isset($result[0]) ? $result[0] : null) : $result; } + /** + * Skip whitespace characters in a string from specified position. + */ + static function ltrim_position($content, $position, $br = true) + { + $blanks = array("\t", "\0", "\x0B", " "); + + if ($br) { + $blanks[] = "\r"; + $blanks[] = "\n"; + } + + while (isset($content[$position]) && isset($content[$position + 1]) + && in_array($content[$position], $blanks, true) + ) { + $position++; + } + + return $position; + } } -- Gitblit v1.9.1