From 80cb362b8001ead4289c22dffca2ef0a1bd5b3f2 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Wed, 06 Apr 2016 06:34:37 -0400
Subject: [PATCH] Managesieve: Refactored script parser to be 100x faster
---
CHANGELOG | 1
plugins/managesieve/composer.json | 12 ++-
plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php | 2
plugins/managesieve/Changelog | 4 +
plugins/managesieve/lib/Roundcube/rcube_sieve_script.php | 204 ++++++++++++++++++++++++++++++++------------------
5 files changed, 143 insertions(+), 80 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
index da091e7..46aa975 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
CHANGELOG Roundcube Webmail
===========================
+- Managesieve: Refactored script parser to be 100x faster
- Enigma: Added option to attach public keys to sent mail (#5152)
- Enigma: Handle messages with text before an encrypted block (#5149)
- Enigma: Handle encrypted/signed content inside message/rfc822 attachments
diff --git a/plugins/managesieve/Changelog b/plugins/managesieve/Changelog
index 490819f..be39611 100644
--- a/plugins/managesieve/Changelog
+++ b/plugins/managesieve/Changelog
@@ -1,3 +1,7 @@
+* version 8.6 [2016-04-06]
+-----------------------------------------------------------
+- Refactored script parser to be 100x faster
+
* version 8.5 [2015-11-15]
-----------------------------------------------------------
- Add support for :from parameter in vacation action (patch from Michele Locati)
diff --git a/plugins/managesieve/composer.json b/plugins/managesieve/composer.json
index fcf5ba3..b1fd1b5 100644
--- a/plugins/managesieve/composer.json
+++ b/plugins/managesieve/composer.json
@@ -3,7 +3,7 @@
"type": "roundcube-plugin",
"description": "Adds a possibility to manage Sieve scripts (incoming mail filters). It's clickable interface which operates on text scripts and communicates with server using managesieve protocol. Adds Filters tab in Settings.",
"license": "GPLv3+",
- "version": "8.5",
+ "version": "8.6",
"authors": [
{
"name": "Aleksander Machniak",
@@ -14,16 +14,20 @@
"repositories": [
{
"type": "composer",
- "url": "http://plugins.roundcube.net"
+ "url": "https://plugins.roundcube.net"
},
{
"type": "pear",
- "url": "http://pear.php.net/"
+ "url": "https://pear.php.net/"
+ },
+ {
+ "type": "vcs",
+ "url": "https://github.com/roundcube/Net_Sieve.git"
}
],
"require": {
"php": ">=5.3.0",
"roundcube/plugin-installer": ">=0.1.3",
- "pear-pear/Net_Sieve": ">=1.3.2"
+ "roundcube/net_sieve": "~1.5.0"
}
}
diff --git a/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php b/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php
index 50f4c08..b013351 100644
--- a/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php
+++ b/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php
@@ -63,7 +63,7 @@
1 => 'notifyimportancehigh'
);
- const VERSION = '8.5';
+ const VERSION = '8.6';
const PROGNAME = 'Roundcube (Managesieve)';
const PORT = 4190;
diff --git a/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php b/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php
index 518d79d..0b11de1 100644
--- a/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php
+++ b/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php
@@ -529,17 +529,20 @@
*/
private function _parse_text($script)
{
- $prefix = '';
- $options = array();
+ $prefix = '';
+ $options = array();
+ $position = 0;
+ $length = strlen($script);
- while ($script) {
- $script = trim($script);
- $rule = array();
+ while ($position < $length) {
+ // skip whitespace chars
+ $position = self::ltrim_position($script, $position);
+ $rulename = '';
// Comments
- while (!empty($script) && $script[0] == '#') {
- $endl = strpos($script, "\n");
- $line = $endl ? substr($script, 0, $endl) : $script;
+ while ($script[$position] === '#') {
+ $endl = strpos($script, "\n", $position) ?: $length;
+ $line = substr($script, $position, $endl - $position);
// Roundcube format
if (preg_match('/^# rule:\[(.*)\]/', $line, $matches)) {
@@ -559,7 +562,7 @@
$prefix .= $line . "\n";
}
- $script = ltrim(substr($script, strlen($line) + 1));
+ $position = $endl + 1;
}
// handle script header
@@ -571,15 +574,15 @@
}
// Control structures/blocks
- if (preg_match('/^(if|else|elsif)/i', $script)) {
- $rule = $this->_tokenize_rule($script);
+ if (preg_match('/^(if|else|elsif)/i', substr($script, $position, 5))) {
+ $rule = $this->_tokenize_rule($script, $position);
if (strlen($rulename) && !empty($rule)) {
$rule['name'] = $rulename;
}
}
// Simple commands
else {
- $rule = $this->_parse_actions($script, ';');
+ $rule = $this->_parse_actions($script, $position, ';');
if (!empty($rule[0]) && is_array($rule)) {
// set "global" variables
if ($rule[0]['type'] == 'set') {
@@ -592,8 +595,6 @@
}
}
}
-
- $rulename = '';
if (!empty($rule)) {
$this->content[] = $rule;
@@ -608,14 +609,14 @@
/**
* Convert text script fragment to rule object
*
- * @param string Text rule
+ * @param string $content The whole script content
+ * @param int &$position Start position in the script
*
* @return array Rule data
*/
- private function _tokenize_rule(&$content)
+ private function _tokenize_rule($content, &$position)
{
- $cond = strtolower(self::tokenize($content, 1));
-
+ $cond = strtolower(self::tokenize($content, 1, $position));
if ($cond != 'if' && $cond != 'elsif' && $cond != 'else') {
return null;
}
@@ -623,15 +624,16 @@
$disabled = false;
$join = false;
$join_not = false;
+ $length = strlen($content);
// disabled rule (false + comment): if false # .....
- if (preg_match('/^\s*false\s+#/i', $content)) {
- $content = preg_replace('/^\s*false\s+#\s*/i', '', $content);
+ if (preg_match('/^\s*false\s+#\s*/i', substr($content, $position, 20), $m)) {
+ $position += strlen($m[0]);
$disabled = true;
}
- while (strlen($content)) {
- $tokens = self::tokenize($content, true);
+ while ($position < $length) {
+ $tokens = self::tokenize($content, true, $position);
$separator = array_pop($tokens);
if (!empty($tokens)) {
@@ -768,7 +770,7 @@
}
// ...and actions block
- $actions = $this->_parse_actions($content);
+ $actions = $this->_parse_actions($content, $position);
if ($tests && $actions) {
$result = array(
@@ -786,17 +788,19 @@
/**
* Parse body of actions section
*
- * @param string $content Text body
- * @param string $end End of text separator
+ * @param string $content The whole script content
+ * @param int &$position Start position in the script
+ * @param string $end End of text separator
*
* @return array Array of parsed action type/target pairs
*/
- private function _parse_actions(&$content, $end = '}')
+ private function _parse_actions($content, &$position, $end = '}')
{
$result = null;
+ $length = strlen($content);
- while (strlen($content)) {
- $tokens = self::tokenize($content, true);
+ while ($position < $length) {
+ $tokens = self::tokenize($content, true, $position);
$separator = array_pop($tokens);
$token = !empty($tokens) ? array_shift($tokens) : $separator;
@@ -1074,28 +1078,29 @@
/**
* Splits script into string tokens
*
- * @param string &$str The script
- * @param mixed $num Number of tokens to return, 0 for all
- * or True for all tokens until separator is found.
- * Separator will be returned as last token.
+ * @param string $str The script
+ * @param mixed $num Number of tokens to return, 0 for all
+ * or True for all tokens until separator is found.
+ * Separator will be returned as last token.
+ * @param int &$position Parsing start position
*
* @return mixed Tokens array or string if $num=1
*/
- static function tokenize(&$str, $num=0)
+ static function tokenize($str, $num = 0, &$position = 0)
{
$result = array();
+ $length = strlen($str);
// remove spaces from the beginning of the string
- while (($str = ltrim($str)) !== ''
- && (!$num || $num === true || count($result) < $num)
- ) {
- switch ($str[0]) {
+ while ($position < $length && (!$num || $num === true || count($result) < $num)) {
+ // skip whitespace chars
+ $position = self::ltrim_position($str, $position);
+
+ switch ($str[$position]) {
// Quoted string
case '"':
- $len = strlen($str);
-
- for ($pos=1; $pos<$len; $pos++) {
+ for ($pos = $position + 1; $pos < $length; $pos++) {
if ($str[$pos] == '"') {
break;
}
@@ -1108,18 +1113,19 @@
if ($str[$pos] != '"') {
// error
}
+
// we need to strip slashes for a quoted string
- $result[] = stripslashes(substr($str, 1, $pos - 1));
- $str = substr($str, $pos + 1);
+ $result[] = stripslashes(substr($str, $position + 1, $pos - $position - 1));
+ $position = $pos + 1;
break;
// Parenthesized list
case '[':
- $str = substr($str, 1);
- $result[] = self::tokenize($str, 0);
+ $position++;
+ $result[] = self::tokenize($str, 0, $position);
break;
case ']':
- $str = substr($str, 1);
+ $position++;
return $result;
break;
@@ -1132,8 +1138,8 @@
case ')':
case '{':
case '}':
- $sep = $str[0];
- $str = substr($str, 1);
+ $sep = $str[$position];
+ $position++;
if ($num === true) {
$result[] = $sep;
break 2;
@@ -1142,69 +1148,97 @@
// bracket-comment
case '/':
- if ($str[1] == '*') {
- if ($end_pos = strpos($str, '*/')) {
- $str = substr($str, $end_pos + 2);
+ if ($str[$position + 1] == '*') {
+ if ($end_pos = strpos($str, '*/', $position + 2)) {
+ $position = $end_pos + 2;
}
else {
// error
- $str = '';
+ $position = $length;
}
}
break;
// hash-comment
case '#':
- if ($lf_pos = strpos($str, "\n")) {
- $str = substr($str, $lf_pos);
+ if ($lf_pos = strpos($str, "\n", $position)) {
+ $position = $lf_pos + 1;
break;
}
else {
- $str = '';
+ $position = $length;
}
// String atom
default:
// empty or one character
- if ($str === '' || $str === null) {
+ if ($position == $length) {
break 2;
}
- if (strlen($str) < 2) {
- $result[] = $str;
- $str = '';
+ if ($length - $position < 2) {
+ $result[] = substr($str, $position);
+ $position = $length;
break;
}
// tag/identifier/number
- if (preg_match('/^([a-z0-9:_]+)/i', $str, $m)) {
- $str = substr($str, strlen($m[1]));
+ if (preg_match('/[a-zA-Z0-9:_]+/', $str, $m, PREG_OFFSET_CAPTURE, $position)
+ && $m[0][1] == $position
+ ) {
+ $atom = $m[0][0];
+ $position += strlen($atom);
- if ($m[1] != 'text:') {
- $result[] = $m[1];
+ if ($atom != 'text:') {
+ $result[] = $atom;
}
// multiline string
else {
+ // skip whitespace chars (except \r\n)
+ $position = self::ltrim_position($str, $position, false);
+
// possible hash-comment after "text:"
- if (preg_match('/^( |\t)*(#[^\n]+)?\n/', $str, $m)) {
- $str = substr($str, strlen($m[0]));
- }
- // get text until alone dot in a line
- if (preg_match('/^(.*)\r?\n\.\r?\n/sU', $str, $m)) {
- $text = $m[1];
- // remove dot-stuffing
- $text = str_replace("\n..", "\n.", $text);
- $str = substr($str, strlen($m[0]));
- }
- else {
- $text = '';
+ if ($str[$position] === '#') {
+ $endl = strpos($str, "\n", $position);
+ $position = $endl ?: $length;
}
+ // skip the line break (\n or \r\n) that ends the "text:" line;
+ // substr() is used so a position at the end of the script is harmless
+ if (substr($str, $position, 1) === "\n") {
+ $position++;
+ }
+ else if (substr($str, $position, 2) === "\r\n") {
+ $position += 2;
+ }
+
+ $text = '';
+
+ // get text until alone dot in a line
+ while ($position < $length) {
+ $pos = strpos($str, "\n.", $position);
+ if ($pos === false) {
+ break;
+ }
+
+ $text .= substr($str, $position, $pos - $position);
+ $position = $pos + 2;
+
+ // look at what follows the dot, not at the "\n" that precedes it
+ $after = (string) substr($str, $position, 2);
+
+ // a dot alone in a line (or at the very end of the script) ends the text
+ if ($after === '' || $after[0] === "\n" || $after === "\r\n") {
+ // for CRLF stop on the "\n", the increment after the loop skips it
+ if ($after === "\r\n") {
+ $position++;
+ }
+ // drop the "\r" of the CRLF that preceded the terminating dot
+ if (substr($text, -1) === "\r") {
+ $text = (string) substr($text, 0, -1);
+ }
+ break;
+ }
+
+ // the line only starts with a dot, so it still belongs to the text:
+ // put back the "\n." consumed by the search (unstuffed later)
+ $text .= "\n.";
+ }
+
+ // remove dot-stuffing
+ $text = str_replace("\n..", "\n.", $text);
+
$result[] = $text;
+ $position++;
}
}
// fallback, skip one character as infinite loop prevention
else {
- $str = substr($str, 1);
+ $position++;
}
break;
@@ -1214,4 +1248,24 @@
return $num === 1 ? (isset($result[0]) ? $result[0] : null) : $result;
}
+ /**
+ * Finds the offset of the first non-blank character at or after a position.
+ *
+ * Scanning stops on the last character of the string even when that
+ * character is blank, and the position is returned unchanged when it
+ * does not point inside the string.
+ *
+ * @param string $content Text to scan
+ * @param int $position Offset to start scanning from
+ * @param bool $br True to treat \r and \n as blanks too
+ *
+ * @return int Offset of the first character that was not skipped
+ */
+ static function ltrim_position($content, $position, $br = true)
+ {
+ // tab, NUL, vertical tab and space; line breaks only on request
+ $skip = "\t\0\x0B " . ($br ? "\r\n" : '');
+
+ for (;;) {
+ // advance only while a following character exists
+ if (!isset($content[$position], $content[$position + 1])) {
+ break;
+ }
+
+ if (strpos($skip, $content[$position]) === false) {
+ break;
+ }
+
+ $position++;
+ }
+
+ return $position;
+ }
}
--
Gitblit v1.9.1