From 80cb362b8001ead4289c22dffca2ef0a1bd5b3f2 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Wed, 06 Apr 2016 06:34:37 -0400
Subject: [PATCH] Managesieve: Refactored script parser to be 100x faster

---
 CHANGELOG                                                |    1 
 plugins/managesieve/composer.json                        |   12 ++-
 plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php |    2 
 plugins/managesieve/Changelog                            |    4 +
 plugins/managesieve/lib/Roundcube/rcube_sieve_script.php |  204 ++++++++++++++++++++++++++++++++------------------
 5 files changed, 143 insertions(+), 80 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index da091e7..46aa975 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
 CHANGELOG Roundcube Webmail
 ===========================
 
+- Managesieve: Refactored script parser to be 100x faster
 - Enigma: Added option to attach public keys to sent mail (#5152)
 - Enigma: Handle messages with text before an encrypted block (#5149)
 - Enigma: Handle encrypted/signed content inside message/rfc822 attachments
diff --git a/plugins/managesieve/Changelog b/plugins/managesieve/Changelog
index 490819f..be39611 100644
--- a/plugins/managesieve/Changelog
+++ b/plugins/managesieve/Changelog
@@ -1,3 +1,7 @@
+* version 8.6 [2016-04-06]
+-----------------------------------------------------------
+- Refactored script parser to be 100x faster
+
 * version 8.5 [2015-11-15]
 -----------------------------------------------------------
 - Add support for :from parameter in vacation action (patch from Michele Locati)
diff --git a/plugins/managesieve/composer.json b/plugins/managesieve/composer.json
index fcf5ba3..b1fd1b5 100644
--- a/plugins/managesieve/composer.json
+++ b/plugins/managesieve/composer.json
@@ -3,7 +3,7 @@
     "type": "roundcube-plugin",
     "description": "Adds a possibility to manage Sieve scripts (incoming mail filters). It's clickable interface which operates on text scripts and communicates with server using managesieve protocol. Adds Filters tab in Settings.",
     "license": "GPLv3+",
-    "version": "8.5",
+    "version": "8.6",
     "authors": [
         {
             "name": "Aleksander Machniak",
@@ -14,16 +14,20 @@
     "repositories": [
         {
             "type": "composer",
-            "url": "http://plugins.roundcube.net"
+            "url": "https://plugins.roundcube.net"
         },
         {
             "type": "pear",
-            "url": "http://pear.php.net/"
+            "url": "https://pear.php.net/"
+        },
+        {
+            "type": "vcs",
+            "url": "https://github.com/roundcube/Net_Sieve.git"
         }
     ],
     "require": {
         "php": ">=5.3.0",
         "roundcube/plugin-installer": ">=0.1.3",
-        "pear-pear/Net_Sieve": ">=1.3.2"
+        "roundcube/net_sieve": "~1.5.0"
     }
 }
diff --git a/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php b/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php
index 50f4c08..b013351 100644
--- a/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php
+++ b/plugins/managesieve/lib/Roundcube/rcube_sieve_engine.php
@@ -63,7 +63,7 @@
         1 => 'notifyimportancehigh'
     );
 
-    const VERSION  = '8.5';
+    const VERSION  = '8.6';
     const PROGNAME = 'Roundcube (Managesieve)';
     const PORT     = 4190;
 
diff --git a/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php b/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php
index 518d79d..0b11de1 100644
--- a/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php
+++ b/plugins/managesieve/lib/Roundcube/rcube_sieve_script.php
@@ -529,17 +529,20 @@
      */
     private function _parse_text($script)
     {
-        $prefix     = '';
-        $options = array();
+        $prefix   = '';
+        $options  = array();
+        $position = 0;
+        $length   = strlen($script);
 
-        while ($script) {
-            $script = trim($script);
-            $rule   = array();
+        while ($position < $length) {
+            // skip whitespace chars
+            $position = self::ltrim_position($script, $position);
+            $rulename = '';
 
             // Comments
-            while (!empty($script) && $script[0] == '#') {
-                $endl = strpos($script, "\n");
-                $line = $endl ? substr($script, 0, $endl) : $script;
+            while ($script[$position] === '#') {
+                $endl = strpos($script, "\n", $position) ?: $length;
+                $line = substr($script, $position, $endl - $position);
 
                 // Roundcube format
                 if (preg_match('/^# rule:\[(.*)\]/', $line, $matches)) {
@@ -559,7 +562,7 @@
                     $prefix .= $line . "\n";
                 }
 
-                $script = ltrim(substr($script, strlen($line) + 1));
+                $position = $endl + 1;
             }
 
             // handle script header
@@ -571,15 +574,15 @@
             }
 
             // Control structures/blocks
-            if (preg_match('/^(if|else|elsif)/i', $script)) {
-                $rule = $this->_tokenize_rule($script);
+            if (preg_match('/^(if|else|elsif)/i', substr($script, $position, 5))) {
+                $rule = $this->_tokenize_rule($script, $position);
                 if (strlen($rulename) && !empty($rule)) {
                     $rule['name'] = $rulename;
                 }
             }
             // Simple commands
             else {
-                $rule = $this->_parse_actions($script, ';');
+                $rule = $this->_parse_actions($script, $position, ';');
                 if (!empty($rule[0]) && is_array($rule)) {
                     // set "global" variables
                     if ($rule[0]['type'] == 'set') {
@@ -592,8 +595,6 @@
                     }
                 }
             }
-
-            $rulename = '';
 
             if (!empty($rule)) {
                 $this->content[] = $rule;
@@ -608,14 +609,14 @@
     /**
      * Convert text script fragment to rule object
      *
-     * @param string Text rule
+     * @param string $content   The whole script content
+     * @param int    &$position Start position in the script
      *
      * @return array Rule data
      */
-    private function _tokenize_rule(&$content)
+    private function _tokenize_rule($content, &$position)
     {
-        $cond = strtolower(self::tokenize($content, 1));
-
+        $cond = strtolower(self::tokenize($content, 1, $position));
         if ($cond != 'if' && $cond != 'elsif' && $cond != 'else') {
             return null;
         }
@@ -623,15 +624,16 @@
         $disabled = false;
         $join     = false;
         $join_not = false;
+        $length   = strlen($content);
 
         // disabled rule (false + comment): if false # .....
-        if (preg_match('/^\s*false\s+#/i', $content)) {
-            $content = preg_replace('/^\s*false\s+#\s*/i', '', $content);
+        if (preg_match('/^\s*false\s+#\s*/i', substr($content, $position, 20), $m)) {
+            $position += strlen($m[0]);
             $disabled = true;
         }
 
-        while (strlen($content)) {
-            $tokens = self::tokenize($content, true);
+        while ($position < $length) {
+            $tokens    = self::tokenize($content, true, $position);
             $separator = array_pop($tokens);
 
             if (!empty($tokens)) {
@@ -768,7 +770,7 @@
         }
 
         // ...and actions block
-        $actions = $this->_parse_actions($content);
+        $actions = $this->_parse_actions($content, $position);
 
         if ($tests && $actions) {
             $result = array(
@@ -786,17 +788,19 @@
     /**
      * Parse body of actions section
      *
-     * @param string $content  Text body
-     * @param string $end      End of text separator
+     * @param string $content   The whole script content
+     * @param int    &$position Start position in the script
+     * @param string $end       End of text separator
      *
      * @return array Array of parsed action type/target pairs
      */
-    private function _parse_actions(&$content, $end = '}')
+    private function _parse_actions($content, &$position, $end = '}')
     {
         $result = null;
+        $length = strlen($content);
 
-        while (strlen($content)) {
-            $tokens    = self::tokenize($content, true);
+        while ($position < $length) {
+            $tokens    = self::tokenize($content, true, $position);
             $separator = array_pop($tokens);
             $token     = !empty($tokens) ? array_shift($tokens) : $separator;
 
@@ -1074,28 +1078,29 @@
     /**
      * Splits script into string tokens
      *
-     * @param string &$str    The script
-     * @param mixed  $num     Number of tokens to return, 0 for all
-     *                        or True for all tokens until separator is found.
-     *                        Separator will be returned as last token.
+     * @param string $str       The script
+     * @param mixed  $num       Number of tokens to return, 0 for all
+     *                          or True for all tokens until separator is found.
+     *                          Separator will be returned as last token.
+     * @param int    &$position Parsing start position
      *
      * @return mixed Tokens array or string if $num=1
      */
-    static function tokenize(&$str, $num=0)
+    static function tokenize($str, $num = 0, &$position = 0)
     {
         $result = array();
+        $length = strlen($str);
 
         // remove spaces from the beginning of the string
-        while (($str = ltrim($str)) !== ''
-            && (!$num || $num === true || count($result) < $num)
-        ) {
-            switch ($str[0]) {
+        while ($position < $length && (!$num || $num === true || count($result) < $num)) {
+            // skip whitespace chars
+            $position = self::ltrim_position($str, $position);
+
+            switch ($str[$position]) {
 
             // Quoted string
             case '"':
-                $len = strlen($str);
-
-                for ($pos=1; $pos<$len; $pos++) {
+                for ($pos = $position + 1; $pos < $length; $pos++) {
                     if ($str[$pos] == '"') {
                         break;
                     }
@@ -1108,18 +1113,19 @@
                 if ($str[$pos] != '"') {
                     // error
                 }
+
                 // we need to strip slashes for a quoted string
-                $result[] = stripslashes(substr($str, 1, $pos - 1));
-                $str      = substr($str, $pos + 1);
+                $result[] = stripslashes(substr($str, $position + 1, $pos - $position - 1));
+                $position = $pos + 1;
                 break;
 
             // Parenthesized list
             case '[':
-                $str = substr($str, 1);
-                $result[] = self::tokenize($str, 0);
+                $position++;
+                $result[] = self::tokenize($str, 0, $position);
                 break;
             case ']':
-                $str = substr($str, 1);
+                $position++;
                 return $result;
                 break;
 
@@ -1132,8 +1138,8 @@
             case ')':
             case '{':
             case '}':
-                $sep = $str[0];
-                $str = substr($str, 1);
+                $sep = $str[$position];
+                $position++;
                 if ($num === true) {
                     $result[] = $sep;
                     break 2;
@@ -1142,69 +1148,97 @@
 
             // bracket-comment
             case '/':
-                if ($str[1] == '*') {
-                    if ($end_pos = strpos($str, '*/')) {
-                        $str = substr($str, $end_pos + 2);
+                if ($str[$position + 1] == '*') {
+                    if ($end_pos = strpos($str, '*/', $position + 2)) {
+                        $position = $end_pos + 2;
                     }
                     else {
                         // error
-                        $str = '';
+                        $position = $length;
                     }
                 }
                 break;
 
             // hash-comment
             case '#':
-                if ($lf_pos = strpos($str, "\n")) {
-                    $str = substr($str, $lf_pos);
+                if ($lf_pos = strpos($str, "\n", $position)) {
+                    $position = $lf_pos + 1;
                     break;
                 }
                 else {
-                    $str = '';
+                    $position = $length;
                 }
 
             // String atom
             default:
                 // empty or one character
-                if ($str === '' || $str === null) {
+                if ($position == $length) {
                     break 2;
                 }
-                if (strlen($str) < 2) {
-                    $result[] = $str;
-                    $str = '';
+                if ($length - $position < 2) {
+                    $result[] = substr($str, $position);
+                    $position = $length;
                     break;
                 }
 
                 // tag/identifier/number
-                if (preg_match('/^([a-z0-9:_]+)/i', $str, $m)) {
-                    $str = substr($str, strlen($m[1]));
+                if (preg_match('/[a-zA-Z0-9:_]+/', $str, $m, PREG_OFFSET_CAPTURE, $position)
+                    && $m[0][1] == $position
+                ) {
+                    $atom      = $m[0][0];
+                    $position += strlen($atom);
 
-                    if ($m[1] != 'text:') {
-                        $result[] = $m[1];
+                    if ($atom != 'text:') {
+                        $result[] = $atom;
                     }
                     // multiline string
                     else {
+                        // skip whitespace chars (except \r\n)
+                        $position = self::ltrim_position($str, $position, false);
+
                         // possible hash-comment after "text:"
-                        if (preg_match('/^( |\t)*(#[^\n]+)?\n/', $str, $m)) {
-                            $str = substr($str, strlen($m[0]));
-                        }
-                        // get text until alone dot in a line
-                        if (preg_match('/^(.*)\r?\n\.\r?\n/sU', $str, $m)) {
-                            $text = $m[1];
-                            // remove dot-stuffing
-                            $text = str_replace("\n..", "\n.", $text);
-                            $str = substr($str, strlen($m[0]));
-                        }
-                        else {
-                            $text = '';
+                        if ($str[$position] === '#') {
+                            $endl     = strpos($str, "\n", $position);
+                            $position = $endl ?: $length;
                         }
 
+                        // skip the line break (\n or \r\n) that ends the "text:" line
+                        if ($str[$position] == "\n") {
+                            $position++;
+                        }
+                        else if (substr($str, $position, 2) === "\r\n") {
+                            $position += 2;
+                        }
+
+                        $text = '';
+
+                        // collect text until a line that holds a lone dot (the terminator)
+                        while ($position < $length) {
+                            $pos = strpos($str, "\n.", $position);
+                            if ($pos === false) {
+                                break;
+                            }
+
+                            $text    .= substr($str, $position, $pos - $position);
+                            $position = $pos + 2;
+                            $eol      = (string) substr($str, $position, 2);
+
+                            // it is the terminator only if a line break directly follows the dot
+                            if ($eol === "\r\n" || strncmp($eol, "\n", 1) === 0) {
+                                break;
+                            }
+                            $text .= "\n."; // not the terminator: keep the "\n." consumed above
+                        }
+
+                        // remove dot-stuffing
+                        $text = str_replace("\n..", "\n.", $text);
+
                         $result[] = $text;
+                        $position++;
                     }
                 }
                 // fallback, skip one character as infinite loop prevention
                 else {
-                    $str = substr($str, 1);
+                    $position++;
                 }
 
                 break;
@@ -1214,4 +1248,24 @@
         return $num === 1 ? (isset($result[0]) ? $result[0] : null) : $result;
     }
 
+    /**
+     * Returns the offset of the first non-blank character at or after $position.
+     * The last character is never skipped; \r and \n are blanks only when $br is set.
+     */
+    static function ltrim_position($content, $position, $br = true)
+    {
+        $blanks = $br ? " \t\0\x0B\r\n" : " \t\0\x0B";
+
+        for (;; $position++) {
+            // stop on the last character of the string (or when already past its end)
+            if (!isset($content[$position], $content[$position + 1])) {
+                break;
+            }
+            if (strpos($blanks, $content[$position]) === false) {
+                break;
+            }
+        }
+
+        return $position;
+    }
 }

--
Gitblit v1.9.1