From bb6f4b2b5d0676ef0ed90f8050ad28e46f2dce35 Mon Sep 17 00:00:00 2001
From: Thomas Bruederli <thomas@roundcube.net>
Date: Fri, 25 Jan 2013 17:46:34 -0500
Subject: [PATCH] Refactored blockquote quoting routine in html2text conversion: it now correctly converts multiple and/or nested blockquotes

---
 tests/Framework/Html2text.php             |   19 +++++++++
 program/lib/Roundcube/rcube_html2text.php |   87 ++++++++++++++++++++++---------------------
 2 files changed, 64 insertions(+), 42 deletions(-)

diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php
index 0b172eb..3d32fe7 100644
--- a/program/lib/Roundcube/rcube_html2text.php
+++ b/program/lib/Roundcube/rcube_html2text.php
@@ -571,55 +571,58 @@
      */
     protected function _convert_blockquotes(&$text)
     {
-        if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
-            $level = 0;
-            $diff = 0;
-            foreach ($matches[0] as $m) {
-                if ($m[0][0] == '<' && $m[0][1] == '/') {
+        $level = 0;
+        $offset = 0;
+        while (($start = strpos($text, '<blockquote', $offset)) !== false) {
+            $offset = $start + 12;
+            do {
+                $end = strpos($text, '</blockquote>', $offset);
+                $next = strpos($text, '<blockquote', $offset);
+
+                // nested <blockquote>, skip
+                if ($next !== false && $next < $end) {
+                    $offset = $next + 12;
+                    $level++;
+                }
+                // nested </blockquote> tag
+                if ($end !== false && $level > 0) {
+                    $offset = $end + 12;
                     $level--;
-                    if ($level < 0) {
-                        $level = 0; // malformed HTML: go to next blockquote
-                    }
-                    else if ($level > 0) {
-                        // skip inner blockquote
-                    }
-                    else {
-                        $end  = $m[1];
-                        $len  = $end - $taglen - $start;
-                        // Get blockquote content
-                        $body = substr($text, $start + $taglen - $diff, $len);
-
-                        // Set text width
-                        $p_width = $this->width;
-                        if ($this->width > 0) $this->width -= 2;
-                        // Convert blockquote content
-                        $body = trim($body);
-                        $this->_converter($body);
-                        // Add citation markers and create PRE block
-                        $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
-                        $body = '<pre>' . htmlspecialchars($body) . '</pre>';
-                        // Re-set text width
-                        $this->width = $p_width;
-                        // Replace content
-                        $text = substr($text, 0, $start - $diff)
-                            . $body . substr($text, $end + strlen($m[0]) - $diff);
-
-                        $diff = $len + $taglen + strlen($m[0]) - strlen($body);
-                        unset($body);
-                    }
                 }
-                else {
-                    if ($level == 0) {
-                        $start = $m[1];
-                        $taglen = strlen($m[0]);
-                    }
-                    $level ++;
+                // found matching end tag
+                else if ($end !== false && $level == 0) {
+                    $taglen = strpos($text, '>', $start) - $start;
+                    $startpos = $start + $taglen + 1;
+
+                    // get blockquote content
+                    $body = trim(substr($text, $startpos, $end - $startpos));
+
+                    // replace content with inner blockquotes
+                    $this->_converter($body);
+
+                    // Add citation markers and create <pre> block
+                    $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body));
+                    $body = '<pre>' . htmlspecialchars($body) . '</pre>';
+
+                    $text = substr($text, 0, $start) . $body . "\n" . substr($text, $end + 13);
+                    $offset = 0;
+                    break;
                 }
-            }
+            } while ($end || $next);
         }
     }
 
     /**
+     * Callback function to correctly add citation markers for blockquote contents
+     */
+    public function blockquote_citation_ballback($m)
+    {
+        $line = ltrim($m[2]);
+        $space = $line[0] == '>' ? '' : ' ';
+        return $m[1] . '>' . $space . $line;
+    }
+
+    /**
      * Callback function for preg_replace_callback use.
      *
      * @param  array PREG matches
diff --git a/tests/Framework/Html2text.php b/tests/Framework/Html2text.php
index 1d89638..3e0df48 100644
--- a/tests/Framework/Html2text.php
+++ b/tests/Framework/Html2text.php
@@ -56,4 +56,23 @@
 
         $this->assertEquals($out, $res, $title);
     }
+
+    /**
+     *
+     */
+    function test_multiple_blockquotes()
+    {
+        $html = <<<EOF
+<br>Begin<br><blockquote>OUTER BEGIN<blockquote>INNER 1<br></blockquote><div><br></div><div>Par 1</div>
+<blockquote>INNER 2</blockquote><div><br></div><div>Par 2</div>
+<div><br></div><div>Par 3</div><div><br></div>
+<blockquote>INNER 3</blockquote>OUTER END</blockquote>
+EOF;
+        $ht = new rcube_html2text($html, false, false);
+        $res = $ht->get_text();
+
+        $this->assertContains('>> INNER 1', $res, 'Quote inner');
+        $this->assertContains('>> INNER 3', $res, 'Quote inner');
+        $this->assertContains('> OUTER END', $res, 'Quote outer');
+    }
 }

--
Gitblit v1.9.1