Thomas Bruederli
2013-01-25 a2c2cb64e1d5860987d5674f6b81efd8e02af7b7
Refactored blockquote quoting routine in html2text conversion: it now correctly converts multiple and/or nested blockquotes
2 files modified
106 ■■■■■ changed files
program/lib/Roundcube/rcube_html2text.php 87 ●●●● patch | view | raw | blame | history
tests/Framework/Html2text.php 19 ●●●●● patch | view | raw | blame | history
program/lib/Roundcube/rcube_html2text.php
@@ -571,55 +571,58 @@
     */
    protected function _convert_blockquotes(&$text)
    {
        // NOTE(review): this span appears to interleave lines from two revisions
        // of the routine (a preg_match_all()/foreach walk using $m and $diff, and
        // a strpos()-based scan using $offset and $next); braces do not balance
        // as shown -- confirm against the actual file before relying on it.
        if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
            $level = 0;
            $diff = 0;
            foreach ($matches[0] as $m) {
                if ($m[0][0] == '<' && $m[0][1] == '/') {
        $level = 0;
        $offset = 0;
        while (($start = strpos($text, '<blockquote', $offset)) !== false) {
            // continue scanning behind '<blockquote' (11 characters) plus the
            // character that follows it
            $offset = $start + 12;
            do {
                $end = strpos($text, '</blockquote>', $offset);
                $next = strpos($text, '<blockquote', $offset);
                // nested <blockquote>, skip
                if ($next !== false && $next < $end) {
                    $offset = $next + 12;
                    $level++;
                }
                // nested </blockquote> tag
                if ($end !== false && $level > 0) {
                    $offset = $end + 12;
                    $level--;
                    if ($level < 0) {
                        $level = 0; // malformed HTML: go to next blockquote
                    }
                    else if ($level > 0) {
                        // skip inner blockquote
                    }
                    else {
                        $end  = $m[1];
                        $len  = $end - $taglen - $start;
                        // Get blockquote content
                        $body = substr($text, $start + $taglen - $diff, $len);
                        // Set text width
                        $p_width = $this->width;
                        if ($this->width > 0) $this->width -= 2;
                        // Convert blockquote content
                        $body = trim($body);
                        $this->_converter($body);
                        // Add citation markers and create PRE block
                        $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
                        $body = '<pre>' . htmlspecialchars($body) . '</pre>';
                        // Re-set text width
                        $this->width = $p_width;
                        // Replace content
                        $text = substr($text, 0, $start - $diff)
                            . $body . substr($text, $end + strlen($m[0]) - $diff);
                        $diff = $len + $taglen + strlen($m[0]) - strlen($body);
                        unset($body);
                    }
                }
                else {
                    if ($level == 0) {
                        $start = $m[1];
                        $taglen = strlen($m[0]);
                    }
                    $level ++;
                // found matching end tag
                else if ($end !== false && $level == 0) {
                    // distance from the start of the opening tag to its closing '>'
                    $taglen = strpos($text, '>', $start) - $start;
                    // first character behind the opening tag's '>'
                    $startpos = $start + $taglen + 1;
                    // get blockquote content
                    $body = trim(substr($text, $startpos, $end - $startpos));
                    // replace content with inner blockquotes
                    $this->_converter($body);
                    // Add citation markers and create <pre> block
                    $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_ballback'), trim($body));
                    $body = '<pre>' . htmlspecialchars($body) . '</pre>';
                    // 13 is the length of the '</blockquote>' end tag being dropped
                    $text = substr($text, 0, $start) . $body . "\n" . substr($text, $end + 13);
                    // $text has been rewritten, so earlier positions are stale:
                    // leave the inner loop and search again from the beginning
                    $offset = 0;
                    break;
                }
            }
            } while ($end || $next);
        }
    }
    /**
     * Callback function to correctly add citation markers for blockquote contents
     */
    public function blockquote_citation_ballback($m)
    {
        $line = ltrim($m[2]);
        $space = $line[0] == '>' ? '' : ' ';
        return $m[1] . '>' . $space . $line;
    }
    /**
     * Callback function for preg_replace_callback use.
     *
     * @param  array PREG matches
tests/Framework/Html2text.php
@@ -56,4 +56,23 @@
        $this->assertEquals($out, $res, $title);
    }
    /**
     *
     */
    function test_multiple_blockquotes()
    {
        $html = <<<EOF
<br>Begin<br><blockquote>OUTER BEGIN<blockquote>INNER 1<br></blockquote><div><br></div><div>Par 1</div>
<blockquote>INNER 2</blockquote><div><br></div><div>Par 2</div>
<div><br></div><div>Par 3</div><div><br></div>
<blockquote>INNER 3</blockquote>OUTER END</blockquote>
EOF;
        $ht = new rcube_html2text($html, false, false);
        $res = $ht->get_text();
        $this->assertContains('>> INNER 1', $res, 'Quote inner');
        $this->assertContains('>> INNER 3', $res, 'Quote inner');
        $this->assertContains('> OUTER END', $res, 'Quote outer');
    }
}