From c7250749ab01721b55a26d6badf4dcdbbdaf7309 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Sat, 28 Dec 2013 13:14:51 -0500
Subject: [PATCH] Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768)

---
 CHANGELOG                               |    1 
 program/lib/Roundcube/rcube_washtml.php |   83 ++++++++++++++++++++++++++-
 tests/Framework/Washtml.php             |   43 ++++++++++++++
 3 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 7a64d18..7a625bc 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
 CHANGELOG Roundcube Webmail
 ===========================
 
+- Fix issue where deprecated syntax for HTML lists was not handled properly (#1488768)
 - Display different icons when Trash folder is empty or full (#1485775)
 - Remember last position of more headers switch (#1488323)
 - Fix so message flags modified by another client are applied on the list on refresh (#1485186)
diff --git a/program/lib/Roundcube/rcube_washtml.php b/program/lib/Roundcube/rcube_washtml.php
index 9cf3c62..5a5b3dc 100644
--- a/program/lib/Roundcube/rcube_washtml.php
+++ b/program/lib/Roundcube/rcube_washtml.php
@@ -418,7 +418,7 @@
         $html = preg_replace($html_search, $html_replace, trim($html));
 
         //-> Replace all of those weird MS Word quotes and other high characters
-        $badwordchars=array(
+        $badwordchars = array(
             "\xe2\x80\x98", // left single quote
             "\xe2\x80\x99", // right single quote
             "\xe2\x80\x9c", // left double quote
@@ -426,7 +426,7 @@
             "\xe2\x80\x94", // em dash
             "\xe2\x80\xa6" // elipses
         );
-        $fixedwordchars=array(
+        $fixedwordchars = array(
             "'",
             "'",
             '"',
@@ -434,7 +434,7 @@
             '&mdash;',
             '...'
         );
-        $html = str_replace($badwordchars,$fixedwordchars, $html);
+        $html = str_replace($badwordchars, $fixedwordchars, $html);
 
         // PCRE errors handling (#1486856), should we use something like for every preg_* use?
         if ($html === null && ($preg_error = preg_last_error()) != PREG_NO_ERROR) {
@@ -461,6 +461,9 @@
         // Don't remove valid conditional comments
         // Don't remove MSOutlook (<!-->) conditional comments (#1489004)
         $html = preg_replace('/<!--[^->\[\n]+>/', '', $html);
+
+        // fix broken nested lists
+        self::fix_broken_lists($html);
 
         // turn relative into absolute urls
         $html = self::resolve_base($html);
@@ -500,5 +503,77 @@
 
         return $body;
     }
-}
 
+    /**
+     * Fix broken nested lists, they are not handled properly by DOMDocument (#1488768)
+     */
+    public static function fix_broken_lists(&$html)
+    {
+        // do two rounds, one for <ol>, one for <ul>
+        foreach (array('ol', 'ul') as $tag) {
+            $pos = 0;
+            while (($pos = stripos($html, '<' . $tag, $pos)) !== false) {
+                $pos++;
+
+                // make sure this is an ol/ul tag
+                if (!in_array($html[$pos+2], array(' ', '>'))) {
+                    continue;
+                }
+
+                $p      = $pos;
+                $in_li  = false;
+                $li_pos = 0;
+
+                while (($p = strpos($html, '<', $p)) !== false) {
+                    $tt = strtolower(substr($html, $p, 4));
+
+                    // li open tag
+                    if ($tt == '<li>' || $tt == '<li ') {
+                        $in_li = true;
+                        $p += 4;
+                    }
+                    // li close tag
+                    else if ($tt == '</li' && in_array($html[$p+4], array(' ', '>'))) {
+                        $li_pos = $p;
+                        $p += 4;
+                        $in_li = false;
+                    }
+                    // ul/ol closing tag
+                    else if ($tt == '</' . $tag && in_array($html[$p+4], array(' ', '>'))) {
+                        break;
+                    }
+                    // nested ol/ul element out of li
+                    else if (!$in_li && $li_pos && ($tt == '<ol>' || $tt == '<ol ' || $tt == '<ul>' || $tt == '<ul ')) {
+                        // find closing tag of this ul/ol element
+                        $element = substr($tt, 1, 2);
+                        $cpos    = $p;
+                        do {
+                            $tpos = stripos($html, '<' . $element, $cpos+1);
+                            $cpos = stripos($html, '</' . $element, $cpos+1);
+                        }
+                        while ($tpos !== false && $cpos !== false && $cpos > $tpos);
+
+                        // not found, this is invalid HTML, skip it
+                        if ($cpos === false) {
+                            break;
+                        }
+
+                        // get element content
+                        $end     = strpos($html, '>', $cpos);
+                        $len     = $end - $p + 1;
+                        $element = substr($html, $p, $len);
+
+                        // move element to the end of the last li
+                        $html    = substr_replace($html, '', $p, $len);
+                        $html    = substr_replace($html, $element, $li_pos, 0);
+
+                        $p = $end;
+                    }
+                    else {
+                        $p++;
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/tests/Framework/Washtml.php b/tests/Framework/Washtml.php
index 0d050ff..7485d43 100644
--- a/tests/Framework/Washtml.php
+++ b/tests/Framework/Washtml.php
@@ -81,4 +81,47 @@
         $this->assertRegExp('|</a>|', $washed, "Invalid closing tag (#1489446)");
     }
 
+    /**
+     * Test fixing of invalid lists nesting (#1488768)
+     */
+    function test_lists()
+    {
+        $data = array(
+            array(
+                "<ol><li>First</li><li>Second</li><ul><li>First sub</li></ul><li>Third</li></ol>",
+                "<ol><li>First</li><li>Second<ul><li>First sub</li></ul></li><li>Third</li></ol>"
+            ),
+            array(
+                "<ol><li>First<ul><li>First sub</li></ul></li></ol>",
+                "<ol><li>First<ul><li>First sub</li></ul></li></ol>",
+            ),
+            array(
+                "<ol><li>First<ol><li>First sub</li></ol></li></ol>",
+                "<ol><li>First<ol><li>First sub</li></ol></li></ol>",
+            ),
+            array(
+                "<ul><li>First</li><ul><li>First sub</li><ul><li>sub sub</li></ul></ul><li></li></ul>",
+                "<ul><li>First<ul><li>First sub<ul><li>sub sub</li></ul></li></ul></li><li></li></ul>",
+            ),
+            array(
+                "<ul><li>First</li><li>second</li><ul><ul><li>sub sub</li></ul></ul></ul>",
+                "<ul><li>First</li><li>second<ul><ul><li>sub sub</li></ul></ul></li></ul>",
+            ),
+            array(
+                "<ol><ol><ol></ol></ol></ol>",
+                "<ol><ol><ol></ol></ol></ol>",
+            ),
+            array(
+                "<div><ol><ol><ol></ol></ol></ol></div>",
+                "<div><ol><ol><ol></ol></ol></ol></div>",
+            ),
+        );
+
+        foreach ($data as $element) {
+            rcube_washtml::fix_broken_lists($element[0]);
+
+            $this->assertSame($element[1], $element[0], "Broken nested lists (#1488768)");
+        }
+    }
+
 }

--
Gitblit v1.9.1