From 91dc7fabbc15c1e3dc932da327bbd052149ab8af Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Thu, 15 Oct 2015 06:23:23 -0400
Subject: [PATCH] Small improvements in HTML to text conversion.

---
 program/lib/Roundcube/rcube_html2text.php |   24 +++++++++++++++---------
 1 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php
index a2f6288..d20d7b7 100644
--- a/program/lib/Roundcube/rcube_html2text.php
+++ b/program/lib/Roundcube/rcube_html2text.php
@@ -136,12 +136,15 @@
      * @see $replace
      */
     protected $search = array(
-        "/\r/",                                  // Non-legal carriage return
-        "/[\n\t]+/",                             // Newlines and tabs
+        '/\r/',                                  // Non-legal carriage return
+        '/^.*<body[^>]*>\n*/i',                  // Anything before <body>
         '/<head[^>]*>.*?<\/head>/i',             // <head>
-        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
-        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
-        '/<p[^>]*>/i',                           // <P>
+        '/<script[^>]*>.*?<\/script>/i',         // <script>
+        '/<style[^>]*>.*?<\/style>/i',           // <style>
+        '/[\n\t]+/',                             // Newlines and tabs
+        '/<p[^>]*>/i',                           // <p>
+        '/<\/p>[\s\n\t]*<div[^>]*>/i',           // </p> before <div>
+        '/<br[^>]*>[\s\n\t]*<div[^>]*>/i',       // <br> before <div>
         '/<br[^>]*>\s*/i',                       // <br>
         '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
         '/<em[^>]*>(.*?)<\/em>/i',               // <em>
@@ -164,11 +167,14 @@
      */
     protected $replace = array(
         '',                                     // Non-legal carriage return
-        ' ',                                    // Newlines and tabs
+        '',                                     // Anything before <body>
         '',                                     // <head>
-        '',                                     // <script>s -- which strip_tags supposedly has problems with
-        '',                                     // <style>s -- which strip_tags supposedly has problems with
-        "\n\n",                                 // <P>
+        '',                                     // <script>
+        '',                                     // <style>
+        ' ',                                    // Newlines and tabs
+        "\n\n",                                 // <p>
+        "\n<div>",                              // </p> before <div>
+        '<div>',                                // <br> before <div>
         "\n",                                   // <br>
         '_\\1_',                                // <i>
         '_\\1_',                                // <em>

--
Gitblit v1.9.1