Aleksander Machniak
2015-10-15 8e7f32fddc7eb5752c30c97005c71cf102cb5021
Small improvements in HTML to text conversion.

Better handling of <body> and trailing spaces, and of </p> or <br> followed by <div>.
1 file modified
24 ■■■■■ changed files
program/lib/Roundcube/rcube_html2text.php 24 ●●●●● patch | view | raw | blame | history
program/lib/Roundcube/rcube_html2text.php
@@ -136,12 +136,15 @@
     * @see $replace
     */
    protected $search = array(
        "/\r/",                                  // Non-legal carriage return
        "/[\n\t]+/",                             // Newlines and tabs
        '/\r/',                                  // Non-legal carriage return
        '/^.*<body[^>]*>\n*/i',                  // Anything before <body>
        '/<head[^>]*>.*?<\/head>/i',             // <head>
        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
        '/<p[^>]*>/i',                           // <P>
        '/<script[^>]*>.*?<\/script>/i',         // <script>
        '/<style[^>]*>.*?<\/style>/i',           // <style>
        '/[\n\t]+/',                             // Newlines and tabs
        '/<p[^>]*>/i',                           // <p>
        '/<\/p>[\s\n\t]*<div[^>]*>/i',           // </p> before <div>
        '/<br[^>]*>[\s\n\t]*<div[^>]*>/i',       // <br> before <div>
        '/<br[^>]*>\s*/i',                       // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
        '/<em[^>]*>(.*?)<\/em>/i',               // <em>
@@ -164,11 +167,14 @@
     */
    protected $replace = array(
        '',                                     // Non-legal carriage return
        ' ',                                    // Newlines and tabs
        '',                                     // Anything before <body>
        '',                                     // <head>
        '',                                     // <script>s -- which strip_tags supposedly has problems with
        '',                                     // <style>s -- which strip_tags supposedly has problems with
        "\n\n",                                 // <P>
        '',                                     // <script>
        '',                                     // <style>
        ' ',                                    // Newlines and tabs
        "\n\n",                                 // <p>
        "\n<div>",                              // </p> before <div>
        '<div>',                                // <br> before <div>
        "\n",                                   // <br>
        '_\\1_',                                // <i>
        '_\\1_',                                // <em>