From 8e7f32fddc7eb5752c30c97005c71cf102cb5021 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Fri, 30 Oct 2015 15:26:12 -0400
Subject: [PATCH] Small improvements in HTML to text conversion.
---
program/lib/Roundcube/rcube_html2text.php | 26 ++++++++++++++++----------
1 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/program/lib/Roundcube/rcube_html2text.php b/program/lib/Roundcube/rcube_html2text.php
index c67dc8e..8072c3e 100644
--- a/program/lib/Roundcube/rcube_html2text.php
+++ b/program/lib/Roundcube/rcube_html2text.php
@@ -136,12 +136,15 @@
* @see $replace
*/
protected $search = array(
- "/\r/", // Non-legal carriage return
- "/[\n\t]+/", // Newlines and tabs
- '/<head[^>]*>.*?<\/head>/i', // <head>
- '/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
- '/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
- '/<p[^>]*>/i', // <P>
+ '/\r/', // Non-legal carriage return
+ '/^.*<body[^>]*>\n*/is', // Anything before <body> (/s: the preamble may span several lines)
+ '/<head[^>]*>.*?<\/head>/is', // <head> (/s: may span several lines)
+ '/<script[^>]*>.*?<\/script>/is', // <script> (/s: may span several lines)
+ '/<style[^>]*>.*?<\/style>/is', // <style> (/s: may span several lines)
+ '/[\n\t]+/', // Newlines and tabs (collapsed only after the multi-line patterns above)
+ '/<p[^>]*>/i', // <p>
+ '/<\/p>[\s\n\t]*<div[^>]*>/i', // </p> before <div>
+ '/<br[^>]*>[\s\n\t]*<div[^>]*>/i', // <br> before <div>
'/<br[^>]*>\s*/i', // <br>
'/<i[^>]*>(.*?)<\/i>/i', // <i>
'/<em[^>]*>(.*?)<\/em>/i', // <em>
@@ -164,11 +167,14 @@
*/
protected $replace = array(
'', // Non-legal carriage return
- ' ', // Newlines and tabs
+ '', // Anything before <body>
'', // <head>
- '', // <script>s -- which strip_tags supposedly has problems with
- '', // <style>s -- which strip_tags supposedly has problems with
- "\n\n", // <P>
+ '', // <script>
+ '', // <style>
+ ' ', // Newlines and tabs
+ "\n\n", // <p>
+ "\n<div>", // </p> before <div>
+ '<div>', // <br> before <div>
"\n", // <br>
'_\\1_', // <i>
'_\\1_', // <em>
--
Gitblit v1.9.1