From 300fc65a461ce37999e3f0df991b24ed1b3c1bd3 Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Thu, 20 Nov 2008 05:29:34 -0500
Subject: [PATCH] - Better HTML entities conversion in html2text (#1485519)

---
 CHANGELOG                       |    1 +
 program/lib/html2text.php       |    7 +++++--
 program/steps/mail/compose.inc  |    3 +--
 bin/html2text.php               |    2 +-
 program/steps/mail/sendmail.inc |    2 +-
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 3509733..775379d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,7 @@
 ----------
 - Fix handling of some malformed messages (#1484438)
 - Speed up raw message body handling
+- Better HTML entities conversion in html2text (#1485519)
 
 2008/11/15 (alec)
 ----------
diff --git a/bin/html2text.php b/bin/html2text.php
index e07fb35..3839f5d 100644
--- a/bin/html2text.php
+++ b/bin/html2text.php
@@ -22,7 +22,7 @@
 define('INSTALL_PATH', realpath(dirname(__FILE__) . '/..') . '/');
 require INSTALL_PATH.'program/include/iniset.php';
 
-$converter = new html2text(html_entity_decode($HTTP_RAW_POST_DATA, ENT_COMPAT, 'UTF-8'));
+$converter = new html2text($HTTP_RAW_POST_DATA);
 
 header('Content-Type: text/plain; charset=UTF-8');
 print trim($converter->get_text());
diff --git a/program/lib/html2text.php b/program/lib/html2text.php
index 4180cac..938dfec 100644
--- a/program/lib/html2text.php
+++ b/program/lib/html2text.php
@@ -232,7 +232,7 @@
         '--',
         '-',
         '*',
-        '�',
+        '£',
         'EUR',                                  // Euro sign. € ?
         '',                                     // Unknown/unhandled entities
         ' '                                     // Runs of spaces, post-handling
@@ -465,7 +465,10 @@
 
 	// Convert <PRE>
         $this->_convert_pre($text);
-	
+
+	// Replace known html entities
+	$text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
+
         // Run our defined search-and-replace
         $text = preg_replace($this->search, $this->replace, $text);
 
diff --git a/program/steps/mail/compose.inc b/program/steps/mail/compose.inc
index d1d913b..d4348f2 100644
--- a/program/steps/mail/compose.inc
+++ b/program/steps/mail/compose.inc
@@ -323,8 +323,7 @@
         if ($a_signatures[$identity_id]['is_html'])
         {
             $h2t = new html2text($a_signatures[$identity_id]['text'], false, false);
-            $plainTextPart = $h2t->get_text();
-            $a_signatures[$identity_id]['plain_text'] = trim(html_entity_decode($plainTextPart, ENT_NOQUOTES, 'UTF-8'));
+            $a_signatures[$identity_id]['plain_text'] = trim($h2t->get_text());
         }
       }
 
diff --git a/program/steps/mail/sendmail.inc b/program/steps/mail/sendmail.inc
index 784d46a..7ee6dd4 100644
--- a/program/steps/mail/sendmail.inc
+++ b/program/steps/mail/sendmail.inc
@@ -265,7 +265,7 @@
     // empty message body breaks attachment handling in drafts 
     $plainTextPart = "\r\n"; 
     }
-  $MAIL_MIME->setTXTBody(html_entity_decode($plainTextPart, ENT_COMPAT, 'utf-8'));
+  $MAIL_MIME->setTXTBody($plainTextPart);
 
   // look for "emoticon" images from TinyMCE and copy into message as attachments
   rcmail_attach_emoticons($MAIL_MIME);

--
Gitblit v1.9.1