From 300fc65a461ce37999e3f0df991b24ed1b3c1bd3 Mon Sep 17 00:00:00 2001 From: alecpl <alec@alec.pl> Date: Thu, 20 Nov 2008 05:29:34 -0500 Subject: [PATCH] - Better HTML entities conversion in html2text (#1485519) --- CHANGELOG | 1 + program/lib/html2text.php | 7 +++++-- program/steps/mail/compose.inc | 3 +-- bin/html2text.php | 2 +- program/steps/mail/sendmail.inc | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3509733..775379d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,7 @@ ---------- - Fix handling of some malformed messages (#1484438) - Speed up raw message body handling +- Better HTML entities conversion in html2text (#1485519) 2008/11/15 (alec) ---------- diff --git a/bin/html2text.php b/bin/html2text.php index e07fb35..3839f5d 100644 --- a/bin/html2text.php +++ b/bin/html2text.php @@ -22,7 +22,7 @@ define('INSTALL_PATH', realpath(dirname(__FILE__) . '/..') . '/'); require INSTALL_PATH.'program/include/iniset.php'; -$converter = new html2text(html_entity_decode($HTTP_RAW_POST_DATA, ENT_COMPAT, 'UTF-8')); +$converter = new html2text($HTTP_RAW_POST_DATA); header('Content-Type: text/plain; charset=UTF-8'); print trim($converter->get_text()); diff --git a/program/lib/html2text.php b/program/lib/html2text.php index 4180cac..938dfec 100644 --- a/program/lib/html2text.php +++ b/program/lib/html2text.php @@ -232,7 +232,7 @@ '--', '-', '*', - '�', + '£', 'EUR', // Euro sign. � ? '', // Unknown/unhandled entities ' ' // Runs of spaces, post-handling @@ -465,7 +465,10 @@ // Convert <PRE> $this->_convert_pre($text); - + + // Replace known html entities + $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8'); + // Run our defined search-and-replace $text = preg_replace($this->search, $this->replace, $text); diff --git a/program/steps/mail/compose.inc b/program/steps/mail/compose.inc index d1d913b..d4348f2 100644 --- a/program/steps/mail/compose.inc +++ b/program/steps/mail/compose.inc @@ -323,8 +323,7 @@ if ($a_signatures[$identity_id]['is_html']) { $h2t = new html2text($a_signatures[$identity_id]['text'], false, false); - $plainTextPart = $h2t->get_text(); - $a_signatures[$identity_id]['plain_text'] = trim(html_entity_decode($plainTextPart, ENT_NOQUOTES, 'UTF-8')); + $a_signatures[$identity_id]['plain_text'] = trim($h2t->get_text()); } } diff --git a/program/steps/mail/sendmail.inc b/program/steps/mail/sendmail.inc index 784d46a..7ee6dd4 100644 --- a/program/steps/mail/sendmail.inc +++ b/program/steps/mail/sendmail.inc @@ -265,7 +265,7 @@ // empty message body breaks attachment handling in drafts $plainTextPart = "\r\n"; } - $MAIL_MIME->setTXTBody(html_entity_decode($plainTextPart, ENT_COMPAT, 'utf-8')); + $MAIL_MIME->setTXTBody($plainTextPart); // look for "emoticon" images from TinyMCE and copy into message as attachments rcmail_attach_emoticons($MAIL_MIME); -- Gitblit v1.9.1