From bb5d7282855dd83ccdd211cb77d0776dce71468e Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Wed, 12 Dec 2012 02:54:33 -0500
Subject: [PATCH] Use also Envelope-To for identity selection (#1488553)
---
program/lib/html2text.php | 84 +++++++++++++++++++++++++----------------
1 files changed, 51 insertions(+), 33 deletions(-)
diff --git a/program/lib/html2text.php b/program/lib/html2text.php
index 84a7374..34c7193 100644
--- a/program/lib/html2text.php
+++ b/program/lib/html2text.php
@@ -89,7 +89,7 @@
* out that extra spaces should be compressed--a problem addressed with
* Marcus Bointon's fixes but that I had not yet incorporated.
*
- * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
+ * Thanks to Daniel Schledermann (http://www.typoconsult.dk/) for
* suggesting a valuable fix with <a> tag handling.
*
* Thanks to Wojciech Bajon (again!) for suggesting fixes and additions,
@@ -135,6 +135,14 @@
var $width = 70;
/**
+ * Target character encoding for output text
+ *
+ * @var string $charset
+ * @access public
+ */
+ var $charset = 'UTF-8';
+
+ /**
* List of preg* regular expression patterns to search for,
* used in conjunction with $replace.
*
@@ -145,6 +153,7 @@
var $search = array(
"/\r/", // Non-legal carriage return
"/[\n\t]+/", // Newlines and tabs
+ '/<head[^>]*>.*?<\/head>/i', // <head>
'/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
'/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
'/<p[^>]*>/i', // <P>
@@ -172,6 +181,7 @@
var $replace = array(
'', // Non-legal carriage return
' ', // Newlines and tabs
+ '', // <head>
'', // <script>s -- which strip_tags supposedly has problems with
'', // <style>s -- which strip_tags supposedly has problems with
"\n\n", // <P>
@@ -200,7 +210,7 @@
var $ent_search = array(
'/&(nbsp|#160);/i', // Non-breaking space
'/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
- // Double quotes
+ // Double quotes
'/&(apos|rsquo|lsquo|#8216|#8217);/i', // Single quotes
'/>/i', // Greater-than
'/</i', // Less-than
@@ -249,12 +259,11 @@
* @access public
*/
var $callback_search = array(
- '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
- // <a href="">
- '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H3
- '/<(b)[^>]*>(.*?)<\/b>/i', // <b>
- '/<(strong)[^>]*>(.*?)<\/strong>/i', // <strong>
- '/<(th)[^>]*>(.*?)<\/th>/i', // <th> and </th>
+ '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href="">
+ '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6
+ '/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
+ '/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong>
+ '/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th>
);
/**
@@ -346,7 +355,7 @@
* @access public
* @return void
*/
- function html2text( $source = '', $from_file = false, $do_links = true, $width = 75 )
+ function html2text( $source = '', $from_file = false, $do_links = true, $width = 75, $charset = 'UTF-8' )
{
if ( !empty($source) ) {
$this->set_html($source, $from_file);
@@ -355,6 +364,7 @@
$this->set_base_url();
$this->_do_links = $do_links;
$this->width = $width;
+ $this->charset = $charset;
}
/**
@@ -368,7 +378,7 @@
function set_html( $source, $from_file = false )
{
if ( $from_file && file_exists($source) ) {
- $this->html = file_get_contents($source);
+ $this->html = file_get_contents($source);
}
else
$this->html = $source;
@@ -438,11 +448,11 @@
function set_base_url( $url = '' )
{
if ( empty($url) ) {
- if ( !empty($_SERVER['HTTP_HOST']) ) {
- $this->url = 'http://' . $_SERVER['HTTP_HOST'];
- } else {
- $this->url = '';
- }
+ if ( !empty($_SERVER['HTTP_HOST']) ) {
+ $this->url = 'http://' . $_SERVER['HTTP_HOST'];
+ } else {
+ $this->url = '';
+ }
} else {
// Strip any trailing slashes for consistency (relative
// URLs may already start with a slash like "/file.html")
@@ -516,7 +526,7 @@
$text = preg_replace($this->ent_search, $this->ent_replace, $text);
// Replace known html entities
- $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
+ $text = html_entity_decode($text, ENT_QUOTES, $this->charset);
// Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@@ -536,7 +546,7 @@
// for PHP versions >= 4.0.2. Default width is 75
// If width is 0 or less, don't wrap the text.
if ( $this->width > 0 ) {
- $text = wordwrap($text, $this->width);
+ $text = wordwrap($text, $this->width);
}
}
@@ -555,16 +565,16 @@
*/
function _build_link_list( $link, $display )
{
- if (!$this->_do_links || empty($link)) {
- return $display;
- }
-
- // Ignored link types
- if (preg_match('!^(javascript|mailto|#):!i', $link)) {
- return $display;
+ if (!$this->_do_links || empty($link)) {
+ return $display;
}
- if (preg_match('!^(https?://)!i', $link)) {
+ // Ignored link types
+ if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
+ return $display;
+ }
+
+ if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
$url = $link;
}
else {
@@ -576,8 +586,8 @@
}
if (($index = array_search($url, $this->_link_list)) === false) {
- $this->_link_list[] = $url;
$index = count($this->_link_list);
+ $this->_link_list[] = $url;
}
return $display . ' [' . ($index+1) . ']';
@@ -593,12 +603,20 @@
{
// get the content of PRE element
while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
+ $this->pre_content = $matches[1];
+
+ // Run our defined tags search-and-replace with callback
+ $this->pre_content = preg_replace_callback($this->callback_search,
+ array('html2text', '_preg_callback'), $this->pre_content);
+
// convert the content
$this->pre_content = sprintf('<div><br>%s<br></div>',
- preg_replace($this->pre_search, $this->pre_replace, $matches[1]));
+ preg_replace($this->pre_search, $this->pre_replace, $this->pre_content));
+
// replace the content (use callback because content can contain $0 variable)
- $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU',
+ $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU',
array('html2text', '_preg_pre_callback'), $text, 1);
+
// free memory
$this->pre_content = '';
}
@@ -671,11 +689,11 @@
switch (strtolower($matches[1])) {
case 'b':
case 'strong':
- return $this->_toupper($matches[2]);
+ return $this->_toupper($matches[3]);
case 'th':
- return $this->_toupper("\t\t". $matches[2] ."\n");
+ return $this->_toupper("\t\t". $matches[3] ."\n");
case 'h':
- return $this->_toupper("\n\n". $matches[2] ."\n\n");
+ return $this->_toupper("\n\n". $matches[3] ."\n\n");
case 'a':
// Remove spaces in URL (#1487805)
$url = str_replace(' ', '', $matches[3]);
@@ -723,14 +741,14 @@
*/
private function _strtoupper($str)
{
- $str = html_entity_decode($str, ENT_COMPAT, RCMAIL_CHARSET);
+ $str = html_entity_decode($str, ENT_COMPAT, $this->charset);
if (function_exists('mb_strtoupper'))
$str = mb_strtoupper($str);
else
$str = strtoupper($str);
- $str = htmlspecialchars($str, ENT_COMPAT, RCMAIL_CHARSET);
+ $str = htmlspecialchars($str, ENT_COMPAT, $this->charset);
return $str;
}
--
Gitblit v1.9.1