From 8c188058cf9281251cbac5cda43ef833843fd51b Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Fri, 08 Jun 2012 02:53:07 -0400
Subject: [PATCH] Fix handling of links with various URI schemes, e.g. "skype:" (#1488106); fix handling of links inside PRE elements on HTML-to-text conversion; fix indexing of links on HTML-to-text conversion

---
 CHANGELOG                 |    3 +++
 program/lib/html2text.php |   37 ++++++++++++++++++++++---------------
 program/lib/washtml.php   |    2 +-
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 8b0f067..2248129 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 CHANGELOG Roundcube Webmail
 ===========================
 
+- Fix handling of links with various URI schemes e.g. "skype:" (#1488106)
+- Fix handling of links inside PRE elements on html to text conversion
+- Fix indexing of links on html to text conversion
 - Add mail attachments using drag & drop on HTML5 enabled browsers
 - Add workaround for invalid BODYSTRUCTURE response - parse message with Mail_mimeDecode package (#1485585)
 - Decode header value in rcube_mime::get() by default (#1488511)
diff --git a/program/lib/html2text.php b/program/lib/html2text.php
index 84a7374..9de2e96 100644
--- a/program/lib/html2text.php
+++ b/program/lib/html2text.php
@@ -249,12 +249,11 @@
      *  @access public
      */
     var $callback_search = array(
-        '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
-                                                   // <a href="">
-        '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H3
-        '/<(b)[^>]*>(.*?)<\/b>/i',                 // <b>
-        '/<(strong)[^>]*>(.*?)<\/strong>/i',       // <strong>
-        '/<(th)[^>]*>(.*?)<\/th>/i',               // <th> and </th>
+        '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i', // <a href="">
+        '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i',         // h1 - h6
+        '/<(b)( [^>]*)?>(.*?)<\/b>/i',                         // <b>
+        '/<(strong)( [^>]*)?>(.*?)<\/strong>/i',               // <strong>
+        '/<(th)( [^>]*)?>(.*?)<\/th>/i',                       // <th> and </th>
     );
 
    /**
@@ -368,7 +367,7 @@
     function set_html( $source, $from_file = false )
     {
         if ( $from_file && file_exists($source) ) {
-            $this->html = file_get_contents($source); 
+            $this->html = file_get_contents($source);
         }
         else
             $this->html = $source;
@@ -560,11 +559,11 @@
 	    }
 
         // Ignored link types
-	    if (preg_match('!^(javascript|mailto|#):!i', $link)) {
+	    if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
 		    return $display;
         }
 
-	    if (preg_match('!^(https?://)!i', $link)) {
+	    if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link)) {
             $url = $link;
         }
         else {
@@ -576,8 +575,8 @@
         }
 
         if (($index = array_search($url, $this->_link_list)) === false) {
-            $this->_link_list[] = $url;
             $index = count($this->_link_list);
+            $this->_link_list[] = $url;
         }
 
         return $display . ' [' . ($index+1) . ']';
@@ -593,12 +592,20 @@
     {
         // get the content of PRE element
         while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
+            $this->pre_content = $matches[1];
+
+            // Run our defined tags search-and-replace with callback
+            $this->pre_content = preg_replace_callback($this->callback_search,
+                array('html2text', '_preg_callback'), $this->pre_content);
+
             // convert the content
             $this->pre_content = sprintf('<div><br>%s<br></div>',
-                preg_replace($this->pre_search, $this->pre_replace, $matches[1]));
+                preg_replace($this->pre_search, $this->pre_replace, $this->pre_content));
+
             // replace the content (use callback because content can contain $0 variable)
-            $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU', 
+            $text = preg_replace_callback('/<pre[^>]*>.*<\/pre>/ismU',
                 array('html2text', '_preg_pre_callback'), $text, 1);
+
             // free memory
             $this->pre_content = '';
         }
@@ -671,11 +678,11 @@
         switch (strtolower($matches[1])) {
         case 'b':
         case 'strong':
-            return $this->_toupper($matches[2]);
+            return $this->_toupper($matches[3]);
         case 'th':
-            return $this->_toupper("\t\t". $matches[2] ."\n");
+            return $this->_toupper("\t\t". $matches[3] ."\n");
         case 'h':
-            return $this->_toupper("\n\n". $matches[2] ."\n\n");
+            return $this->_toupper("\n\n". $matches[3] ."\n\n");
         case 'a':
             // Remove spaces in URL (#1487805)
             $url = str_replace(' ', '', $matches[3]);
diff --git a/program/lib/washtml.php b/program/lib/washtml.php
index 4221abd..6ea59f0 100644
--- a/program/lib/washtml.php
+++ b/program/lib/washtml.php
@@ -202,7 +202,7 @@
       $key = strtolower($key);
       $value = $node->getAttribute($key);
       if (isset($this->_html_attribs[$key]) ||
-         ($key == 'href' && preg_match('!^(http:|https:|ftp:|mailto:|//|#).+!i', $value)))
+         ($key == 'href' && preg_match('!^([a-z][a-z0-9.+-]+:|//|#).+!i', $value)))
         $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"';
       else if ($key == 'style' && ($style = $this->wash_style($value))) {
         $quot = strpos($style, '"') !== false ? "'" : '"';

--
Gitblit v1.9.1