thomascube
2010-06-08 af3cf8a0a7de74ab169f44277eda73f5f9e18cd7
program/lib/html2text.php
@@ -148,7 +148,6 @@
        '/[ ]{2,}/',                             // Runs of spaces, pre-handling
        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
        //'/<!-- .* -->/',                         // Comments -- which strip_tags might have a problem with
        '/<p[^>]*>/i',                           // <P>
        '/<br[^>]*>/i',                          // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
@@ -158,6 +157,7 @@
        '/<li[^>]*>(.*?)<\/li>/i',               // <li> and </li>
        '/<li[^>]*>/i',                          // <li>
        '/<hr[^>]*>/i',                          // <hr>
        '/<div[^>]*>/i',                         // <div>
        '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
        '/<td[^>]*>(.*?)<\/td>/i',               // <td> and </td>
@@ -192,8 +192,7 @@
        ' ',                                    // Runs of spaces, pre-handling
        '',                                     // <script>s -- which strip_tags supposedly has problems with
        '',                                     // <style>s -- which strip_tags supposedly has problems with
        //'',                                     // Comments -- which strip_tags might have a problem with
        "\n\n",                               // <P>
        "\n\n",                                 // <P>
        "\n",                                   // <br>
        '_\\1_',                                // <i>
        '_\\1_',                                // <em>
@@ -202,6 +201,7 @@
        "\t* \\1\n",                            // <li> and </li>
        "\n\t* ",                               // <li>
        "\n-------------------------\n",        // <hr>
        "<div>\n",                                   // <div>
        "\n\n",                                 // <table> and </table>
        "\n",                                   // <tr> and </tr>
        "\t\t\\1\n",                            // <td> and </td>
@@ -468,10 +468,12 @@
        // Run our defined search-and-replace
        $text = preg_replace($this->search, $this->replace, $text);
        $text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text);
        // Replace known html entities
        $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
        // Run our defined search-and-replace with callback
        $text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text);
        // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
        $text = preg_replace('/&[^&;]+;/i', '', $text); 
@@ -522,9 +524,9 @@
            $this->_link_count++;
            $this->_link_list .= "[" . $this->_link_count . "] $link\n";
            $additional = ' [' . $this->_link_count . ']';
      } elseif ( substr($link, 0, 11) == 'javascript:' ) {
         // Don't count the link; ignore it
         $additional = '';
   } elseif ( substr($link, 0, 11) == 'javascript:' ) {
      // Don't count the link; ignore it
      $additional = '';
      // what about href="#anchor" ?
        } else {
            $this->_link_count++;
@@ -566,7 +568,7 @@
        case 'b':
        case 'strong':
            return $this->_strtoupper($matches[2]);
        case 'hr':
        case 'th':
            return $this->_strtoupper("\t\t". $matches[2] ."\n");
        case 'h':
            return $this->_strtoupper("\n\n". $matches[2] ."\n\n");