alecpl
2008-12-12 f50cc72cbbe443dff8a5daa34ece6cafe8757089
#1485618: fix code injection vulnerability


1 files modified
69 ■■■■ changed files
program/lib/html2text.php 69 ●●●● patch | view | raw | blame | history
program/lib/html2text.php
@@ -149,25 +149,18 @@
        '/<script[^>]*>.*?<\/script>/i',         // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',           // <style>s -- which strip_tags supposedly has problems with
        //'/<!-- .* -->/',                         // Comments -- which strip_tags might have problem a with
        '/<h[123][^>]*>(.*?)<\/h[123]>/ie',      // H1 - H3
        '/<h[456][^>]*>(.*?)<\/h[456]>/ie',      // H4 - H6
        '/<p[^>]*>/i',                           // <P>
        '/<br[^>]*>/i',                          // <br>
        '/<b[^>]*>(.*?)<\/b>/ie',                // <b>
        '/<strong[^>]*>(.*?)<\/strong>/ie',      // <strong>
        '/<i[^>]*>(.*?)<\/i>/i',                 // <i>
        '/<em[^>]*>(.*?)<\/em>/i',               // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                 // <ul> and </ul>
        '/(<ol[^>]*>|<\/ol>)/i',                 // <ol> and </ol>
        '/<li[^>]*>(.*?)<\/li>/i',               // <li> and </li>
        '/<li[^>]*>/i',                          // <li>
        '/<a [^>]*href=("|\')([^"\']+)\1[^>]*>(.*?)<\/a>/ie',
                                                 // <a href="">
        '/<hr[^>]*>/i',                          // <hr>
        '/(<table[^>]*>|<\/table>)/i',           // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                 // <tr> and </tr>
        '/<td[^>]*>(.*?)<\/td>/i',               // <td> and </td>
        '/<th[^>]*>(.*?)<\/th>/ie',              // <th> and </th>
        '/&(nbsp|#160);/i',                      // Non-breaking space
        '/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i',
                                                 // Double quotes
@@ -201,25 +194,18 @@
        '',                                     // <script>s -- which strip_tags supposedly has problems with
        '',                                     // <style>s -- which strip_tags supposedly has problems with
        //'',                                     // Comments -- which strip_tags might have problem a with
        "strtoupper(\"\n\n\\1\n\n\")",          // H1 - H3
        "ucwords(\"\n\n\\1\n\")",             // H4 - H6
        "\n\n",                               // <P>
        "\n",                                   // <br>
        'strtoupper("\\1")',                    // <b>
        'strtoupper("\\1")',                    // <strong>
        '_\\1_',                                // <i>
        '_\\1_',                                // <em>
        "\n\n",                                 // <ul> and </ul>
        "\n\n",                                 // <ol> and </ol>
        "\t* \\1\n",                            // <li> and </li>
        "\n\t* ",                               // <li>
        '$this->_build_link_list("\\2", "\\3")',
                                            // <a href="">
        "\n-------------------------\n",        // <hr>
        "\n\n",                                 // <table> and </table>
        "\n",                                   // <tr> and </tr>
        "\t\t\\1\n",                            // <td> and </td>
        "strtoupper(\"\t\t\\1\n\")",            // <th> and </th>
        ' ',                                    // Non-breaking space
        '"',                                    // Double quotes
        "'",                                    // Single quotes
@@ -236,6 +222,22 @@
        'EUR',                                  // Euro sign. € ?
        '',                                     // Unknown/unhandled entities
        ' '                                     // Runs of spaces, post-handling
    );
    /**
     *  List of preg* regular expression patterns to search for
     *  and replace using callback function.
     *
     *  @var array $callback_search
     *  @access public
     */
    var $callback_search = array(
        '/<(h)[123456][^>]*>(.*?)<\/h[123456]>/i', // H1 - H3
        '/<(b)[^>]*>(.*?)<\/b>/i',                 // <b>
        '/<(strong)[^>]*>(.*?)<\/strong>/i',       // <strong>
        '/<(a) [^>]*href=("|\')([^"\']+)\2[^>]*>(.*?)<\/a>/i',
                                                   // <a href="">
        '/<(th)[^>]*>(.*?)<\/th>/i',               // <th> and </th>
    );
   /**
@@ -471,6 +473,7 @@
        // Run our defined search-and-replace
        $text = preg_replace($this->search, $this->replace, $text);
        $text = preg_replace_callback($this->callback_search, array('html2text', '_preg_callback'), $text);
        // Strip any other HTML tags
        $text = strip_tags($text, $this->allowed_tags);
@@ -549,6 +552,44 @@
        $text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', '<div><br>' . $result . '<br></div>', $text, 1);
    }
    }
    /**
     *  Callback function for preg_replace_callback use.
     *
     *  @param  array PREG matches
     *  @return string
     *  @access private
     */
    function _preg_callback($matches)
    {
    switch($matches[1])
    {
        case 'b':
        case 'strong':
        return $this->_strtoupper($matches[2]);
        case 'hr':
        return $this->_strtoupper("\t\t". $matches[2] ."\n");
        case 'h':
        return $this->_strtoupper("\n\n". $matches[2] ."\n\n");
        case 'a':
                return $this->_build_link_list($matches[3], $matches[4]);
    }
    }
    /**
     *  Strtoupper multibyte wrapper function
     *
     *  @param  string
     *  @return string
     *  @access private
     */
    function _strtoupper($str)
    {
    if (function_exists('mb_strtoupper'))
            return mb_strtoupper($str);
        else
        return strtoupper($str);
    }
}
?>