| | |
| | | '/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with |
| | | '/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with |
| | | '/<p[^>]*>/i', // <P> |
| | | '/<br[^>]*>/i', // <br> |
| | | '/<br[^>]*>\s*/i', // <br> |
| | | '/<i[^>]*>(.*?)<\/i>/i', // <i> |
| | | '/<em[^>]*>(.*?)<\/em>/i', // <em> |
| | | '/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul> |
| | |
| | | * @see $ent_search |
| | | */ |
| | | protected $ent_replace = array( |
| | | ' ', // Non-breaking space |
| | | "\xC2\xA0", // Non-breaking space |
| | | '"', // Double quotes |
| | | "'", // Single quotes |
| | | '>', |
| | |
| | | // Variables used for building the link list |
| | | $this->_link_list = array(); |
| | | |
| | | $text = trim(stripslashes($this->html)); |
| | | $text = $this->html; |
| | | |
| | | // Convert HTML to TXT |
| | | $this->_converter($text); |
| | |
| | | // Replace known html entities |
| | | $text = html_entity_decode($text, ENT_QUOTES, $this->charset); |
| | | |
| | | // Replace unicode nbsp to regular spaces |
| | | $text = preg_replace('/\xC2\xA0/', ' ', $text); |
| | | |
| | | // Remove unknown/unhandled entities (this cannot be done in search-and-replace block) |
| | | $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); |
| | | |
| | |
| | | * @param string $link URL of the link |
| | | * @param string $display Part of the text to associate number with |
| | | */ |
| | | protected function _build_link_list( $link, $display ) |
| | | protected function _build_link_list($link, $display) |
| | | { |
| | | if (!$this->_do_links || empty($link)) { |
| | | return $display; |
| | |
| | | |
| | | // Ignored link types |
| | | if (preg_match('!^(javascript:|mailto:|#)!i', $link)) { |
| | | return $display; |
| | | } |
| | | |
| | | // skip links with href == content (#1490434) |
| | | if ($link === $display) { |
| | | return $display; |
| | | } |
| | | |
| | |
| | | */ |
| | | protected function _convert_blockquotes(&$text) |
| | | { |
| | | if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) { |
| | | $level = 0; |
| | | $diff = 0; |
| | | foreach ($matches[0] as $m) { |
| | | if ($m[0][0] == '<' && $m[0][1] == '/') { |
| | | $level--; |
| | | if ($level < 0) { |
| | | $level = 0; // malformed HTML: go to next blockquote |
| | | } |
| | | else if ($level > 0) { |
| | | // skip inner blockquote |
| | | } |
| | | else { |
| | | $end = $m[1]; |
| | | $len = $end - $taglen - $start; |
| | | // Get blockquote content |
| | | $body = substr($text, $start + $taglen - $diff, $len); |
| | | $level = 0; |
| | | $offset = 0; |
| | | while (($start = strpos($text, '<blockquote', $offset)) !== false) { |
| | | $offset = $start + 12; |
| | | do { |
| | | $end = strpos($text, '</blockquote>', $offset); |
| | | $next = strpos($text, '<blockquote', $offset); |
| | | |
| | | // Set text width |
| | | $p_width = $this->width; |
| | | if ($this->width > 0) $this->width -= 2; |
| | | // Convert blockquote content |
| | | $body = trim($body); |
| | | $this->_converter($body); |
| | | // Add citation markers and create PRE block |
| | | $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body)); |
| | | $body = '<pre>' . htmlspecialchars($body) . '</pre>'; |
| | | // Re-set text width |
| | | $this->width = $p_width; |
| | | // Replace content |
| | | $text = substr($text, 0, $start - $diff) |
| | | . $body . substr($text, $end + strlen($m[0]) - $diff); |
| | | |
| | | $diff = $len + $taglen + strlen($m[0]) - strlen($body); |
| | | unset($body); |
| | | } |
| | | // nested <blockquote>, skip |
| | | if ($next !== false && $next < $end) { |
| | | $offset = $next + 12; |
| | | $level++; |
| | | } |
| | | // nested </blockquote> tag |
| | | if ($end !== false && $level > 0) { |
| | | $offset = $end + 12; |
| | | $level--; |
| | | } |
| | | // found matching end tag |
| | | else if ($end !== false && $level == 0) { |
| | | $taglen = strpos($text, '>', $start) - $start; |
| | | $startpos = $start + $taglen + 1; |
| | | |
| | | // get blockquote content |
| | | $body = trim(substr($text, $startpos, $end - $startpos)); |
| | | |
| | | // adjust text wrapping width |
| | | $p_width = $this->width; |
| | | if ($this->width > 0) $this->width -= 2; |
| | | |
| | | // replace content with inner blockquotes |
| | | $this->_converter($body); |
| | | |
| | | // restore text width |
| | | $this->width = $p_width; |
| | | |
| | | // Add citation markers and create <pre> block |
| | | $body = preg_replace_callback('/((?:^|\n)>*)([^\n]*)/', array($this, 'blockquote_citation_callback'), trim($body)); |
| | | $body = '<pre>' . htmlspecialchars($body) . '</pre>'; |
| | | |
| | | $text = substr_replace($text, $body . "\n", $start, $end + 13 - $start); |
| | | $offset = 0; |
| | | |
| | | break; |
| | | } |
| | | // abort on invalid tag structure (e.g. no closing tag found) |
| | | else { |
| | | if ($level == 0) { |
| | | $start = $m[1]; |
| | | $taglen = strlen($m[0]); |
| | | } |
| | | $level ++; |
| | | break; |
| | | } |
| | | } |
| | | while ($end || $next); |
| | | } |
| | | } |
| | | |
/**
 * Callback function to correctly add citation markers for blockquote contents.
 *
 * Adds one more '>' marker after the matched line prefix and separates it
 * from the line text with a single space, unless the text itself already
 * begins with a citation marker.
 *
 * @param array PREG matches: [1] matched line prefix, [2] remainder of the line
 *
 * @return string Line prefix, the added '>' marker and the left-trimmed line text
 */
public function blockquote_citation_callback($m)
{
    $line = ltrim($m[2]);

    // A blank line leaves $line empty; reading offset 0 of an empty string
    // raises "Uninitialized string offset", so check for content first.
    $space = ($line !== '' && $line[0] === '>') ? '' : ' ';

    return $m[1] . '>' . $space . $line;
}
| | | |
| | | /** |
| | | * Callback function for preg_replace_callback use. |
| | | * |
| | | * @param array PREG matches |