From ed1d212ae2daea5e4bd043417610177093e99f19 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Sat, 16 Jan 2016 03:03:51 -0500
Subject: [PATCH] Improved SVG cleanup code
---
program/lib/Roundcube/rcube_utils.php | 406 ++++++++++++++++++++++++++++++++++++++-------------------
1 files changed, 270 insertions(+), 136 deletions(-)
diff --git a/program/lib/Roundcube/rcube_utils.php b/program/lib/Roundcube/rcube_utils.php
index 4a183bd..06f4314 100644
--- a/program/lib/Roundcube/rcube_utils.php
+++ b/program/lib/Roundcube/rcube_utils.php
@@ -1,6 +1,6 @@
<?php
-/*
+/**
+-----------------------------------------------------------------------+
| This file is part of the Roundcube Webmail client |
| Copyright (C) 2008-2012, The Roundcube Dev Team |
@@ -103,13 +103,14 @@
}
foreach ($domain_array as $part) {
- if (!preg_match('/^(([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|([A-Za-z0-9]))$/', $part)) {
+ if (!preg_match('/^((xn--)?([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|([A-Za-z0-9]))$/', $part)) {
return false;
}
}
// last domain part
- if (preg_match('/[^a-zA-Z]/', array_pop($domain_array))) {
+ $last_part = array_pop($domain_array);
+ if (strpos($last_part, 'xn--') !== 0 && preg_match('/[^a-zA-Z]/', $last_part)) {
return false;
}
@@ -117,17 +118,6 @@
if (!$dns_check || !$rcube->config->get('email_dns_check')) {
return true;
- }
-
- if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN' && version_compare(PHP_VERSION, '5.3.0', '<')) {
- $lookup = array();
- @exec("nslookup -type=MX " . escapeshellarg($domain_part) . " 2>&1", $lookup);
- foreach ($lookup as $line) {
- if (strpos($line, 'MX preference')) {
- return true;
- }
- }
- return false;
}
// find MX record(s)
@@ -144,7 +134,6 @@
return false;
}
-
/**
* Validates IPv4 or IPv6 address
*
@@ -154,41 +143,8 @@
*/
public static function check_ip($ip)
{
- // IPv6, but there's no build-in IPv6 support
- if (strpos($ip, ':') !== false && !defined('AF_INET6')) {
- $parts = explode(':', $ip);
- $count = count($parts);
-
- if ($count > 8 || $count < 2) {
- return false;
- }
-
- foreach ($parts as $idx => $part) {
- $length = strlen($part);
- if (!$length) {
- // there can be only one ::
- if ($found_empty) {
- return false;
- }
- $found_empty = true;
- }
- // last part can be an IPv4 address
- else if ($idx == $count - 1) {
- if (!preg_match('/^[0-9a-f]{1,4}$/i', $part)) {
- return @inet_pton($part) !== false;
- }
- }
- else if (!preg_match('/^[0-9a-f]{1,4}$/i', $part)) {
- return false;
- }
- }
-
- return true;
- }
-
- return @inet_pton($ip) !== false;
+ return filter_var($ip, FILTER_VALIDATE_IP) !== false;
}
-
/**
* Check whether the HTTP referer matches the current request
@@ -197,27 +153,27 @@
*/
public static function check_referer()
{
- $uri = parse_url($_SERVER['REQUEST_URI']);
+ $uri = parse_url($_SERVER['REQUEST_URI']);
$referer = parse_url(self::request_header('Referer'));
+
return $referer['host'] == self::request_header('Host') && $referer['path'] == $uri['path'];
}
-
/**
* Replacing specials characters to a specific encoding type
*
- * @param string Input string
- * @param string Encoding type: text|html|xml|js|url
- * @param string Replace mode for tags: show|replace|remove
- * @param boolean Convert newlines
+ * @param string Input string
+ * @param string Encoding type: text|html|xml|js|url
+ * @param string Replace mode for tags: show|remove|strict
+ * @param boolean Convert newlines
*
- * @return string The quoted string
+ * @return string The quoted string
*/
public static function rep_specialchars_output($str, $enctype = '', $mode = '', $newlines = true)
{
static $html_encode_arr = false;
- static $js_rep_table = false;
- static $xml_rep_table = false;
+ static $js_rep_table = false;
+ static $xml_rep_table = false;
if (!is_string($str)) {
$str = strval($str);
@@ -232,8 +188,11 @@
$encode_arr = $html_encode_arr;
- // don't replace quotes and html tags
- if ($mode == 'show' || $mode == '') {
+ if ($mode == 'remove') {
+ $str = strip_tags($str);
+ }
+ else if ($mode != 'strict') {
+ // don't replace quotes and html tags
$ltpos = strpos($str, '<');
if ($ltpos !== false && strpos($str, '>', $ltpos) !== false) {
unset($encode_arr['"']);
@@ -241,9 +200,6 @@
unset($encode_arr['>']);
unset($encode_arr['&']);
}
- }
- else if ($mode == 'remove') {
- $str = strip_tags($str);
}
$out = strtr($str, $encode_arr);
@@ -266,8 +222,8 @@
$js_rep_table["'"] = "\\'";
$js_rep_table["\\"] = "\\\\";
// Unicode line and paragraph separators (#1486310)
- $js_rep_table[chr(hexdec(E2)).chr(hexdec(80)).chr(hexdec(A8))] = '
';
- $js_rep_table[chr(hexdec(E2)).chr(hexdec(80)).chr(hexdec(A9))] = '
';
+ $js_rep_table[chr(hexdec('E2')).chr(hexdec('80')).chr(hexdec('A8'))] = '
';
+ $js_rep_table[chr(hexdec('E2')).chr(hexdec('80')).chr(hexdec('A9'))] = '
';
}
// encode for javascript use
@@ -277,7 +233,7 @@
// encode for plaintext
if ($enctype == 'text') {
- return str_replace("\r\n", "\n", $mode=='remove' ? strip_tags($str) : $str);
+ return str_replace("\r\n", "\n", $mode == 'remove' ? strip_tags($str) : $str);
}
if ($enctype == 'url') {
@@ -293,21 +249,20 @@
return $str;
}
-
/**
* Read input value and convert it for internal use
* Performs stripslashes() and charset conversion if necessary
*
- * @param string Field name to read
- * @param int Source to get value from (GPC)
- * @param boolean Allow HTML tags in field value
- * @param string Charset to convert into
+ * @param string Field name to read
+ * @param int Source to get value from (GPC)
+ * @param boolean Allow HTML tags in field value
+ * @param string Charset to convert into
*
- * @return string Field value or NULL if not available
+ * @return string Field value or NULL if not available
*/
- public static function get_input_value($fname, $source, $allow_html=FALSE, $charset=NULL)
+ public static function get_input_value($fname, $source, $allow_html = false, $charset = null)
{
- $value = NULL;
+ $value = null;
if ($source == self::INPUT_GET) {
if (isset($_GET[$fname])) {
@@ -334,18 +289,17 @@
return self::parse_input_value($value, $allow_html, $charset);
}
-
/**
* Parse/validate input value. See self::get_input_value()
* Performs stripslashes() and charset conversion if necessary
*
- * @param string Input value
- * @param boolean Allow HTML tags in field value
- * @param string Charset to convert into
+ * @param string Input value
+ * @param boolean Allow HTML tags in field value
+ * @param string Charset to convert into
*
- * @return string Parsed value
+ * @return string Parsed value
*/
- public static function parse_input_value($value, $allow_html=FALSE, $charset=NULL)
+ public static function parse_input_value($value, $allow_html = false, $charset = null)
{
global $OUTPUT;
@@ -385,7 +339,6 @@
return $value;
}
-
/**
* Convert array of request parameters (prefixed with _)
* to a regular array with non-prefixed keys.
@@ -411,7 +364,6 @@
return $out;
}
-
/**
* Convert the given string into a valid HTML identifier
* Same functionality as done in app.js with rcube_webmail.html_identifier()
@@ -426,7 +378,6 @@
}
}
-
/**
* Replace all css definitions with #container [def]
* and remove css-inlined scripting
@@ -436,9 +387,9 @@
*
* @return string Modified CSS source
*/
- public static function mod_css_styles($source, $container_id, $allow_remote=false)
+ public static function mod_css_styles($source, $container_id, $allow_remote = false)
{
- $last_pos = 0;
+ $last_pos = 0;
$replacements = new rcube_string_replacer;
// ignore the whole block if evil styles are detected
@@ -503,12 +454,11 @@
return $source;
}
-
/**
* Generate CSS classes from mimetype and filename extension
*
- * @param string $mimetype Mimetype
- * @param string $filename Filename
+ * @param string $mimetype Mimetype
+ * @param string $filename Filename
*
* @return string CSS classes separated by space
*/
@@ -519,7 +469,7 @@
list($primary, $secondary) = explode('/', $mimetype);
- $classes = array($primary ? $primary : 'unknown');
+ $classes = array($primary ?: 'unknown');
if ($secondary) {
$classes[] = $secondary;
@@ -533,7 +483,6 @@
return join(" ", $classes);
}
-
/**
* Decode escaped entities used by known XSS exploits.
@@ -553,7 +502,6 @@
return $out;
}
-
/**
* preg_replace_callback callback for xss_entity_decode
*
@@ -565,7 +513,6 @@
{
return chr(hexdec($matches[1]));
}
-
/**
* Check if we can process not exceeding memory_limit
@@ -582,7 +529,6 @@
return $mem_limit > 0 && $memory + $need > $mem_limit ? false : true;
}
-
/**
* Check if working in SSL mode
*
@@ -593,26 +539,24 @@
*/
public static function https_check($port=null, $use_https=true)
{
- global $RCMAIL;
-
if (!empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) != 'off') {
return true;
}
if (!empty($_SERVER['HTTP_X_FORWARDED_PROTO'])
&& strtolower($_SERVER['HTTP_X_FORWARDED_PROTO']) == 'https'
- && in_array($_SERVER['REMOTE_ADDR'], rcube::get_instance()->config->get('proxy_whitelist', array()))) {
+ && in_array($_SERVER['REMOTE_ADDR'], rcube::get_instance()->config->get('proxy_whitelist', array()))
+ ) {
return true;
}
if ($port && $_SERVER['SERVER_PORT'] == $port) {
return true;
}
- if ($use_https && isset($RCMAIL) && $RCMAIL->config->get('use_https')) {
+ if ($use_https && rcube::get_instance()->config->get('use_https')) {
return true;
}
return false;
}
-
/**
* Replaces hostname variables.
@@ -635,10 +579,11 @@
// %d - domain name without first part
$d = preg_replace('/^[^\.]+\./', '', $_SERVER['HTTP_HOST']);
// %h - IMAP host
- $h = $_SESSION['storage_host'] ? $_SESSION['storage_host'] : $host;
+ $h = $_SESSION['storage_host'] ?: $host;
// %z - IMAP domain without first part, e.g. %h=imap.domain.tld, %z=domain.tld
$z = preg_replace('/^[^\.]+\./', '', $h);
- // %s - domain name after the '@' from e-mail address provided at login screen. Returns FALSE if an invalid email is provided
+ // %s - domain name after the '@' from e-mail address provided at login screen.
+ // Returns FALSE if an invalid email is provided
if (strpos($name, '%s') !== false) {
$user_email = self::get_input_value('_user', self::INPUT_POST);
$user_email = self::idn_convert($user_email, true);
@@ -650,7 +595,6 @@
return str_replace(array('%n', '%t', '%d', '%h', '%z', '%s'), array($n, $t, $d, $h, $z, $s[2]), $name);
}
-
/**
* Returns remote IP address and forwarded addresses if found
@@ -665,6 +609,7 @@
if (!empty($_SERVER['HTTP_X_REAL_IP'])) {
$remote_ip[] = 'X-Real-IP: ' . $_SERVER['HTTP_X_REAL_IP'];
}
+
// append the X-Forwarded-For header, if set
if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
$remote_ip[] = 'X-Forwarded-For: ' . $_SERVER['HTTP_X_FORWARDED_FOR'];
@@ -676,7 +621,6 @@
return $address;
}
-
/**
* Returns the real remote IP address
@@ -713,9 +657,9 @@
/**
* Read a specific HTTP request header.
*
- * @param string $name Header name
+ * @param string $name Header name
*
- * @return mixed Header value or null if not available
+ * @return mixed Header value or null if not available
*/
public static function request_header($name)
{
@@ -759,17 +703,18 @@
return $result;
}
-
/**
* Improved equivalent to strtotime()
*
- * @param string $date Date string
+ * @param string $date Date string
+ * @param DateTimeZone $timezone Timezone to use for DateTime object
*
* @return int Unix timestamp
*/
- public static function strtotime($date)
+ public static function strtotime($date, $timezone = null)
{
- $date = self::clean_datestr($date);
+ $date = self::clean_datestr($date);
+ $tzname = $timezone ? ' ' . $timezone->getName() : '';
// unix timestamp
if (is_numeric($date)) {
@@ -778,7 +723,7 @@
// if date parsing fails, we have a date in non-rfc format.
// remove token from the end and try again
- while ((($ts = @strtotime($date)) === false) || ($ts < 0)) {
+ while ((($ts = @strtotime($date . $tzname)) === false) || ($ts < 0)) {
$d = explode(' ', $date);
array_pop($d);
if (!$d) {
@@ -793,13 +738,14 @@
/**
* Date parsing function that turns the given value into a DateTime object
*
- * @param string $date Date string
+ * @param string $date Date string
+ * @param DateTimeZone $timezone Timezone to use for DateTime object
*
- * @return object DateTime instance or false on failure
+ * @return DateTime instance or false on failure
*/
- public static function anytodatetime($date)
+ public static function anytodatetime($date, $timezone = null)
{
- if (is_object($date) && is_a($date, 'DateTime')) {
+ if ($date instanceof DateTime) {
return $date;
}
@@ -809,7 +755,7 @@
// try to parse string with DateTime first
if (!empty($date)) {
try {
- $dt = new DateTime($date);
+ $dt = $timezone ? new DateTime($date, $timezone) : new DateTime($date);
}
catch (Exception $e) {
// ignore
@@ -817,9 +763,12 @@
}
// try our advanced strtotime() method
- if (!$dt && ($timestamp = self::strtotime($date))) {
+ if (!$dt && ($timestamp = self::strtotime($date, $timezone))) {
try {
$dt = new DateTime("@".$timestamp);
+ if ($timezone) {
+ $dt->setTimezone($timezone);
+ }
}
catch (Exception $e) {
// ignore
@@ -884,7 +833,6 @@
return self::idn_convert($str, true);
}
-
/*
* Idn_to_ascii wrapper.
* Intl/Idn modules version of this function doesn't work with e-mail address
@@ -894,8 +842,7 @@
return self::idn_convert($str, false);
}
-
- public static function idn_convert($input, $is_utf=false)
+ public static function idn_convert($input, $is_utf = false)
{
if ($at = strpos($input, '@')) {
$user = substr($input, 0, $at);
@@ -918,26 +865,34 @@
* Split the given string into word tokens
*
* @param string Input to tokenize
+ * @param integer Minimum length of a single token
* @return array List of tokens
*/
- public static function tokenize_string($str)
+ public static function tokenize_string($str, $minlen = 2)
{
- return explode(" ", preg_replace(
- array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u'),
- array(' ', '\\1\\2', ' '),
- $str));
+ $expr = array('/[\s;,"\'\/+-]+/ui', '/(\d)[-.\s]+(\d)/u');
+ $repl = array(' ', '\\1\\2');
+ // standalone words shorter than $minlen characters are replaced with a space
+ if ($minlen > 1) {
+ $minlen--;
+ $expr[] = "/(^|\s+)\w{1,$minlen}(\s+|$)/u";
+ $repl[] = ' ';
+ }
+ // 'strlen' drops only empty strings (a bare array_filter() would also drop "0"); reindex the list
+ return array_values(array_filter(explode(" ", preg_replace($expr, $repl, $str)), 'strlen'));
}
/**
* Normalize the given string for fulltext search.
- * Currently only optimized for Latin-1 characters; to be extended
+ * Currently only optimized for ISO-8859-1 and ISO-8859-2 characters; to be extended
*
* @param string Input string (UTF-8)
* @param boolean True to return list of words as array
+ * @param integer Minimum length of tokens
*
- * @return mixed Normalized string or a list of normalized tokens
+ * @return mixed Normalized string or a list of normalized tokens
*/
- public static function normalize_string($str, $as_array = false)
+ public static function normalize_string($str, $as_array = false, $minlen = 2)
{
// replace 4-byte unicode characters with '?' character,
// these are not supported in default utf-8 charset on mysql,
@@ -949,20 +904,66 @@
. ')/', '?', $str);
// split by words
- $arr = self::tokenize_string($str);
+ $arr = self::tokenize_string($str, $minlen);
+
+ // detect character set
+ if (utf8_encode(utf8_decode($str)) == $str) {
+ // ISO-8859-1 (or ASCII)
+ preg_match_all('/./u', 'äâàåáãæçéêëèïîìíñöôòøõóüûùúýÿ', $keys);
+ preg_match_all('/./', 'aaaaaaaceeeeiiiinoooooouuuuyy', $values);
+
+ $mapping = array_combine($keys[0], $values[0]);
+ $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+ }
+ else if (rcube_charset::convert(rcube_charset::convert($str, 'UTF-8', 'ISO-8859-2'), 'ISO-8859-2', 'UTF-8') == $str) {
+ // ISO-8859-2
+ preg_match_all('/./u', 'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżý', $keys);
+ preg_match_all('/./', 'aaaaccceeeeiilllnnooorrsssttuuuuzzzy', $values);
+
+ $mapping = array_combine($keys[0], $values[0]);
+ $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+ }
foreach ($arr as $i => $part) {
- if (utf8_encode(utf8_decode($part)) == $part) { // is latin-1 ?
- $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
- 'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
- 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
- array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+ $part = mb_strtolower($part);
+
+ if (!empty($mapping)) {
+ $part = strtr($part, $mapping);
}
- else
- $arr[$i] = mb_strtolower($part);
+
+ $arr[$i] = $part;
}
return $as_array ? $arr : join(" ", $arr);
+ }
+
+    /**
+     * Check that every word of the needle occurs in the haystack.
+     * Matching is case-insensitive and ignores word order.
+     *
+     * @param string Haystack
+     * @param string Needle
+     *
+     * @return boolean True if match, False otherwise
+     */
+    public static function words_match($haystack, $needle)
+    {
+        $words   = self::tokenize_string($needle, 1);
+        $subject = join(" ", self::tokenize_string($haystack, 1));
+
+        // search the raw haystack when tokenizing left nothing to search in
+        if (strlen($subject) === 0) {
+            $subject = $haystack;
+        }
+
+        foreach ($words as $word) {
+            // a single missing word is enough to fail the match
+            if (stripos($subject, $word) === false) {
+                return false;
+            }
+        }
+
+        return true;
+    }
/**
@@ -1041,7 +1042,6 @@
}
}
-
/**
* Find out if the string content means true or false
*
@@ -1065,7 +1065,141 @@
return (bool) preg_match('!^[a-z]:[\\\\/]!i', $path);
}
else {
- return $path[0] == DIRECTORY_SEPARATOR;
+ return $path[0] == '/';
}
}
+
+    /**
+     * Resolve relative URL against the scheme, host and port of the current request.
+     * Absolute http(s) URLs are returned unchanged.
+     *
+     * @param string $url Relative URL
+     *
+     * @return string Absolute URL
+     */
+    public static function resolve_url($url)
+    {
+        // already absolute; the scheme is matched case-insensitively
+        if (preg_match('|^https?://|i', $url)) {
+            return $url;
+        }
+
+        $secure       = self::https_check();
+        $schema       = $secure ? 'https' : 'http';
+        $default_port = $secure ? 443 : 80;
+
+        // prepend protocol://hostname:port (port only when it is not the scheme default)
+        $host   = preg_replace('/:\d+$/', '', $_SERVER['HTTP_HOST']);
+        $prefix = $schema . '://' . $host;
+        if ($_SERVER['SERVER_PORT'] != $default_port) {
+            $prefix .= ':' . $_SERVER['SERVER_PORT'];
+        }
+
+        // strpos() rather than $url[0], which is an invalid offset for an empty string
+        return $prefix . (strpos($url, '/') === 0 ? '' : '/') . $url;
+    }
+
+    /**
+     * Generate a random string
+     *
+     * @param int  $length String length
+     * @param bool $raw    Return RAW data instead of ascii
+     *
+     * @return string The generated random string
+     */
+    public static function random_bytes($length, $raw = false)
+    {
+        // Use PHP7 true random generator; $random stays null when it is missing or throws
+        $random = null;
+        if (function_exists('random_bytes')) {
+            // random_bytes() can throw an Error/TypeError/Exception in some cases
+            try {
+                $random = random_bytes($length);
+            }
+            catch (Throwable $e) {}
+        }
+
+        if (!$random) {
+            $random = openssl_random_pseudo_bytes($length);
+        }
+
+        if ($raw) {
+            return $random;
+        }
+
+        // the ascii form can hold more characters than requested; truncate to the specified size
+        $random = self::bin2ascii($random);
+        if ($length < strlen($random)) {
+            $random = substr($random, 0, $length);
+        }
+
+        return $random;
+    }
+
+    /**
+     * Convert binary data into readable form (containing a-zA-Z0-9 characters)
+     *
+     * @param string $input Binary input
+     *
+     * @return string Readable output
+     */
+    public static function bin2ascii($input)
+    {
+        // Based on bin_to_readable() function in ext/session/session.c.
+        // Note: removed ",-" characters from hextab, so it holds 62 entries, not 64
+        $hextab  = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        $tabsize = strlen($hextab);
+        $nbits   = 6; // can be 4, 5 or 6
+        $length  = strlen($input);
+        $result  = '';
+        $char    = 0;
+        $i       = 0;
+        $have    = 0;
+        $mask    = (1 << $nbits) - 1;
+
+        while (true) {
+            if ($have < $nbits) {
+                if ($i < $length) {
+                    $char |= ord($input[$i++]) << $have; // stack the next byte above the pending bits
+                    $have += 8;
+                }
+                else if (!$have) {
+                    break; // input exhausted and no bits pending
+                }
+                else {
+                    $have = $nbits; // treat the leftover bits as one final chunk
+                }
+            }
+
+            // consume nbits; values 62 and 63 wrap around so the index stays inside hextab
+            $result .= $hextab[($char & $mask) % $tabsize];
+            $char >>= $nbits;
+            $have -= $nbits;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Format current date according to specified format.
+     * This method supports microseconds (u).
+     *
+     * @param string $format Date format (default: 'd-M-Y H:i:s O')
+     *
+     * @return string Formatted date
+     */
+    public static function date_format($format = null)
+    {
+        if (empty($format)) {
+            $format = 'd-M-Y H:i:s O';
+        }
+        // number_format() pins six decimals; a bare float cast follows the "precision" ini setting
+        $stamp = number_format(microtime(true), 6, '.', '');
+        if (strpos($format, 'u') !== false && ($date = date_create_from_format('U.u', $stamp))) {
+            // a 'U' timestamp is parsed as UTC; switch to the default zone that date() uses
+            return $date->setTimezone(new DateTimeZone(date_default_timezone_get()))->format($format);
+        }
+
+        return date($format);
+    }
}
--
Gitblit v1.9.1