From 53604a0550f9940584b7e4d4260b96714ae0edbf Mon Sep 17 00:00:00 2001 From: alecpl <alec@alec.pl> Date: Wed, 01 Dec 2010 05:49:20 -0500 Subject: [PATCH] - Fix setting charset of attachment filenames (#1487122) --- program/lib/Mail/mimePart.php | 506 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 437 insertions(+), 69 deletions(-) diff --git a/program/lib/Mail/mimePart.php b/program/lib/Mail/mimePart.php index 7863028..60b3601 100644 --- a/program/lib/Mail/mimePart.php +++ b/program/lib/Mail/mimePart.php @@ -141,17 +141,19 @@ * content_type - The content type for this part eg multipart/mixed * encoding - The encoding to use, 7bit, 8bit, * base64, or quoted-printable + * charset - Content character set * cid - Content ID to apply * disposition - Content disposition, inline or attachment * dfilename - Filename parameter for content disposition * description - Content description - * charset - Character set to use - * name_encoding - Encoding for attachment name (Content-Type) + * name_encoding - Encoding of the attachment name (Content-Type) * By default filenames are encoded using RFC2231 * Here you can set RFC2047 encoding (quoted-printable * or base64) instead - * filename_encoding - Encoding for attachment filename (Content-Disposition) + * filename_encoding - Encoding of the attachment filename (Content-Disposition) * See 'name_encoding' + * headers_charset - Charset of the headers e.g. filename, description. + * If not set, 'charset' will be used * eol - End of line sequence. Default: "\r\n" * body_file - Location of file with part's body (instead of $body) * @@ -165,14 +167,8 @@ $this->_eol = MAIL_MIMEPART_CRLF; } - $c_type = array(); - $c_disp = array(); foreach ($params as $key => $value) { switch ($key) { - case 'content_type': - $c_type['type'] = $value; - break; - case 'encoding': $this->_encoding = $value; $headers['Content-Transfer-Encoding'] = $value; @@ -180,29 +176,6 @@ case 'cid': $headers['Content-ID'] = '<' . $value . 
'>'; - break; - - case 'disposition': - $c_disp['disp'] = $value; - break; - - case 'dfilename': - $c_disp['filename'] = $value; - $c_type['name'] = $value; - break; - - case 'description': - $headers['Content-Description'] = $value; - break; - - case 'charset': - $c_type['charset'] = $value; - $c_disp['charset'] = $value; - break; - - case 'language': - $c_type['language'] = $value; - $c_disp['language'] = $value; break; case 'location': @@ -215,41 +188,59 @@ } } + // Default content-type + if (empty($params['content_type'])) { + $params['content_type'] = 'text/plain'; + } + // Content-Type - if (isset($c_type['type'])) { - $headers['Content-Type'] = $c_type['type']; - if (isset($c_type['name'])) { - $headers['Content-Type'] .= ';' . $this->_eol; - $headers['Content-Type'] .= $this->_buildHeaderParam( - 'name', $c_type['name'], - isset($c_type['charset']) ? $c_type['charset'] : 'US-ASCII', - isset($c_type['language']) ? $c_type['language'] : null, - isset($params['name_encoding']) ? $params['name_encoding'] : null - ); + $headers['Content-Type'] = $params['content_type']; + if (!empty($params['charset'])) { + $charset = "charset={$params['charset']}"; + // place charset parameter in the same line, if possible + if ((strlen($headers['Content-Type']) + strlen($charset) + 16) <= 76) { + $headers['Content-Type'] .= '; '; + } else { + $headers['Content-Type'] .= ';' . $this->_eol . ' '; } - if (isset($c_type['charset'])) { - $headers['Content-Type'] - .= ';' . $this->_eol . " charset={$c_type['charset']}"; + $headers['Content-Type'] .= $charset; + + // Default headers charset + if (!isset($params['headers_charset'])) { + $params['headers_charset'] = $params['charset']; } + } + if (!empty($params['filename'])) { + $headers['Content-Type'] .= ';' . $this->_eol; + $headers['Content-Type'] .= $this->_buildHeaderParam( + 'name', $params['filename'], + !empty($params['headers_charset']) ? $params['headers_charset'] : 'US-ASCII', + !empty($params['language']) ? 
$params['language'] : null, + !empty($params['name_encoding']) ? $params['name_encoding'] : null + ); } // Content-Disposition - if (isset($c_disp['disp'])) { - $headers['Content-Disposition'] = $c_disp['disp']; - if (isset($c_disp['filename'])) { + if (!empty($params['disposition'])) { + $headers['Content-Disposition'] = $params['disposition']; + if (!empty($params['filename'])) { $headers['Content-Disposition'] .= ';' . $this->_eol; $headers['Content-Disposition'] .= $this->_buildHeaderParam( - 'filename', $c_disp['filename'], - isset($c_disp['charset']) ? $c_disp['charset'] : 'US-ASCII', - isset($c_disp['language']) ? $c_disp['language'] : null, - isset($params['filename_encoding']) ? $params['filename_encoding'] : null + 'filename', $params['filename'], + !empty($params['headers_charset']) ? $params['headers_charset'] : 'US-ASCII', + !empty($params['language']) ? $params['language'] : null, + !empty($params['filename_encoding']) ? $params['filename_encoding'] : null ); } } - // Default content-type - if (!isset($headers['Content-Type'])) { - $headers['Content-Type'] = 'text/plain'; + if (!empty($params['description'])) { + $headers['Content-Description'] = $this->encodeHeader( + 'Content-Description', $params['description'], + !empty($params['headers_charset']) ? $params['headers_charset'] : 'US-ASCII', + !empty($params['name_encoding']) ? $params['name_encoding'] : 'quoted-printable', + $this->_eol + ); } // Default encoding @@ -769,17 +760,275 @@ } /** - * Callback function to replace extended characters (\x80-xFF) with their - * ASCII values (RFC2231) + * Encodes a header as per RFC2047 * - * @param array $matches Preg_replace's matches array + * @param string $name The header name + * @param string $value The header data to encode + * @param string $charset Character set name + * @param string $encoding Encoding name (base64 or quoted-printable) + * @param string $eol End-of-line sequence. 
Default: "\r\n" * - * @return string Encoded character string + * @return string Encoded header data (without a name) + * @access public + * @since 1.6.1 + */ + function encodeHeader($name, $value, $charset='ISO-8859-1', + $encoding='quoted-printable', $eol="\r\n" + ) { + // Structured headers + $comma_headers = array( + 'from', 'to', 'cc', 'bcc', 'sender', 'reply-to', + 'resent-from', 'resent-to', 'resent-cc', 'resent-bcc', + 'resent-sender', 'resent-reply-to', + 'return-receipt-to', 'disposition-notification-to', + ); + $other_headers = array( + 'references', 'in-reply-to', 'message-id', 'resent-message-id', + ); + + $name = strtolower($name); + + if (in_array($name, $comma_headers)) { + $separator = ','; + } else if (in_array($name, $other_headers)) { + $separator = ' '; + } + + if (!$charset) { + $charset = 'ISO-8859-1'; + } + + // Structured header (make sure addr-spec inside is not encoded) + if (!empty($separator)) { + $parts = Mail_mimePart::_explodeQuotedString($separator, $value); + $value = ''; + + foreach ($parts as $part) { + $part = preg_replace('/\r?\n[\s\t]*/', $eol . ' ', $part); + $part = trim($part); + + if (!$part) { + continue; + } + if ($value) { + $value .= $separator==',' ? $separator.' ' : ' '; + } else { + $value = $name . 
': '; + } + + // let's find phrase (name) and/or addr-spec + if (preg_match('/^<\S+@\S+>$/', $part)) { + $value .= $part; + } else if (preg_match('/^\S+@\S+$/', $part)) { + // address without brackets and without name + $value .= $part; + } else if (preg_match('/<*\S+@\S+>*$/', $part, $matches)) { + // address with name (handle name) + $address = $matches[0]; + $word = str_replace($address, '', $part); + $word = trim($word); + // check if phrase requires quoting + if ($word) { + // non-ASCII: require encoding + if (preg_match('#([\x80-\xFF]){1}#', $word)) { + if ($word[0] == '"' && $word[strlen($word)-1] == '"') { + // de-quote quoted-string, encoding changes + // string to atom + $search = array("\\\"", "\\\\"); + $replace = array("\"", "\\"); + $word = str_replace($search, $replace, $word); + $word = substr($word, 1, -1); + } + // find length of last line + if (($pos = strrpos($value, $eol)) !== false) { + $last_len = strlen($value) - $pos; + } else { + $last_len = strlen($value); + } + $word = Mail_mimePart::encodeHeaderValue( + $word, $charset, $encoding, $last_len, $eol + ); + } else if (($word[0] != '"' || $word[strlen($word)-1] != '"') + && preg_match('/[\(\)\<\>\\\.\[\]@,;:"]/', $word) + ) { + // ASCII: quote string if needed + $word = '"'.addcslashes($word, '\\"').'"'; + } + } + $value .= $word.' '.$address; + } else { + // addr-spec not found, don't encode (?) + $value .= $part; + } + + // RFC2822 recommends 78 characters limit, use 76 from RFC2047 + $value = wordwrap($value, 76, $eol . 
' '); + } + + // remove header name prefix (there could be EOL too) + $value = preg_replace( + '/^'.$name.':('.preg_quote($eol, '/').')* /', '', $value + ); + + } else { + // Unstructured header + // non-ASCII: require encoding + if (preg_match('#([\x80-\xFF]){1}#', $value)) { + if ($value[0] == '"' && $value[strlen($value)-1] == '"') { + // de-quote quoted-string, encoding changes + // string to atom + $search = array("\\\"", "\\\\"); + $replace = array("\"", "\\"); + $value = str_replace($search, $replace, $value); + $value = substr($value, 1, -1); + } + $value = Mail_mimePart::encodeHeaderValue( + $value, $charset, $encoding, strlen($name) + 2, $eol + ); + } else if (strlen($name.': '.$value) > 78) { + // ASCII: check if header line isn't too long and use folding + $value = preg_replace('/\r?\n[\s\t]*/', $eol . ' ', $value); + $tmp = wordwrap($name.': '.$value, 78, $eol . ' '); + $value = preg_replace('/^'.$name.':\s*/', '', $tmp); + // hard limit 998 (RFC2822) + $value = wordwrap($value, 998, $eol . ' ', true); + } + } + + return $value; + } + + /** + * Explode quoted string + * + * @param string $delimiter Delimiter expression string for preg_match() + * @param string $string Input string + * + * @return array String tokens array * @access private */ - function _encodeReplaceCallback($matches) + function _explodeQuotedString($delimiter, $string) { - return sprintf('%%%02X', ord($matches[1])); + $result = array(); + $strlen = strlen($string); + + for ($q=$p=$i=0; $i < $strlen; $i++) { + if ($string[$i] == "\"" + && (empty($string[$i-1]) || $string[$i-1] != "\\") + ) { + $q = $q ? 
false : true; + } else if (!$q && preg_match("/$delimiter/", $string[$i])) { + $result[] = substr($string, $p, $i - $p); + $p = $i + 1; + } + } + + $result[] = substr($string, $p); + return $result; + } + + /** + * Encodes a header value as per RFC2047 + * + * @param string $value The header data to encode + * @param string $charset Character set name + * @param string $encoding Encoding name (base64 or quoted-printable) + * @param int $prefix_len Prefix length. Default: 0 + * @param string $eol End-of-line sequence. Default: "\r\n" + * + * @return string Encoded header data + * @access public + * @since 1.6.1 + */ + function encodeHeaderValue($value, $charset, $encoding, $prefix_len=0, $eol="\r\n") + { + // #17311: Use multibyte aware method (requires mbstring extension) + if ($result = Mail_mimePart::encodeMB($value, $charset, $encoding, $prefix_len, $eol)) { + return $result; + } + + // Generate the header using the specified params and dynamically + // determine the maximum length of such strings. + // 75 is the value specified in the RFC. + $encoding = $encoding == 'base64' ? 'B' : 'Q'; + $prefix = '=?' . $charset . '?' . $encoding .'?'; + $suffix = '?='; + $maxLength = 75 - strlen($prefix . $suffix); + $maxLength1stLine = $maxLength - $prefix_len; + + if ($encoding == 'B') { + // Base64 encode the entire string + $value = base64_encode($value); + + // We can cut base64 every 4 characters, so the real max + // we can get must be rounded down. + $maxLength = $maxLength - ($maxLength % 4); + $maxLength1stLine = $maxLength1stLine - ($maxLength1stLine % 4); + + $cutpoint = $maxLength1stLine; + $output = ''; + + while ($value) { + // Split translated string at every $maxLength + $part = substr($value, 0, $cutpoint); + $value = substr($value, $cutpoint); + $cutpoint = $maxLength; + // RFC 2047 specifies that any split header should + // be separated by a CRLF SPACE. + if ($output) { + $output .= $eol . ' '; + } + $output .= $prefix . $part . 
$suffix; + } + $value = $output; + } else { + // quoted-printable encoding has been selected + $value = Mail_mimePart::encodeQP($value); + + // This regexp will break QP-encoded text at every $maxLength + // but will not break any encoded letters. + $reg1st = "|(.{0,$maxLength1stLine}[^\=][^\=])|"; + $reg2nd = "|(.{0,$maxLength}[^\=][^\=])|"; + + if (strlen($value) > $maxLength1stLine) { + // Begin with the regexp for the first line. + $reg = $reg1st; + $output = ''; + while ($value) { + // Split translated string at every $maxLength + // But make sure not to break any translated chars. + $found = preg_match($reg, $value, $matches); + + // After the first line, we need to use a different + // regexp for the remaining lines. + $reg = $reg2nd; + + // Save the found part and encapsulate it in the + // prefix & suffix. Then remove the part from the + // $value variable. + if ($found) { + $part = $matches[0]; + $len = strlen($matches[0]); + $value = substr($value, $len); + } else { + $part = $value; + $value = ''; + } + + // RFC 2047 specifies that any split header should + // be separated by a CRLF SPACE + if ($output) { + $output .= $eol . ' '; + } + $output .= $prefix . $part . $suffix; + } + $value = $output; + } else { + $value = $prefix . $value . $suffix; + } + } + + return $value; } /** @@ -793,16 +1042,121 @@ */ function encodeQP($str) { - // Replace all special characters used by the encoder - $search = array('=', '_', '?', ' '); - $replace = array('=3D', '=5F', '=3F', '_'); - $str = str_replace($search, $replace, $str); + // Bug #17226 RFC 2047 restricts some characters + // if the word is inside a phrase, permitted chars are only: + // ASCII letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_" - // Replace all extended characters (\x80-xFF) with their - // ASCII values. - return preg_replace_callback( - '/([\x80-\xFF])/', array('Mail_mimePart', '_qpReplaceCallback'), $str + // "=", "_", "?" 
must be encoded + $regexp = '/([\x22-\x29\x2C\x2E\x3A-\x40\x5B-\x60\x7B-\x7E\x80-\xFF])/'; + $str = preg_replace_callback( + $regexp, array('Mail_mimePart', '_qpReplaceCallback'), $str ); + + return str_replace(' ', '_', $str); + } + + /** + * Encodes the given string using base64 or quoted-printable. + * This method makes sure that encoded-word represents an integral + * number of characters as per RFC2047. + * + * @param string $str String to encode + * @param string $charset Character set name + * @param string $encoding Encoding name (base64 or quoted-printable) + * @param int $prefix_len Prefix length. Default: 0 + * @param string $eol End-of-line sequence. Default: "\r\n" + * + * @return string Encoded string + * @access public + * @since 1.8.0 + */ + function encodeMB($str, $charset, $encoding, $prefix_len=0, $eol="\r\n") + { + if (!function_exists('mb_substr') || !function_exists('mb_strlen')) { + return; + } + + $encoding = $encoding == 'base64' ? 'B' : 'Q'; + // 75 is the value specified in the RFC + $prefix = '=?' . $charset . '?'.$encoding.'?'; + $suffix = '?='; + $maxLength = 75 - strlen($prefix . 
$suffix); + + // A multi-octet character may not be split across adjacent encoded-words + // So, we'll loop over each character + // mb_strlen() with wrong charset will generate a warning here and return null + $length = mb_strlen($str, $charset); + $result = ''; + $line_length = $prefix_len; + + if ($encoding == 'B') { + // base64 + $start = 0; + $prev = ''; + + for ($i=1; $i<=$length; $i++) { + // See #17311 + $chunk = mb_substr($str, $start, $i-$start, $charset); + $chunk = base64_encode($chunk); + $chunk_len = strlen($chunk); + + if ($line_length + $chunk_len == $maxLength || $i == $length) { + if ($result) { + $result .= "\n"; + } + $result .= $chunk; + $line_length = 0; + $start = $i; + } else if ($line_length + $chunk_len > $maxLength) { + if ($result) { + $result .= "\n"; + } + if ($prev) { + $result .= $prev; + } + $line_length = 0; + $start = $i - 1; + } else { + $prev = $chunk; + } + } + } else { + // quoted-printable + // see encodeQP() + $regexp = '/([\x22-\x29\x2C\x2E\x3A-\x40\x5B-\x60\x7B-\x7E\x80-\xFF])/'; + + for ($i=0; $i<=$length; $i++) { + $char = mb_substr($str, $i, 1, $charset); + // RFC recommends underline (instead of =20) in place of the space + // that's one of the reasons why we're not using iconv_mime_encode() + if ($char == ' ') { + $char = '_'; + $char_len = 1; + } else { + $char = preg_replace_callback( + $regexp, array('Mail_mimePart', '_qpReplaceCallback'), $char + ); + $char_len = strlen($char); + } + + if ($line_length + $char_len > $maxLength) { + if ($result) { + $result .= "\n"; + } + $line_length = 0; + } + + $result .= $char; + $line_length += $char_len; + } + } + + if ($result) { + $result = $prefix + .str_replace("\n", $suffix.$eol.' 
'.$prefix, $result).$suffix; + } + + return $result; } /** @@ -819,4 +1173,18 @@ return sprintf('=%02X', ord($matches[1])); } + /** + * Callback function to replace extended characters (\x80-\xFF) with their + * ASCII values (RFC2231) + * + * @param array $matches Preg_replace's matches array + * + * @return string Encoded character string + * @access private + */ + function _encodeReplaceCallback($matches) + { + return sprintf('%%%02X', ord($matches[1])); + } + } // End of class -- Gitblit v1.9.1