From 53604a0550f9940584b7e4d4260b96714ae0edbf Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Wed, 01 Dec 2010 05:49:20 -0500
Subject: [PATCH] - Fix setting charset of attachment filenames (#1487122)

---
 program/lib/Mail/mimePart.php |  506 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 437 insertions(+), 69 deletions(-)

diff --git a/program/lib/Mail/mimePart.php b/program/lib/Mail/mimePart.php
index 7863028..60b3601 100644
--- a/program/lib/Mail/mimePart.php
+++ b/program/lib/Mail/mimePart.php
@@ -141,17 +141,19 @@
     *     content_type      - The content type for this part eg multipart/mixed
     *     encoding          - The encoding to use, 7bit, 8bit,
     *                         base64, or quoted-printable
+    *     charset           - Content character set
     *     cid               - Content ID to apply
     *     disposition       - Content disposition, inline or attachment
     *     dfilename         - Filename parameter for content disposition
     *     description       - Content description
-    *     charset           - Character set to use
-    *     name_encoding     - Encoding for attachment name (Content-Type)
+    *     name_encoding     - Encoding of the attachment name (Content-Type)
     *                         By default filenames are encoded using RFC2231
     *                         Here you can set RFC2047 encoding (quoted-printable
     *                         or base64) instead
-    *     filename_encoding - Encoding for attachment filename (Content-Disposition)
+    *     filename_encoding - Encoding of the attachment filename (Content-Disposition)
     *                         See 'name_encoding'
+    *     headers_charset   - Charset of the headers e.g. filename, description.
+    *                         If not set, 'charset' will be used
     *     eol               - End of line sequence. Default: "\r\n"
     *     body_file         - Location of file with part's body (instead of $body)
     *
@@ -165,14 +167,8 @@
             $this->_eol = MAIL_MIMEPART_CRLF;
         }
 
-        $c_type = array();
-        $c_disp = array();
         foreach ($params as $key => $value) {
             switch ($key) {
-            case 'content_type':
-                $c_type['type'] = $value;
-                break;
-
             case 'encoding':
                 $this->_encoding = $value;
                 $headers['Content-Transfer-Encoding'] = $value;
@@ -180,29 +176,6 @@
 
             case 'cid':
                 $headers['Content-ID'] = '<' . $value . '>';
-                break;
-
-            case 'disposition':
-                $c_disp['disp'] = $value;
-                break;
-
-            case 'dfilename':
-                $c_disp['filename'] = $value;
-                $c_type['name'] = $value;
-                break;
-
-            case 'description':
-                $headers['Content-Description'] = $value;
-                break;
-
-            case 'charset':
-                $c_type['charset'] = $value;
-                $c_disp['charset'] = $value;
-                break;
-
-            case 'language':
-                $c_type['language'] = $value;
-                $c_disp['language'] = $value;
                 break;
 
             case 'location':
@@ -215,41 +188,59 @@
             }
         }
 
+        // Default content-type
+        if (empty($params['content_type'])) {
+            $params['content_type'] = 'text/plain';
+        }
+
         // Content-Type
-        if (isset($c_type['type'])) {
-            $headers['Content-Type'] = $c_type['type'];
-            if (isset($c_type['name'])) {
-                $headers['Content-Type'] .= ';' . $this->_eol;
-                $headers['Content-Type'] .= $this->_buildHeaderParam(
-                    'name', $c_type['name'], 
-                    isset($c_type['charset']) ? $c_type['charset'] : 'US-ASCII', 
-                    isset($c_type['language']) ? $c_type['language'] : null,
-                    isset($params['name_encoding']) ?  $params['name_encoding'] : null
-                );
+        $headers['Content-Type'] = $params['content_type'];
+        if (!empty($params['charset'])) {
+            $charset = "charset={$params['charset']}";
+            // place charset parameter in the same line, if possible
+            if ((strlen($headers['Content-Type']) + strlen($charset) + 16) <= 76) {
+                $headers['Content-Type'] .= '; ';
+            } else {
+                $headers['Content-Type'] .= ';' . $this->_eol . ' ';
             }
-            if (isset($c_type['charset'])) {
-                $headers['Content-Type']
-                    .= ';' . $this->_eol . " charset={$c_type['charset']}";
+            $headers['Content-Type'] .= $charset;
+
+            // Default headers charset
+            if (!isset($params['headers_charset'])) {
+                $params['headers_charset'] = $params['charset'];
             }
+        }
+        if (!empty($params['filename'])) {
+            $headers['Content-Type'] .= ';' . $this->_eol;
+            $headers['Content-Type'] .= $this->_buildHeaderParam(
+                'name', $params['filename'],
+                !empty($params['headers_charset']) ? $params['headers_charset'] : 'US-ASCII',
+                !empty($params['language']) ? $params['language'] : null,
+                !empty($params['name_encoding']) ? $params['name_encoding'] : null
+            );
         }
 
         // Content-Disposition
-        if (isset($c_disp['disp'])) {
-            $headers['Content-Disposition'] = $c_disp['disp'];
-            if (isset($c_disp['filename'])) {
+        if (!empty($params['disposition'])) {
+            $headers['Content-Disposition'] = $params['disposition'];
+            if (!empty($params['filename'])) {
                 $headers['Content-Disposition'] .= ';' . $this->_eol;
                 $headers['Content-Disposition'] .= $this->_buildHeaderParam(
-                    'filename', $c_disp['filename'], 
-                    isset($c_disp['charset']) ? $c_disp['charset'] : 'US-ASCII', 
-                    isset($c_disp['language']) ? $c_disp['language'] : null,
-                    isset($params['filename_encoding']) ?  $params['filename_encoding'] : null
+                    'filename', $params['filename'],
+                    !empty($params['headers_charset']) ? $params['headers_charset'] : 'US-ASCII',
+                    !empty($params['language']) ? $params['language'] : null,
+                    !empty($params['filename_encoding']) ? $params['filename_encoding'] : null
                 );
             }
         }
 
-        // Default content-type
-        if (!isset($headers['Content-Type'])) {
-            $headers['Content-Type'] = 'text/plain';
+        if (!empty($params['description'])) {
+            $headers['Content-Description'] = $this->encodeHeader(
+                'Content-Description', $params['description'],
+                !empty($params['headers_charset']) ? $params['headers_charset'] : 'US-ASCII',
+                !empty($params['name_encoding']) ? $params['name_encoding'] : 'quoted-printable',
+                $this->_eol
+            );
         }
 
         // Default encoding
@@ -769,17 +760,275 @@
     }
 
     /**
-     * Callback function to replace extended characters (\x80-xFF) with their
-     * ASCII values (RFC2231)
+     * Encodes a header as per RFC2047
      *
-     * @param array $matches Preg_replace's matches array
+     * @param string $name     The header name
+     * @param string $value    The header data to encode
+     * @param string $charset  Character set name
+     * @param string $encoding Encoding name (base64 or quoted-printable)
+     * @param string $eol      End-of-line sequence. Default: "\r\n"
      *
-     * @return string        Encoded character string
+     * @return string          Encoded header data (without a name)
+     * @access public
+     * @since 1.6.1
+     */
+    function encodeHeader($name, $value, $charset='ISO-8859-1',
+        $encoding='quoted-printable', $eol="\r\n"
+    ) {
+        // Structured headers
+        $comma_headers = array(
+            'from', 'to', 'cc', 'bcc', 'sender', 'reply-to',
+            'resent-from', 'resent-to', 'resent-cc', 'resent-bcc',
+            'resent-sender', 'resent-reply-to',
+            'return-receipt-to', 'disposition-notification-to',
+        );
+        $other_headers = array(
+            'references', 'in-reply-to', 'message-id', 'resent-message-id',
+        );
+
+        $name = strtolower($name);
+
+        if (in_array($name, $comma_headers)) {
+            $separator = ',';
+        } else if (in_array($name, $other_headers)) {
+            $separator = ' ';
+        }
+
+        if (!$charset) {
+            $charset = 'ISO-8859-1';
+        }
+
+        // Structured header (make sure addr-spec inside is not encoded)
+        if (!empty($separator)) {
+            $parts = Mail_mimePart::_explodeQuotedString($separator, $value);
+            $value = '';
+
+            foreach ($parts as $part) {
+                $part = preg_replace('/\r?\n[\s\t]*/', $eol . ' ', $part);
+                $part = trim($part);
+
+                if (!$part) {
+                    continue;
+                }
+                if ($value) {
+                    $value .= $separator==',' ? $separator.' ' : ' ';
+                } else {
+                    $value = $name . ': ';
+                }
+
+                // let's find phrase (name) and/or addr-spec
+                if (preg_match('/^<\S+@\S+>$/', $part)) {
+                    $value .= $part;
+                } else if (preg_match('/^\S+@\S+$/', $part)) {
+                    // address without brackets and without name
+                    $value .= $part;
+                } else if (preg_match('/<*\S+@\S+>*$/', $part, $matches)) {
+                    // address with name (handle name)
+                    $address = $matches[0];
+                    $word = str_replace($address, '', $part);
+                    $word = trim($word);
+                    // check if phrase requires quoting
+                    if ($word) {
+                        // non-ASCII: require encoding
+                        if (preg_match('#([\x80-\xFF]){1}#', $word)) {
+                            if ($word[0] == '"' && $word[strlen($word)-1] == '"') {
+                                // de-quote quoted-string, encoding changes
+                                // string to atom
+                                $search = array("\\\"", "\\\\");
+                                $replace = array("\"", "\\");
+                                $word = str_replace($search, $replace, $word);
+                                $word = substr($word, 1, -1);
+                            }
+                            // find length of last line
+                            if (($pos = strrpos($value, $eol)) !== false) {
+                                $last_len = strlen($value) - $pos;
+                            } else {
+                                $last_len = strlen($value);
+                            }
+                            $word = Mail_mimePart::encodeHeaderValue(
+                                $word, $charset, $encoding, $last_len, $eol
+                            );
+                        } else if (($word[0] != '"' || $word[strlen($word)-1] != '"')
+                            && preg_match('/[\(\)\<\>\\\.\[\]@,;:"]/', $word)
+                        ) {
+                            // ASCII: quote string if needed
+                            $word = '"'.addcslashes($word, '\\"').'"';
+                        }
+                    }
+                    $value .= $word.' '.$address;
+                } else {
+                    // addr-spec not found, don't encode (?)
+                    $value .= $part;
+                }
+
+                // RFC2822 recommends 78 characters limit, use 76 from RFC2047
+                $value = wordwrap($value, 76, $eol . ' ');
+            }
+
+            // remove header name prefix (there could be EOL too)
+            $value = preg_replace(
+                '/^'.$name.':('.preg_quote($eol, '/').')* /', '', $value
+            );
+
+        } else {
+            // Unstructured header
+            // non-ASCII: require encoding
+            if (preg_match('#([\x80-\xFF]){1}#', $value)) {
+                if ($value[0] == '"' && $value[strlen($value)-1] == '"') {
+                    // de-quote quoted-string, encoding changes
+                    // string to atom
+                    $search = array("\\\"", "\\\\");
+                    $replace = array("\"", "\\");
+                    $value = str_replace($search, $replace, $value);
+                    $value = substr($value, 1, -1);
+                }
+                $value = Mail_mimePart::encodeHeaderValue(
+                    $value, $charset, $encoding, strlen($name) + 2, $eol
+                );
+            } else if (strlen($name.': '.$value) > 78) {
+                // ASCII: check if header line isn't too long and use folding
+                $value = preg_replace('/\r?\n[\s\t]*/', $eol . ' ', $value);
+                $tmp = wordwrap($name.': '.$value, 78, $eol . ' ');
+                $value = preg_replace('/^'.$name.':\s*/', '', $tmp);
+                // hard limit 998 (RFC2822)
+                $value = wordwrap($value, 998, $eol . ' ', true);
+            }
+        }
+
+        return $value;
+    }
+
+    /**
+     * Explode quoted string
+     *
+     * @param string $delimiter Delimiter expression string for preg_match()
+     * @param string $string    Input string
+     *
+     * @return array            String tokens array
      * @access private
      */
-    function _encodeReplaceCallback($matches)
+    function _explodeQuotedString($delimiter, $string)
     {
-        return sprintf('%%%02X', ord($matches[1]));
+        $result = array();
+        $strlen = strlen($string);
+
+        for ($q=$p=$i=0; $i < $strlen; $i++) {
+            if ($string[$i] == "\""
+                && (empty($string[$i-1]) || $string[$i-1] != "\\")
+            ) {
+                $q = $q ? false : true;
+            } else if (!$q && preg_match("/$delimiter/", $string[$i])) {
+                $result[] = substr($string, $p, $i - $p);
+                $p = $i + 1;
+            }
+        }
+
+        $result[] = substr($string, $p);
+        return $result;
+    }
+
+    /**
+     * Encodes a header value as per RFC2047
+     *
+     * @param string $value      The header data to encode
+     * @param string $charset    Character set name
+     * @param string $encoding   Encoding name (base64 or quoted-printable)
+     * @param int    $prefix_len Prefix length. Default: 0
+     * @param string $eol        End-of-line sequence. Default: "\r\n"
+     *
+     * @return string            Encoded header data
+     * @access public
+     * @since 1.6.1
+     */
+    function encodeHeaderValue($value, $charset, $encoding, $prefix_len=0, $eol="\r\n")
+    {
+        // #17311: Use multibyte aware method (requires mbstring extension)
+        if ($result = Mail_mimePart::encodeMB($value, $charset, $encoding, $prefix_len, $eol)) {
+            return $result;
+        }
+
+        // Generate the header using the specified params and dynamically
+        // determine the maximum length of such strings.
+        // 75 is the value specified in the RFC.
+        $encoding = $encoding == 'base64' ? 'B' : 'Q';
+        $prefix = '=?' . $charset . '?' . $encoding .'?';
+        $suffix = '?=';
+        $maxLength = 75 - strlen($prefix . $suffix);
+        $maxLength1stLine = $maxLength - $prefix_len;
+
+        if ($encoding == 'B') {
+            // Base64 encode the entire string
+            $value = base64_encode($value);
+
+            // We can cut base64 every 4 characters, so the real max
+            // we can get must be rounded down.
+            $maxLength = $maxLength - ($maxLength % 4);
+            $maxLength1stLine = $maxLength1stLine - ($maxLength1stLine % 4);
+
+            $cutpoint = $maxLength1stLine;
+            $output = '';
+
+            while ($value) {
+                // Split translated string at every $maxLength
+                $part = substr($value, 0, $cutpoint);
+                $value = substr($value, $cutpoint);
+                $cutpoint = $maxLength;
+                // RFC 2047 specifies that any split header should
+                // be separated by a CRLF SPACE.
+                if ($output) {
+                    $output .= $eol . ' ';
+                }
+                $output .= $prefix . $part . $suffix;
+            }
+            $value = $output;
+        } else {
+            // quoted-printable encoding has been selected
+            $value = Mail_mimePart::encodeQP($value);
+
+            // This regexp will break QP-encoded text at every $maxLength
+            // but will not break any encoded letters.
+            $reg1st = "|(.{0,$maxLength1stLine}[^\=][^\=])|";
+            $reg2nd = "|(.{0,$maxLength}[^\=][^\=])|";
+
+            if (strlen($value) > $maxLength1stLine) {
+                // Begin with the regexp for the first line.
+                $reg = $reg1st;
+                $output = '';
+                while ($value) {
+                    // Split translated string at every $maxLength
+                    // But make sure not to break any translated chars.
+                    $found = preg_match($reg, $value, $matches);
+
+                    // After this first line, we need to use a different
+                    // regexp for the remaining lines.
+                    $reg = $reg2nd;
+
+                    // Save the found part and encapsulate it in the
+                    // prefix & suffix. Then remove the part from the
+                    // $value variable.
+                    if ($found) {
+                        $part = $matches[0];
+                        $len = strlen($matches[0]);
+                        $value = substr($value, $len);
+                    } else {
+                        $part = $value;
+                        $value = '';
+                    }
+
+                    // RFC 2047 specifies that any split header should
+                        // be separated by a CRLF SPACE
+                    if ($output) {
+                        $output .= $eol . ' ';
+                    }
+                    $output .= $prefix . $part . $suffix;
+                }
+                $value = $output;
+            } else {
+                $value = $prefix . $value . $suffix;
+            }
+        }
+
+        return $value;
     }
 
     /**
@@ -793,16 +1042,121 @@
      */
     function encodeQP($str)
     {
-        // Replace all special characters used by the encoder
-        $search  = array('=',   '_',   '?',   ' ');
-        $replace = array('=3D', '=5F', '=3F', '_');
-        $str = str_replace($search, $replace, $str);
+        // Bug #17226 RFC 2047 restricts some characters
+        // if the word is inside a phrase, permitted chars are only:
+        // ASCII letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
 
-        // Replace all extended characters (\x80-xFF) with their
-        // ASCII values.
-        return preg_replace_callback(
-            '/([\x80-\xFF])/', array('Mail_mimePart', '_qpReplaceCallback'), $str
+        // "=",  "_",  "?" must be encoded
+        $regexp = '/([\x22-\x29\x2C\x2E\x3A-\x40\x5B-\x60\x7B-\x7E\x80-\xFF])/';
+        $str = preg_replace_callback(
+            $regexp, array('Mail_mimePart', '_qpReplaceCallback'), $str
         );
+
+        return str_replace(' ', '_', $str);
+    }
+
+    /**
+     * Encodes the given string using base64 or quoted-printable.
+     * This method makes sure that encoded-word represents an integral
+     * number of characters as per RFC2047.
+     *
+     * @param string $str        String to encode
+     * @param string $charset    Character set name
+     * @param string $encoding   Encoding name (base64 or quoted-printable)
+     * @param int    $prefix_len Prefix length. Default: 0
+     * @param string $eol        End-of-line sequence. Default: "\r\n"
+     *
+     * @return string|null Encoded string, or null when mbstring is unavailable
+     * @access public
+     * @since 1.8.0
+     */
+    function encodeMB($str, $charset, $encoding, $prefix_len=0, $eol="\r\n")
+    {
+        if (!function_exists('mb_substr') || !function_exists('mb_strlen')) {
+            return;
+        }
+
+        $encoding = $encoding == 'base64' ? 'B' : 'Q';
+        // 75 is the value specified in the RFC
+        $prefix = '=?' . $charset . '?'.$encoding.'?';
+        $suffix = '?=';
+        $maxLength = 75 - strlen($prefix . $suffix);
+
+        // A multi-octet character may not be split across adjacent encoded-words
+        // So, we'll loop over each character
+        // mb_strlen() with a wrong charset will generate a warning here and return null
+        $length      = mb_strlen($str, $charset);
+        $result      = '';
+        $line_length = $prefix_len;
+
+        if ($encoding == 'B') {
+            // base64
+            $start = 0;
+            $prev  = '';
+
+            for ($i=1; $i<=$length; $i++) {
+                // See #17311
+                $chunk = mb_substr($str, $start, $i-$start, $charset);
+                $chunk = base64_encode($chunk);
+                $chunk_len = strlen($chunk);
+
+                if ($line_length + $chunk_len == $maxLength || $i == $length) {
+                    if ($result) {
+                        $result .= "\n";
+                    }
+                    $result .= $chunk;
+                    $line_length = 0;
+                    $start = $i;
+                } else if ($line_length + $chunk_len > $maxLength) {
+                    if ($result) {
+                        $result .= "\n";
+                    }
+                    if ($prev) {
+                        $result .= $prev;
+                    }
+                    $line_length = 0;
+                    $start = $i - 1;
+                } else {
+                    $prev = $chunk;
+                }
+            }
+        } else {
+            // quoted-printable
+            // see encodeQP()
+            $regexp = '/([\x22-\x29\x2C\x2E\x3A-\x40\x5B-\x60\x7B-\x7E\x80-\xFF])/';
+
+            for ($i=0; $i<=$length; $i++) {
+                $char = mb_substr($str, $i, 1, $charset);
+                // RFC recommends underline (instead of =20) in place of the space
+                // that's one of the reasons why we're not using iconv_mime_encode()
+                if ($char == ' ') {
+                    $char = '_';
+                    $char_len = 1;
+                } else {
+                    $char = preg_replace_callback(
+                        $regexp, array('Mail_mimePart', '_qpReplaceCallback'), $char
+                    );
+                    $char_len = strlen($char);
+                }
+
+                if ($line_length + $char_len > $maxLength) {
+                    if ($result) {
+                        $result .= "\n";
+                    }
+                    $line_length = 0;
+                }
+
+                $result      .= $char;
+                $line_length += $char_len;
+            }
+        }
+
+        if ($result) {
+            $result = $prefix
+                .str_replace("\n", $suffix.$eol.' '.$prefix, $result).$suffix;
+        }
+
+        return $result;
     }
 
     /**
@@ -819,4 +1173,18 @@
         return sprintf('=%02X', ord($matches[1]));
     }
 
+    /**
+     * Callback function to replace extended characters (\x80-\xFF) with their
+     * ASCII values (RFC2231)
+     *
+     * @param array $matches Preg_replace's matches array
+     *
+     * @return string        Encoded character string
+     * @access private
+     */
+    function _encodeReplaceCallback($matches)
+    {
+        return sprintf('%%%02X', ord($matches[1]));
+    }
+
 } // End of class

--
Gitblit v1.9.1