commit | author | age
|
1c4f23
|
1 |
<?php |
A |
2 |
|
|
3 |
/** |
|
4 |
+-----------------------------------------------------------------------+ |
|
5 |
| program/include/rcube_mime.php | |
|
6 |
| | |
|
7 |
| This file is part of the Roundcube Webmail client | |
|
8 |
| Copyright (C) 2005-2012, The Roundcube Dev Team | |
|
9 |
| Copyright (C) 2011-2012, Kolab Systems AG | |
|
10 |
| Licensed under the GNU GPL | |
|
11 |
| | |
|
12 |
| PURPOSE: | |
|
13 |
| MIME message parsing utilities | |
|
14 |
| | |
|
15 |
+-----------------------------------------------------------------------+ |
|
16 |
| Author: Thomas Bruederli <roundcube@gmail.com> | |
|
17 |
| Author: Aleksander Machniak <alec@alec.pl> | |
|
18 |
+-----------------------------------------------------------------------+ |
|
19 |
|
|
20 |
$Id$ |
|
21 |
|
|
22 |
*/ |
|
23 |
|
|
24 |
|
|
25 |
/** |
|
26 |
* Class for parsing MIME messages |
|
27 |
* |
|
28 |
* @package Mail |
|
29 |
* @author Thomas Bruederli <roundcube@gmail.com> |
|
30 |
* @author Aleksander Machniak <alec@alec.pl> |
|
31 |
*/ |
|
32 |
class rcube_mime
{
    /**
     * Charset assumed for text that carries no charset information.
     * Shared by all static helpers; overwritten by the constructor.
     */
    private static $default_charset = RCMAIL_CHARSET;


    /**
     * Object constructor.
     *
     * Sets the class-wide default charset, either to the given value or,
     * when none is given, to the 'default_charset' config option.
     *
     * @param string $default_charset Default charset (optional)
     */
    public function __construct($default_charset = null)
    {
        if ($default_charset) {
            self::$default_charset = $default_charset;
        }
        else {
            self::$default_charset = rcmail::get_instance()->config->get('default_charset', RCMAIL_CHARSET);
        }
    }


    /**
     * Split an address list into a structured array list
     *
     * @param string  $input    Input string
     * @param int     $max      List only this number of addresses
     * @param boolean $decode   Decode address strings
     * @param string  $fallback Fallback charset if none specified
     *
     * @return array Indexed list of addresses (1-based); every item has
     *               the keys 'name', 'mailto' and 'string'
     */
    public static function decode_address_list($input, $max = null, $decode = true, $fallback = null)
    {
        $a   = self::parse_address_list($input, $decode, $fallback);
        $out = array();
        $j   = 0;

        // Special chars as defined by RFC 822 need to be in a quoted string (or escaped).
        $special_chars = '[\(\)\<\>\\\.\[\]@,;:"]';

        if (!is_array($a))
            return $out;

        foreach ($a as $val) {
            $j++;
            $address = trim($val['address']);
            $name    = trim($val['name']);
            // reset for every entry so that a value from the previous
            // iteration can never leak into this one
            $string  = '';

            if ($name && $address && $name != $address)
                $string = sprintf('%s <%s>', preg_match("/$special_chars/", $name) ? '"'.addcslashes($name, '"').'"' : $name, $address);
            else if ($address)
                $string = $address;
            else if ($name)
                $string = $name;

            $out[$j] = array(
                'name'   => $name,
                'mailto' => $address,
                'string' => $string
            );

            if ($max && $j==$max)
                break;
        }

        return $out;
    }


    /**
     * Decode a message header value
     *
     * @param string $input    Header value
     * @param string $fallback Fallback charset if none specified
     *
     * @return string Decoded string
     */
    public static function decode_header($input, $fallback = null)
    {
        $str = self::decode_mime_string((string)$input, $fallback);

        return $str;
    }


    /**
     * Decode a mime-encoded string to internal charset
     *
     * @param string $input    Header value
     * @param string $fallback Fallback charset if none specified
     *
     * @return string Decoded string
     */
    public static function decode_mime_string($input, $fallback = null)
    {
        $default_charset = !empty($fallback) ? $fallback : self::$default_charset;

        // rfc: all line breaks or other characters not found
        // in the Base64 Alphabet must be ignored by decoding software
        // delete all blanks between MIME-lines, otherwise we can
        // receive unnecessary blanks and broken utf-8 symbols
        $input = preg_replace("/\?=\s+=\?/", '?==?', $input);

        // encoded-word regexp
        $re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';

        // Find all RFC2047's encoded words
        if (preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
            // Initialize variables
            $tmp   = array();
            $out   = '';
            $start = 0;

            foreach ($matches as $idx => $m) {
                $pos      = $m[0][1];
                $charset  = $m[1][0];
                $encoding = $m[2][0];
                $text     = $m[3][0];
                $length   = strlen($m[0][0]);

                // Append everything that is before the text to be decoded
                if ($start != $pos) {
                    $substr = substr($input, $start, $pos-$start);
                    $out   .= rcube_charset_convert($substr, $default_charset);
                    $start  = $pos;
                }
                $start += $length;

                // Per RFC2047, each string part "MUST represent an integral number
                // of characters. A multi-octet character may not be split across
                // adjacent encoded-words." However, some mailers break this, so we
                // try to handle characters spanned across parts anyway by iterating
                // through and aggregating sequential encoded parts with the same
                // character set and encoding, then perform the decoding on the
                // aggregation as a whole.

                $tmp[] = $text;
                // the last encoded-word has no successor, so check before reading
                if (isset($matches[$idx+1])) {
                    $next_match = $matches[$idx+1];
                    if ($next_match[0][1] == $start
                        && $next_match[1][0] == $charset
                        && $next_match[2][0] == $encoding
                    ) {
                        continue;
                    }
                }

                $count = count($tmp);
                $text  = '';

                // Decode and join encoded-word's chunks
                if ($encoding == 'B' || $encoding == 'b') {
                    // base64 must be decoded a segment at a time
                    for ($i=0; $i<$count; $i++)
                        $text .= base64_decode($tmp[$i]);
                }
                else { //if ($encoding == 'Q' || $encoding == 'q') {
                    // quoted printable can be combined and processed at once
                    for ($i=0; $i<$count; $i++)
                        $text .= $tmp[$i];

                    $text = str_replace('_', ' ', $text);
                    $text = quoted_printable_decode($text);
                }

                $out .= rcube_charset_convert($text, $charset);
                $tmp = array();
            }

            // add the last part of the input string
            if ($start != strlen($input)) {
                $out .= rcube_charset_convert(substr($input, $start), $default_charset);
            }

            // return the results
            return $out;
        }

        // no encoding information, use fallback
        return rcube_charset_convert($input, $default_charset);
    }


    /**
     * Decode a mime part
     *
     * @param string $input    Input string
     * @param string $encoding Part encoding (matched case-insensitively)
     *
     * @return string Decoded string; the input is returned unchanged for
     *                '7bit' and for any encoding not listed here
     */
    public static function decode($input, $encoding = '7bit')
    {
        switch (strtolower($encoding)) {
        case 'quoted-printable':
            return quoted_printable_decode($input);
        case 'base64':
            return base64_decode($input);
        case 'x-uuencode':
        case 'x-uue':
        case 'uue':
        case 'uuencode':
            return convert_uudecode($input);
        case '7bit':
        default:
            return $input;
        }
    }


    /**
     * Split RFC822 header string into an associative array
     *
     * Folded lines are unfolded first. Field names are lower-cased; when
     * a field occurs more than once the last occurrence wins.
     *
     * @param string $headers Raw headers block
     *
     * @return array Header values indexed by lower-cased field name
     */
    public static function parse_headers($headers)
    {
        $a_headers = array();
        $headers   = preg_replace('/\r?\n(\t| )+/', ' ', $headers);
        $lines     = explode("\n", $headers);

        foreach ($lines as $line) {
            $p = strpos($line, ': ');

            // no separator, or an empty field name: not a header line
            if ($p === false || $p === 0) {
                continue;
            }

            $field = strtolower(substr($line, 0, $p));
            $value = trim(substr($line, $p+1));

            // compare with '' explicitly: empty() would also drop the value "0"
            if ($value !== '') {
                $a_headers[$field] = $value;
            }
        }

        return $a_headers;
    }


    /**
     * Parse an address-list header value into name/address pairs
     *
     * @param string  $str      Address list string
     * @param boolean $decode   Decode (mime-encoded) display names
     * @param string  $fallback Fallback charset if none specified
     *
     * @return array List of arrays with the keys 'name' and 'address'
     */
    private static function parse_address_list($str, $decode = true, $fallback = null)
    {
        // remove any newlines and carriage returns before
        $str = preg_replace('/\r?\n(\s|\t)?/', ' ', $str);

        // extract list items, remove comments
        $str    = self::explode_header_string(',;', $str, true);
        $result = array();

        // simplified regexp, supporting quoted local part
        $email_rx = '(\S+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+';

        foreach ($str as $key => $val) {
            $name    = '';
            $address = '';
            $val     = trim($val);

            if (preg_match('/(.*)<('.$email_rx.')>$/', $val, $m)) {
                $address = $m[2];
                $name    = trim($m[1]);
            }
            else if (preg_match('/^('.$email_rx.')$/', $val, $m)) {
                $address = $m[1];
                $name    = '';
            }
            else {
                $name = $val;
            }

            // dequote and/or decode name
            if ($name) {
                if ($name[0] == '"' && $name[strlen($name)-1] == '"') {
                    $name = substr($name, 1, -1);
                    $name = stripslashes($name);
                }
                if ($decode) {
                    $name = self::decode_header($name, $fallback);
                }
            }

            // an item without a recognizable address is used as address as-is
            if (!$address && $name) {
                $address = $name;
            }

            if ($address) {
                $result[$key] = array('name' => $name, 'address' => $address);
            }
        }

        return $result;
    }


    /**
     * Explodes header (e.g. address-list) string into array of strings
     * using specified separator characters with proper handling
     * of quoted-strings and comments (RFC2822)
     *
     * @param string $separator       String containing separator characters
     * @param string $str             Header string
     * @param bool   $remove_comments Enable to remove comments
     *
     * @return array Header items
     */
    public static function explode_header_string($separator, $str, $remove_comments = false)
    {
        $length  = strlen($str);
        $result  = array();
        $quoted  = false;
        $comment = 0;
        $out     = '';

        for ($i=0; $i<$length; $i++) {
            // we're inside a quoted string
            if ($quoted) {
                if ($str[$i] == '"') {
                    $quoted = false;
                }
                else if ($str[$i] == "\\") {
                    if ($comment <= 0) {
                        $out .= "\\";
                    }
                    // skip to the escaped character, it is appended below
                    $i++;
                }
            }
            // we are inside a comment string
            else if ($comment > 0) {
                if ($str[$i] == ')') {
                    $comment--;
                }
                else if ($str[$i] == '(') {
                    $comment++;
                }
                else if ($str[$i] == "\\") {
                    $i++;
                }
                continue;
            }
            // separator, add to result array
            else if (strpos($separator, $str[$i]) !== false) {
                if ($out) {
                    $result[] = $out;
                }
                $out = '';
                continue;
            }
            // start of quoted string
            else if ($str[$i] == '"') {
                $quoted = true;
            }
            // start of comment
            else if ($remove_comments && $str[$i] == '(') {
                $comment++;
            }

            // $i may point past the end when the string ends with
            // a backslash inside a quoted string
            if ($comment <= 0 && $i < $length) {
                $out .= $str[$i];
            }
        }

        if ($out && $comment <= 0) {
            $result[] = $out;
        }

        return $result;
    }


    /**
     * Interpret a format=flowed message body according to RFC 2646
     *
     * A line is appended to the previous one when the previous line is
     * flowed (ends with a space), is not the signature separator and has
     * the same quote depth.
     *
     * @param string $text Raw body formatted as flowed text
     *
     * @return string Interpreted text with unwrapped lines and stuffed space removed
     */
    public static function unfold_flowed($text)
    {
        $text    = preg_split('/\r?\n/', $text);
        $last    = -1;
        $q_level = 0;

        // foreach works on a copy, so $text can be modified while iterating
        foreach ($text as $idx => $line) {
            if (isset($line[0]) && $line[0] == '>' && preg_match('/^(>+\s*)/', $line, $regs)) {
                $q    = strlen(str_replace(' ', '', $regs[0]));
                // cast: substr() returns false instead of '' on older PHP versions
                $line = (string) substr($line, strlen($regs[0]));

                if ($q == $q_level && $line !== ''
                    && isset($text[$last])
                    && substr($text[$last], -1) === ' '
                ) {
                    $text[$last] .= $line;
                    unset($text[$idx]);
                }
                else {
                    $last = $idx;
                }
            }
            else {
                $q = 0;
                if ($line == '-- ') {
                    $last = $idx;
                }
                else {
                    // remove space-stuffing
                    $line = preg_replace('/^\s/', '', $line);

                    // a change of the quote depth ends the paragraph, so never
                    // join an unquoted line with a preceding quoted one
                    if (isset($text[$last]) && $line !== ''
                        && $q_level == 0
                        && $text[$last] != '-- '
                        && substr($text[$last], -1) === ' '
                    ) {
                        $text[$last] .= $line;
                        unset($text[$idx]);
                    }
                    else {
                        $text[$idx] = $line;
                        $last = $idx;
                    }
                }
            }
            $q_level = $q;
        }

        return implode("\r\n", $text);
    }


    /**
     * Wrap the given text to comply with RFC 2646
     *
     * The signature separator line ("-- ") is left untouched.
     *
     * @param string $text   Text to wrap
     * @param int    $length Length
     *
     * @return string Wrapped text
     */
    public static function format_flowed($text, $length = 72)
    {
        $text = preg_split('/\r?\n/', $text);

        foreach ($text as $idx => $line) {
            if ($line != '-- ') {
                // isset() guards against reading offset 0 of an empty line
                if (isset($line[0]) && $line[0] == '>' && preg_match('/^(>+)/', $line, $regs)) {
                    $prefix = $regs[0];
                    $level  = strlen($prefix);
                    $line   = rtrim(substr($line, $level));
                    $line   = $prefix . rc_wordwrap($line, $length - $level - 2, " \r\n$prefix ");
                }
                else if ($line) {
                    $line = rc_wordwrap(rtrim($line), $length - 2, " \r\n");
                    // space-stuffing
                    $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line);
                }

                $text[$idx] = $line;
            }
        }

        return implode("\r\n", $text);
    }

}