<?php

/**
 +-----------------------------------------------------------------------+
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2009-2012, The Roundcube Dev Team                       |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   Handle string replacements based on preg_replace_callback           |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 +-----------------------------------------------------------------------+
*/

/**
 * Helper class for string replacements based on preg_replace_callback.
 *
 * Usage is two-phase: replace() swaps every recognized URL, e-mail address
 * and link reference for an opaque placeholder ("##str_replacement_N##")
 * while remembering the generated markup, and resolve() later swaps the
 * placeholders back for that markup.
 *
 * @package    Framework
 * @subpackage Utils
 */
class rcube_string_replacer
{
    /** @var string Regular expression matching a placeholder; group 1 is the value index */
    public static $pattern = '/##str_replacement_(\d+)##/';

    /** @var string Regular expression matching e-mail addresses (built in the constructor) */
    public $mailto_pattern;

    /** @var string Regular expression matching URLs (built in the constructor) */
    public $link_pattern;

    /** @var string Regular expression matching "[key]" followed by an already replaced URL */
    public $linkref_index;

    /** @var string Regular expression matching a bare "[key]" reference */
    public $linkref_pattern;

    /** @var array Stored replacement values, indexed by placeholder number */
    protected $values = array();

    /** @var array Options passed to the constructor; only 'link_attribs' is read by this class */
    protected $options = array();

    /** @var array Map of link reference key => URL */
    protected $linkrefs = array();

    /** @var array Map of placeholder number => href of the link stored under that number */
    protected $urls = array();

    /** @var string Character class of characters accepted directly in front of "www." */
    protected $noword = '[^\w@.#-]';


    /**
     * Object constructor; builds all matching patterns.
     *
     * @param array $options Options; 'link_attribs' (array of attributes) is
     *                       merged into every generated link
     */
    public function __construct($options = array())
    {
        // Simplified domain expression for UTF8 characters handling
        // Support unicode/punycode in top-level domain part
        $utf_domain = '[^?&@"\'\\/()<>\s\r\t\n]+\\.?([^\\x00-\\x2f\\x3b-\\x40\\x5b-\\x60\\x7b-\\x7f]{2,}|xn--[a-zA-Z0-9]{2,})';
        $url1       = '.:;,';
        $url2       = 'a-zA-Z0-9%=#$@+?|!&\\/_~\\[\\]\\(\\){}\*\x80-\xFE-';

        // Supported link prefixes
        $link_prefix = "([\w]+:\/\/|{$this->noword}[Ww][Ww][Ww]\.|^[Ww][Ww][Ww]\.)";

        $this->options         = $options;
        $this->linkref_index   = '/\[([^\]#]+)\](:?\s*##str_replacement_(\d+)##)/';
        $this->linkref_pattern = '/\[([^\]#]+)\]/';
        $this->link_pattern    = "/$link_prefix($utf_domain([$url1]*[$url2]+)*)/";
        $this->mailto_pattern  = "/("
            ."[-\w!\#\$%&\'*+~\/^`|{}=]+(?:\.[-\w!\#\$%&\'*+~\/^`|{}=]+)*"  // local-part
            ."@$utf_domain"                                                 // domain-part
            ."(\?[$url1$url2]+)?"                                           // e.g. ?subject=test...
            .")/";
    }

    /**
     * Add a string to the internal list
     *
     * @param string $str String value
     *
     * @return int Index of value for retrieval
     */
    public function add($str)
    {
        $i = count($this->values);
        $this->values[$i] = $str;

        return $i;
    }

    /**
     * Build replacement string (placeholder) for the given value index
     *
     * @param int $i Index as returned by add()
     *
     * @return string Placeholder text matched by self::$pattern
     */
    public function get_replacement($i)
    {
        return '##str_replacement_' . $i . '##';
    }

    /**
     * Callback function used to build HTML links around URL strings
     *
     * @param array $matches Matches result from preg_replace_callback:
     *                       [1] is the link prefix, [2] the rest of the URL
     *
     * @return string Placeholder (preceded by any non-word character that was
     *                matched in front of "www.") for recognized schemes,
     *                otherwise the unmodified matched text
     */
    public function link_callback($matches)
    {
        $i          = -1;
        $url        = '';
        $url_prefix = '';
        $prefix     = '';
        $scheme     = strtolower($matches[1]);

        if (preg_match('!^(http|ftp|file)s?://!i', $scheme)) {
            $url = $matches[1] . $matches[2];
        }
        else if (preg_match("/^({$this->noword}*)(www\.)$/i", $matches[1], $m)) {
            // "www." without a scheme: link to http://, and keep the character
            // matched in front of "www." outside of the link
            $url        = $m[2] . $matches[2];
            $url_prefix = 'http://';
            $prefix     = $m[1];
        }

        if ($url !== '') {
            // trailing unbalanced brackets are moved out of the URL
            $suffix         = $this->parse_url_brackets($url);
            $attrib         = $this->link_attribs();
            $attrib['href'] = $url_prefix . $url;

            $i = $this->add(html::a($attrib, rcube::Q($url)) . $suffix);

            // remember the target so link references can point to it
            $this->urls[$i] = $attrib['href'];
        }

        // Return valid link for recognized schemes, otherwise
        // return the unmodified string for unrecognized schemes.
        return $i >= 0 ? $prefix . $this->get_replacement($i) : $matches[0];
    }

    /**
     * Callback to add an entry to the link index
     *
     * @param array $matches Matches of $this->linkref_index: [1] is the
     *                       reference key, [2] the text following "[key]",
     *                       [3] the index of the placeholder found there
     *
     * @return string Placeholder for "[key]" followed by the untouched rest
     */
    public function linkref_addindex($matches)
    {
        $key = $matches[1];

        // Only placeholders created by link_callback() have a URL registered;
        // e.g. mailto placeholders do not, so there is nothing to index then
        if (isset($this->urls[$matches[3]])) {
            $this->linkrefs[$key] = $this->urls[$matches[3]];
        }

        return $this->get_replacement($this->add('['.$key.']')) . $matches[2];
    }

    /**
     * Callback to replace link references with real links
     *
     * @param array $matches Matches of $this->linkref_pattern: [1] is the key
     *
     * @return string "[placeholder]" if the key is a known link reference,
     *                otherwise the unmodified matched text
     */
    public function linkref_callback($matches)
    {
        $key = $matches[1];

        if (empty($this->linkrefs[$key])) {
            return $matches[0];
        }

        $attrib         = $this->link_attribs();
        $attrib['href'] = $this->linkrefs[$key];

        $i = $this->add(html::a($attrib, rcube::Q($key)));

        return '[' . $this->get_replacement($i) . ']';
    }

    /**
     * Callback function used to build mailto: links around e-mail strings
     *
     * @param array $matches Matches result from preg_replace_callback
     *
     * @return string Placeholder for the generated link
     */
    public function mailto_callback($matches)
    {
        $href   = $matches[1];
        $suffix = $this->parse_url_brackets($href);
        $i      = $this->add(html::a('mailto:' . $href, rcube::Q($href)) . $suffix);

        return $i >= 0 ? $this->get_replacement($i) : '';
    }

    /**
     * Look up the index from the preg_replace matches array
     * and return the substitution value.
     *
     * @param array $matches Matches result from preg_replace_callback
     *
     * @return string Value at index $matches[1]; the unmodified placeholder
     *                text if no value is stored under that index
     */
    public function replace_callback($matches)
    {
        // A placeholder-looking string that was never registered (e.g. one that
        // was part of the input text) is left as it is instead of being erased
        return isset($this->values[$matches[1]]) ? $this->values[$matches[1]] : $matches[0];
    }

    /**
     * Replace all defined (link|mailto) patterns with replacement string
     *
     * @param string $str Text
     *
     * @return string Text
     */
    public function replace($str)
    {
        // search for patterns like links and e-mail addresses
        $str = preg_replace_callback($this->link_pattern, array($this, 'link_callback'), $str);
        $str = preg_replace_callback($this->mailto_pattern, array($this, 'mailto_callback'), $str);

        // resolve link references; the index has to be built first and it relies
        // on the placeholders created for URLs above
        $str = preg_replace_callback($this->linkref_index, array($this, 'linkref_addindex'), $str);
        $str = preg_replace_callback($this->linkref_pattern, array($this, 'linkref_callback'), $str);

        return $str;
    }

    /**
     * Replace substituted strings with original values
     *
     * @param string $str Text containing placeholders
     *
     * @return string Text with all known placeholders replaced by their values
     */
    public function resolve($str)
    {
        return preg_replace_callback(self::$pattern, array($this, 'replace_callback'), $str);
    }

    /**
     * Fixes bracket characters in URL handling
     *
     * #1487672: special handling of square brackets,
     * URL regexp allows [] characters in URL, for example:
     * "http://example.com/?a[b]=c". However we need to handle
     * properly situation when a bracket is placed at the end
     * of the link e.g. "[http://example.com]".
     * This is not perfect, it handles correctly only paired characters,
     * but it should work for common cases. Parentheses are handled
     * the same way, after square brackets.
     *
     * @param string &$url URL; cut in place at the first unbalanced bracket
     *
     * @return string Text cut off from the end of the URL, empty string if none
     */
    public static function parse_url_brackets(&$url)
    {
        $suffix = '';

        foreach (array(array('[', ']'), array('(', ')')) as $pair) {
            $pos = self::unbalanced_offset($url, $pair[0], $pair[1]);

            if ($pos >= 0) {
                // Prepend: what is cut off in a later pass was located
                // in front of what has been cut off before
                $suffix = substr($url, $pos) . $suffix;
                $url    = substr($url, 0, $pos);
            }
        }

        return $suffix;
    }

    /**
     * Find the first bracket which breaks simple (non-nested) pairing:
     * an opening character inside of an open pair or a closing character
     * without an open pair.
     *
     * @param string $str   Text to scan
     * @param string $open  Opening character
     * @param string $close Closing character
     *
     * @return int Offset of the offending character, -1 if there is none
     */
    private static function unbalanced_offset($str, $open, $close)
    {
        $inside = false;
        $len    = strlen($str);

        for ($i = 0; $i < $len; $i++) {
            if ($str[$i] == $open) {
                if ($inside) {
                    return $i;
                }
                $inside = true;
            }
            else if ($str[$i] == $close) {
                if (!$inside) {
                    return $i;
                }
                $inside = false;
            }
        }

        return -1;
    }

    /**
     * Get the configured attributes for generated links
     *
     * @return array Value of the 'link_attribs' option, empty array if not set
     */
    private function link_attribs()
    {
        return isset($this->options['link_attribs']) ? (array) $this->options['link_attribs'] : array();
    }
}