commit | author | age
|
b4edf7
|
1 |
<?php |
A |
2 |
|
|
3 |
/* |
|
4 |
+-----------------------------------------------------------------------+ |
|
5 |
| This file is part of the Roundcube Webmail client | |
bc0a47
|
6 |
| Copyright (C) 2011-2013, Kolab Systems AG | |
TB |
7 |
| Copyright (C) 2008-2013, The Roundcube Dev Team | |
7fe381
|
8 |
| | |
T |
9 |
| Licensed under the GNU General Public License version 3 or | |
|
10 |
| any later version with exceptions for skins & plugins. | |
|
11 |
| See the README file for a full license statement. | |
b4edf7
|
12 |
| | |
A |
13 |
| PURPOSE: | |
|
14 |
| Spellchecking using different backends | |
|
15 |
+-----------------------------------------------------------------------+ |
|
16 |
| Author: Aleksander Machniak <machniak@kolabsys.com> | |
|
17 |
| Author: Thomas Bruederli <roundcube@gmail.com> | |
|
18 |
+-----------------------------------------------------------------------+ |
|
19 |
*/ |
|
20 |
|
|
21 |
/**
 * Helper class for spellchecking with Googiespell and PSpell support.
 *
 * The actual checking is delegated to a backend object whose class name is
 * built from the configured engine ("rcube_spellcheck_" . engine). This class
 * keeps the last checked content and its matches, formats results, and
 * maintains the list of exception words (dictionary) stored in the database.
 *
 * @package    Framework
 * @subpackage Utils
 */
class rcube_spellchecker
{
    private $matches = array();
    private $engine;
    private $backend;
    private $lang;
    private $rc;
    private $error;
    private $options = array();
    private $dict;
    private $have_dict;
    // Text passed to the last check() call (converted to plain text if it was HTML)
    private $content;


    /**
     * Constructor
     *
     * Reads the spellcheck options from configuration and instantiates the
     * backend. If the backend class does not exist, an error message is
     * stored and all checking methods fall back to empty results.
     *
     * @param string $lang Language code
     */
    public function __construct($lang = 'en')
    {
        $this->rc     = rcube::get_instance();
        $this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
        $this->lang   = $lang ? $lang : 'en';

        $this->options = array(
            'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
            'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
            'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
            'dictionary'  => $this->rc->config->get('spellcheck_dictionary'),
        );

        $cls = 'rcube_spellcheck_' . $this->engine;
        if (class_exists($cls)) {
            $this->backend = new $cls($this, $this->lang);
            $this->backend->options = $this->options;
        }
        else {
            $this->error = "Unknown spellcheck engine '$this->engine'";
        }
    }

    /**
     * Return a list of supported languages
     *
     * A configured 'spellcheck_languages' array without a truthy element 0
     * is returned unchanged. Otherwise the configured value, or the list
     * reported by the backend, is mapped to display labels taken from the
     * localization index.
     *
     * @return array Language labels indexed by language code
     */
    public function languages()
    {
        // trust configuration
        $configured = $this->rc->config->get('spellcheck_languages');
        if (!empty($configured) && is_array($configured) && empty($configured[0])) {
            return $configured;
        }

        // always start from an array so the loop below is safe
        // when nothing is configured and no backend is available
        $langs = array();
        if (!empty($configured)) {
            $langs = (array) $configured;
        }
        else if ($this->backend) {
            $langs = (array) $this->backend->languages();
        }

        // load index; pre-define both tables so a missing index file
        // does not leave them undefined
        $rcube_languages        = array();
        $rcube_language_aliases = array();
        @include(RCUBE_LOCALIZATION_DIR . 'index.inc');

        // add correct labels
        $languages = array();
        foreach ($langs as $lang) {
            $langc = strtolower(substr($lang, 0, 2));

            if (!empty($rcube_language_aliases[$langc])) {
                $alias = $rcube_language_aliases[$langc];
            }
            else {
                $alias = $langc . '_' . strtoupper($langc);
            }

            if (!empty($rcube_languages[$lang])) {
                $languages[$lang] = $rcube_languages[$lang];
            }
            else if (!empty($rcube_languages[$alias])) {
                $languages[$lang] = $rcube_languages[$alias];
            }
            else {
                $languages[$lang] = ucfirst($lang);
            }
        }

        // remove possible duplicates (#1489395)
        $languages = array_unique($languages);

        asort($languages);

        return $languages;
    }

    /**
     * Set content and check spelling
     *
     * @param string $text    Text content for spellchecking
     * @param bool   $is_html Enables HTML-to-Text conversion
     *
     * @return bool True when no misspelling found, otherwise false
     */
    public function check($text, $is_html = false)
    {
        // convert to plain text
        $this->content = $is_html ? $this->html2text($text) : $text;

        if ($this->backend) {
            $this->matches = $this->backend->check($this->content);
        }

        return $this->found() == 0;
    }


    /**
     * Number of misspellings found (after check)
     *
     * @return int Number of misspellings
     */
    public function found()
    {
        // cast like get_xml()/get() do, so a non-array backend result
        // does not make count() fail
        return count((array) $this->matches);
    }


    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list (empty when no backend is available)
     */
    public function get_suggestions($word)
    {
        if ($this->backend) {
            return $this->backend->get_suggestions($word);
        }

        return array();
    }


    /**
     * Returns misspelled words
     *
     * @param string $text    The content for spellchecking. Passed to the
     *                        backend as-is (after optional HTML conversion).
     * @param bool   $is_html Enables HTML-to-Text conversion of $text
     *
     * @return array List of misspelled words (empty when no backend is available)
     */
    public function get_words($text = null, $is_html = false)
    {
        if ($is_html) {
            $text = $this->html2text($text);
        }

        if ($this->backend) {
            return $this->backend->get_words($text);
        }

        return array();
    }


    /**
     * Returns checking result in XML (Googiespell) format
     *
     * For every match, element 1 is written as the "o" attribute, element 2
     * as the "l" attribute and element 4 (suggestions, tab-joined when it is
     * an array) as the element content.
     *
     * @return string XML content
     */
    public function get_xml()
    {
        // send output
        $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen((string) $this->content).'">';

        foreach ((array) $this->matches as $item) {
            $suggestions = isset($item[4]) ? $item[4] : '';

            $out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
            $out .= is_array($suggestions) ? implode("\t", $suggestions) : $suggestions;
            $out .= '</c>';
        }

        $out .= '</spellresult>';

        return $out;
    }


    /**
     * Returns checking result (misspelled words with suggestions)
     *
     * @return array Spellchecking result. An array indexed by word.
     */
    public function get()
    {
        $result = array();

        foreach ((array) $this->matches as $item) {
            if ($this->engine == 'pspell') {
                // pspell matches carry the word itself in element 0
                $word = $item[0];
            }
            else {
                // other engines: cut the word out of the checked content
                // using element 1 (start) and element 2 (length)
                $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET);
            }

            // suggestions are either an array or a tab-separated string
            if (empty($item[4])) {
                $suggestions = array();
            }
            else if (is_array($item[4])) {
                $suggestions = $item[4];
            }
            else {
                $suggestions = explode("\t", $item[4]);
            }

            $result[$word] = $suggestions;
        }

        return $result;
    }


    /**
     * Returns error message
     *
     * @return string|bool Own error message if set, otherwise the backend's
     *                     error, or false when there is no backend
     */
    public function error()
    {
        if ($this->error) {
            return $this->error;
        }

        return $this->backend ? $this->backend->error() : false;
    }


    /**
     * Converts HTML content to plain text using rcube_html2text
     *
     * @param string $text HTML content
     *
     * @return string Plain text
     */
    private function html2text($text)
    {
        $h2t = new rcube_html2text($text, false, true, 0);
        return $h2t->get_text();
    }


    /**
     * Check if the specified word is an exception according to
     * spellcheck options.
     *
     * @param string $word The word
     *
     * @return bool True if the word is an exception, False otherwise
     */
    public function is_exception($word)
    {
        // Contain only symbols (e.g. "+9,0", "2:2")
        if (!$word || preg_match('/^[0-9@#$%^&_+~*<>=:;?!,.-]+$/', $word)) {
            return true;
        }

        // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
        if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word)) {
            return true;
        }

        // Contain numbers (e.g. "g00g13")
        if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word)) {
            return true;
        }

        // Blocked caps (e.g. "GOOGLE")
        if (!empty($this->options['ignore_caps']) && $word == mb_strtoupper($word)) {
            return true;
        }

        // Use exceptions from dictionary
        if (!empty($this->options['dictionary'])) {
            $this->load_dict();

            // @TODO: should dictionary be case-insensitive?
            // strict comparison: words are strings and must not be
            // matched through numeric-string coercion
            if (!empty($this->dict) && in_array($word, $this->dict, true)) {
                return true;
            }
        }

        return false;
    }


    /**
     * Add a word to dictionary
     *
     * The argument is split on spaces; every non-empty token shorter than
     * 512 bytes is added. The dictionary is saved only if at least one
     * token was accepted.
     *
     * @param string $word The word (or space-separated words) to add
     */
    public function add_word($word)
    {
        $this->load_dict();

        $valid = false;

        foreach (explode(' ', $word) as $token) {
            // sanity check; also skip empty tokens produced by repeated spaces
            if ($token !== '' && strlen($token) < 512) {
                $this->dict[] = $token;
                $valid = true;
            }
        }

        if ($valid) {
            $this->dict = array_unique($this->dict);
            $this->update_dict();
        }
    }


    /**
     * Remove a word from dictionary
     *
     * @param string $word The word to remove
     */
    public function remove_word($word)
    {
        $this->load_dict();

        if (($key = array_search($word, $this->dict, true)) !== false) {
            unset($this->dict[$key]);
            $this->update_dict();
        }
    }


    /**
     * Update dictionary row in DB
     *
     * Runs the 'spell_dictionary_save' hook first; if the hook result has a
     * non-empty 'abort' entry nothing is written. Otherwise the row is
     * updated, deleted (empty dictionary) or inserted depending on whether
     * a row is known to exist.
     */
    private function update_dict()
    {
        // a 'shared' dictionary is stored without a user ID
        $userid = null;
        if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
            $userid = $this->rc->get_user_id();
        }

        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
            'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));

        if (!empty($plugin['abort'])) {
            return;
        }

        $table = $this->rc->db->table_name('dictionary', true);

        if ($this->have_dict) {
            $user_cond = $plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL";

            if (!empty($this->dict)) {
                $this->rc->db->query(
                    "UPDATE " . $table
                    ." SET `data` = ?"
                    ." WHERE `user_id` " . $user_cond
                    ." AND `language` = ?",
                    implode(' ', $plugin['dictionary']), $plugin['language']);
            }
            // don't store empty dict
            else {
                $this->rc->db->query(
                    "DELETE FROM " . $table
                    ." WHERE `user_id` " . $user_cond
                    ." AND `language` = ?",
                    $plugin['language']);

                // the row is gone: a later save on this instance must INSERT
                $this->have_dict = false;
            }
        }
        else if (!empty($this->dict)) {
            $this->rc->db->query(
                "INSERT INTO " . $table
                ." (`user_id`, `language`, `data`) VALUES (?, ?, ?)",
                $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));

            // the row exists now: a later save on this instance must UPDATE
            $this->have_dict = true;
        }
    }


    /**
     * Get dictionary from DB
     *
     * The result is cached in $this->dict, so the hook and the query run
     * only once per instance. Words returned by the 'spell_dictionary_get'
     * hook are merged with the words stored in the database, unless the
     * hook result has a non-empty 'abort' entry.
     *
     * @return array List of dictionary words
     */
    private function load_dict()
    {
        if (is_array($this->dict)) {
            return $this->dict;
        }

        // a 'shared' dictionary is stored without a user ID
        $userid = null;
        if (strcasecmp($this->options['dictionary'], 'shared') != 0) {
            $userid = $this->rc->get_user_id();
        }

        $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
            'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));

        if (empty($plugin['abort'])) {
            $dict = array();
            $sql_result = $this->rc->db->query(
                "SELECT `data` FROM " . $this->rc->db->table_name('dictionary', true)
                ." WHERE `user_id` ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
                ." AND `language` = ?",
                $plugin['language']);

            if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) {
                // remember that a row exists, so update_dict() knows
                // whether to UPDATE/DELETE or INSERT
                $this->have_dict = true;
                if (!empty($sql_arr['data'])) {
                    $dict = explode(' ', $sql_arr['data']);
                }
            }

            $plugin['dictionary'] = array_merge((array) $plugin['dictionary'], $dict);
        }

        if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
            $this->dict = $plugin['dictionary'];
        }
        else {
            $this->dict = array();
        }

        return $this->dict;
    }
}