commit | author | age
|
b4edf7
|
1 |
<?php |
A |
2 |
|
a95874
|
3 |
/** |
b4edf7
|
4 |
+-----------------------------------------------------------------------+ |
A |
5 |
| This file is part of the Roundcube Webmail client | |
bc0a47
|
6 |
| Copyright (C) 2011-2013, Kolab Systems AG | |
TB |
7 |
| Copyright (C) 2008-2013, The Roundcube Dev Team | |
7fe381
|
8 |
| | |
T |
9 |
| Licensed under the GNU General Public License version 3 or | |
|
10 |
| any later version with exceptions for skins & plugins. | |
|
11 |
| See the README file for a full license statement. | |
b4edf7
|
12 |
| | |
A |
13 |
| PURPOSE: | |
|
14 |
| Spellchecking using different backends | |
|
15 |
+-----------------------------------------------------------------------+ |
|
16 |
| Author: Aleksander Machniak <machniak@kolabsys.com> | |
|
17 |
| Author: Thomas Bruederli <roundcube@gmail.com> | |
|
18 |
+-----------------------------------------------------------------------+ |
|
19 |
*/ |
|
20 |
|
|
21 |
/** |
|
22 |
* Helper class for spellchecking with Googiespell and PSpell support. |
|
23 |
* |
9ab346
|
24 |
* @package Framework |
AM |
25 |
* @subpackage Utils |
b4edf7
|
26 |
*/ |
A |
27 |
class rcube_spellchecker |
|
28 |
{ |
|
29 |
private $matches = array(); |
|
30 |
private $engine; |
bc0a47
|
31 |
private $backend; |
b4edf7
|
32 |
private $lang; |
A |
33 |
private $rc; |
|
34 |
private $error; |
66df08
|
35 |
private $options = array(); |
A |
36 |
private $dict; |
|
37 |
private $have_dict; |
b4edf7
|
38 |
|
A |
39 |
|
|
40 |
/**
 * Object constructor.
 *
 * Reads the spellchecking settings from the application configuration and
 * instantiates the backend class that matches the configured engine. When
 * no such class exists an error message is stored instead.
 *
 * @param string $lang Language code ('en' is used when empty)
 */
function __construct($lang = 'en')
{
    $this->rc     = rcube::get_instance();
    $config       = $this->rc->config;
    $this->engine = $config->get('spellcheck_engine', 'googie');
    $this->lang   = $lang ? $lang : 'en';

    // options handed over to the backend
    $this->options = array(
        'ignore_syms' => $config->get('spellcheck_ignore_syms'),
        'ignore_nums' => $config->get('spellcheck_ignore_nums'),
        'ignore_caps' => $config->get('spellcheck_ignore_caps'),
        'dictionary'  => $config->get('spellcheck_dictionary'),
    );

    // backend classes are named after the engine
    $backend_class = 'rcube_spellcheck_' . $this->engine;

    if (!class_exists($backend_class)) {
        $this->error = "Unknown spellcheck engine '$this->engine'";
        return;
    }

    $this->backend          = new $backend_class($this, $this->lang);
    $this->backend->options = $this->options;
}
A |
67 |
|
c344b6
|
68 |
/**
 * Return a list of supported languages
 *
 * A configured 'spellcheck_languages' hash (no element at index 0) is
 * trusted and returned as-is. A configured plain list of codes, or the
 * list reported by the backend, is mapped to labels taken from the
 * localization index.
 *
 * @return array Language labels indexed by language code
 */
function languages()
{
    // trust configuration
    $configured = $this->rc->config->get('spellcheck_languages');
    $langs      = array();

    if (!empty($configured)) {
        // empty() instead of a plain read: a hash has no index 0 at all
        if (is_array($configured) && empty($configured[0])) {
            return $configured;
        }

        $langs = (array) $configured;
    }
    else if ($this->backend) {
        $langs = (array) $this->backend->languages();
    }

    // load index
    // NOTE(review): the included file presumably defines both variables
    // below; the defaults keep the lookups safe when it cannot be loaded.
    $rcube_languages        = array();
    $rcube_language_aliases = array();
    @include(RCUBE_LOCALIZATION_DIR . 'index.inc');

    // add correct labels
    $languages = array();
    foreach ($langs as $lang) {
        $langc = strtolower(substr($lang, 0, 2));

        // fall back to the "xx_XX" form when no alias is registered
        $alias = !empty($rcube_language_aliases[$langc])
            ? $rcube_language_aliases[$langc]
            : $langc . '_' . strtoupper($langc);

        if (!empty($rcube_languages[$lang])) {
            $languages[$lang] = $rcube_languages[$lang];
        }
        else if (!empty($rcube_languages[$alias])) {
            $languages[$lang] = $rcube_languages[$alias];
        }
        else {
            $languages[$lang] = ucfirst($lang);
        }
    }

    // remove possible duplicates (#1489395)
    $languages = array_unique($languages);

    asort($languages);

    return $languages;
}
b4edf7
|
114 |
|
A |
115 |
/**
 * Set content and check spelling
 *
 * The (optionally converted) text is remembered as the current content
 * and, when a backend is available, passed to it for checking.
 *
 * @param string $text    Text content for spellchecking
 * @param bool   $is_html Enables HTML-to-Text conversion
 *
 * @return bool True when no misspelling found, otherwise false
 */
function check($text, $is_html = false)
{
    // convert to plain text
    $this->content = $is_html ? $this->html2text($text) : $text;

    // without a backend the list of matches is left untouched
    if ($this->backend) {
        $this->matches = $this->backend->check($this->content);
    }

    $misspelled = $this->found();

    return $misspelled == 0;
}
|
139 |
|
|
140 |
/**
 * Number of misspellings found (after check)
 *
 * @return int Number of misspellings
 */
function found()
{
    // Cast the same way get() and get_xml() do, so that a non-array
    // backend result (e.g. null) is counted instead of making count()
    // fail on PHP versions that reject non-countable arguments.
    return count((array) $this->matches);
}
|
149 |
|
|
150 |
/**
 * Returns suggestions for the specified word
 *
 * @param string $word The word
 *
 * @return array Suggestions list (empty when no backend is available)
 */
function get_suggestions($word)
{
    // nothing to ask without a backend
    if (!$this->backend) {
        return array();
    }

    return $this->backend->get_suggestions($word);
}
A |
165 |
|
|
166 |
/**
 * Returns misspelled words
 *
 * @param string $text    The content for spellchecking. If empty content
 *                        used for check() method will be used.
 * @param bool   $is_html Enables HTML-to-Text conversion of $text
 *
 * @return array List of misspelled words (empty when no backend is available)
 */
function get_words($text = null, $is_html=false)
{
    // convert to plain text first, as the backend is given plain text only
    $plain = $is_html ? $this->html2text($text) : $text;

    return $this->backend ? $this->backend->get_words($plain) : array();
}
A |
186 |
|
|
187 |
/**
 * Returns checking result in XML (Googiespell) format
 *
 * Every match becomes a <c> element whose "o" and "l" attributes are
 * taken from elements 1 and 2 of the match and whose content is the
 * tab-separated list of suggestions (element 4).
 *
 * @return string XML content
 */
function get_xml()
{
    // document header with the number of checked characters
    $xml = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';

    foreach ((array)$this->matches as $match) {
        $xml .= '<c o="'.$match[1].'" l="'.$match[2].'">';

        // suggestions come either as a list or as a ready-made string
        $suggestions = $match[4];
        if (is_array($suggestions)) {
            $suggestions = implode("\t", $suggestions);
        }

        $xml .= $suggestions . '</c>';
    }

    return $xml . '</spellresult>';
}
|
207 |
|
|
208 |
/**
 * Returns checking result (misspelled words with suggestions)
 *
 * @return array Spellchecking result. An array indexed by word, each
 *               value being the list of suggestions for that word.
 */
function get()
{
    $result    = array();
    $is_pspell = $this->engine == 'pspell';

    foreach ((array)$this->matches as $match) {
        // pspell matches carry the word itself (element 0); for other
        // engines it is cut out of the checked content by position
        $word = $is_pspell
            ? $match[0]
            : mb_substr($this->content, $match[1], $match[2], RCUBE_CHARSET);

        // normalize suggestions (list or tab-separated string) to a list
        $suggestions = $match[4];
        if (!is_array($suggestions)) {
            $suggestions = empty($suggestions) ? array() : explode("\t", $suggestions);
        }

        $result[$word] = $suggestions;
    }

    return $result;
}
A |
240 |
|
|
241 |
/**
 * Returns error message
 *
 * An error recorded by this object takes precedence; otherwise the
 * backend (if any) is asked for its error.
 *
 * @return string|bool Error message, or false when there is no backend
 *                     and no error was recorded
 */
function error()
{
    if ($this->error) {
        return $this->error;
    }

    if ($this->backend) {
        return $this->backend->error();
    }

    return false;
}
A |
250 |
|
|
251 |
/**
 * Converts HTML content to plain text using rcube_html2text
 *
 * NOTE(review): the three extra constructor arguments are passed through
 * unchanged; their meaning is defined by rcube_html2text.
 *
 * @param string $text HTML content
 *
 * @return string Text returned by the converter
 */
private function html2text($text)
{
    $converter = new rcube_html2text($text, false, false, 0);

    return $converter->get_text();
}
66df08
|
256 |
|
A |
257 |
/**
 * Check if the specified word is an exception according to
 * spellcheck options.
 *
 * A word is an exception when it is empty or consists only of digits and
 * symbols, when one of the enabled ignore_* options applies to it, or
 * when it is listed in the dictionary (if the dictionary is enabled).
 *
 * @param string $word The word
 *
 * @return bool True if the word is an exception, False otherwise
 */
public function is_exception($word)
{
    // Contain only symbols (e.g. "+9,0", "2:2")
    if (!$word || preg_match('/^[0-9@#$%^&_+~*<>=:;?!,.-]+$/', $word)) {
        return true;
    }

    // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
    if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word)) {
        return true;
    }

    // Contain numbers (e.g. "g00g13")
    if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word)) {
        return true;
    }

    // Blocked caps (e.g. "GOOGLE"). Identity comparison: with "==" two
    // numeric-looking strings such as "1e3" and "1E3" compare as numbers.
    if (!empty($this->options['ignore_caps']) && $word === mb_strtoupper($word)) {
        return true;
    }

    // Use exceptions from dictionary
    if (!empty($this->options['dictionary'])) {
        $this->load_dict();

        // @TODO: should dictionary be case-insensitive?
        // strict lookup, so that e.g. "1e3" does not match an entry "1000"
        if (!empty($this->dict) && in_array($word, $this->dict, true)) {
            return true;
        }
    }

    return false;
}
|
294 |
|
|
295 |
/**
 * Add a word to dictionary
 *
 * The input is split on spaces, so several words may be added at once.
 * The dictionary is saved only when at least one word was accepted.
 *
 * @param string $word The word to add
 */
public function add_word($word)
{
    $this->load_dict();

    $valid = false;

    foreach (explode(' ', $word) as $item) {
        // sanity check: skip empty tokens (repeated or surrounding
        // spaces) as well as overly long input
        if ($item !== '' && strlen($item) < 512) {
            $this->dict[] = $item;
            $valid = true;
        }
    }

    if ($valid) {
        $this->dict = array_unique($this->dict);
        $this->update_dict();
    }
}
|
317 |
|
|
318 |
/**
 * Remove a word from dictionary
 *
 * The dictionary is saved only when the word was actually found.
 *
 * @param string $word The word to remove
 */
public function remove_word($word)
{
    $this->load_dict();

    // Strict search, so that numeric-looking entries are not confused
    // with each other (e.g. "1e3" and "1000"). All copies are collected
    // because load_dict() merges hook-provided and stored words without
    // removing duplicates.
    $keys = array_keys($this->dict, (string) $word, true);

    if (!empty($keys)) {
        foreach ($keys as $key) {
            unset($this->dict[$key]);
        }

        $this->update_dict();
    }
}
|
332 |
|
|
333 |
/**
 * Update dictionary row in DB
 *
 * Runs the 'spell_dictionary_save' hook first. Unless the hook aborts,
 * the existing row is updated (or deleted when the dictionary is empty),
 * or a new row is inserted when none was found by load_dict().
 */
private function update_dict()
{
    // the shared dictionary is stored without a user ID
    $userid = null;
    if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
        $userid = $this->rc->get_user_id();
    }

    $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
        'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));

    if (!empty($plugin['abort'])) {
        return;
    }

    // no row stored and nothing to store
    if (!$this->have_dict && empty($this->dict)) {
        return;
    }

    $db    = $this->rc->db;
    $table = $db->table_name('dictionary', true);
    $data  = implode(' ', (array) $plugin['dictionary']);

    if (!$this->have_dict) {
        $result = $db->query(
            "INSERT INTO " . $table
            ." (`user_id`, `language`, `data`) VALUES (?, ?, ?)",
            $plugin['userid'], $plugin['language'], $data);

        // Remember the new row, so that another save on this object
        // updates it instead of inserting a second one.
        // NOTE(review): assumes query() returns a falsy value on failure
        // - confirm against rcube_db.
        if ($result) {
            $this->have_dict = true;
        }

        return;
    }

    $user_where = $plugin['userid'] ? "= " . $db->quote($plugin['userid']) : "IS NULL";

    if (!empty($this->dict)) {
        $db->query(
            "UPDATE " . $table
            ." SET `data` = ?"
            ." WHERE `user_id` " . $user_where
            ." AND `language` = ?",
            $data, $plugin['language']);
    }
    // don't store empty dict
    else {
        $result = $db->query(
            "DELETE FROM " . $table
            ." WHERE `user_id` " . $user_where
            ." AND `language` = ?",
            $plugin['language']);

        // The row is gone: a later save has to insert, not update.
        // NOTE(review): same assumption about the return value of query().
        if ($result) {
            $this->have_dict = false;
        }
    }
}
|
374 |
|
|
375 |
/**
 * Get dictionary from DB
 *
 * The dictionary is loaded once per object. The 'spell_dictionary_get'
 * hook runs first; unless it aborts, the words stored in the database
 * are appended to whatever the hook returned. Finding a database row
 * sets the have_dict flag used by update_dict().
 *
 * @return array List of dictionary words
 */
private function load_dict()
{
    // already loaded
    if (is_array($this->dict)) {
        return $this->dict;
    }

    // the shared dictionary is stored without a user ID
    $userid = null;
    if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
        $userid = $this->rc->get_user_id();
    }

    $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
        'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));

    if (empty($plugin['abort'])) {
        $db   = $this->rc->db;
        $dict = array();

        $sql_result = $db->query(
            "SELECT `data` FROM " . $db->table_name('dictionary', true)
            ." WHERE `user_id` ". ($plugin['userid'] ? "= ".$db->quote($plugin['userid']) : "IS NULL")
            ." AND `language` = ?",
            $plugin['language']);

        if ($sql_arr = $db->fetch_assoc($sql_result)) {
            // a row exists, even if it holds no words
            $this->have_dict = true;

            if (!empty($sql_arr['data'])) {
                // words are stored as one space-separated string
                $dict = explode(' ', $sql_arr['data']);
            }
        }

        $plugin['dictionary'] = array_merge((array)$plugin['dictionary'], $dict);
    }

    if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
        $this->dict = $plugin['dictionary'];
    }
    else {
        $this->dict = array();
    }

    return $this->dict;
}
b4edf7
|
418 |
} |