<?php

/*
 +-----------------------------------------------------------------------+
 | program/steps/utils/spell_pspell.inc                                  |
 |                                                                       |
 | This file is part of the RoundCube Webmail client                     |
 | Licensed under the GNU GPL                                            |
 |                                                                       |
 | PURPOSE:                                                              |
 |   Uses the Pspell extension to check spelling and returns results     |
 |   compatible with spell_googie.inc.                                   |
 |                                                                       |
 +-----------------------------------------------------------------------+
 | Author: Kris Steinhoff <steinhof@umich.edu>                           |
 +-----------------------------------------------------------------------+

 $Id$

*/

// Bail out early when the Pspell extension is not loaded: report the problem
// through raise_error(), answer the request with a 404 status and stop.
$pspell_available = extension_loaded('pspell');

if ($pspell_available === false) {
    $pspell_error = array(
        'code'    => 500,
        'type'    => 'php',
        'file'    => __FILE__,
        'line'    => __LINE__,
        'message' => "Pspell extension not available",
    );

    // NOTE(review): the two flags are presumably "log" and "terminate" --
    // confirm against raise_error(); they are passed through unchanged.
    raise_error($pspell_error, true, false);

    header('HTTP/1.1 404 Not Found');
    exit;
}

// max. number of suggestions for one word
define('MAX_SUGGESTIONS', 10);

// Read the raw request body; an unreadable body is treated as empty.
$data = file_get_contents('php://input');
if ($data === false) {
    $data = '';
}

// Extract the payload between the first <text> and the last </text> with
// plain string functions (simplexml_load_string breaks CRLFs).
$open_tag = '<text>';
$left     = strpos($data, $open_tag);
$right    = strrpos($data, '</text>');
$text     = '';

// Slice only when both tags were found and the closing tag lies behind the
// opening one. Without this check a missing tag (false) would be used as
// offset 0 and an arbitrary part of the request would be spell-checked.
if ($left !== false && $right !== false) {
    $start = $left + strlen($open_tag);
    if ($right > $start) {
        $text = substr($data, $start, $right - $start);
    }
}

// The payload arrives entity-encoded; decode it before checking.
$text = html_entity_decode($text, ENT_QUOTES, RCMAIL_CHARSET);

// Tokenize: split on runs of space/punctuation/newline characters, or on a
// dot followed by a non-word character. With PREG_SPLIT_OFFSET_CAPTURE every
// entry is array(token, byte offset of the token within $text).
// The limit is given as -1 ("no limit") rather than NULL, because passing
// null to the integer parameter is deprecated as of PHP 8.1.
$words = preg_split(
    '/[ !"#$%&()*+\\,\/\n:;<=>?@\[\]^_{|}-]+|\.[^\w]/',
    $text,
    -1,
    PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE
);

// preg_split() returns false on failure; fall back to "no words" so that the
// result can always be iterated.
if ($words === false) {
    $words = array();
}

// Init spellchecker for the language given in the "lang" GET parameter.
// Empty strings (the documented defaults) replace the former null values for
// the spelling and jargon arguments.
$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), '', '', RCMAIL_CHARSET, PSPELL_FAST);

// Build and send the XML result: one <c> element per misspelled word, where
// "o" is the character offset of the word in $text, "l" its length in
// characters, and the element content is the tab-separated suggestion list.
$out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($text).'">';

// Accumulated difference between byte length and character length of the
// words seen so far; converts preg_split()'s byte offsets to character offsets.
$diff = 0;

foreach ($words as $w) {
    $token = $w[0];
    $word  = trim($token);
    $len   = mb_strlen($word);

    // trim() may have removed leading whitespace (e.g. a tab or CR) that is
    // not a separator for the tokenizer; move the offset past it so that "o"
    // points at the first character of the word itself.
    $lead = strlen($token) - strlen(ltrim($token));
    $pos  = $w[1] + $lead - $diff;

    // Must be updated for every token, checked or not, to keep later
    // offsets correct.
    $diff += (strlen($word) - $len);

    // Skip when there is no dictionary handle, for empty tokens and for
    // tokens made only of digits and dots.
    if (!$plink || $word === '' || !preg_match('/[^0-9\.]/', $word)) {
        continue;
    }

    if (pspell_check($plink, $word)) {
        continue;
    }

    $suggestions = pspell_suggest($plink, $word);
    if (!is_array($suggestions)) {
        // pspell_suggest() may return false on failure
        $suggestions = array();
    }

    // Cap the list using the configured limit instead of a hard-coded 10.
    if (count($suggestions) > MAX_SUGGESTIONS) {
        $suggestions = array_slice($suggestions, 0, MAX_SUGGESTIONS);
    }

    $out .= '<c o="'.$pos.'" l="'.$len.'">';
    $out .= implode("\t", $suggestions);
    $out .= '</c>';
}

$out .= '</spellresult>';

header("Content-Type: text/xml; charset=".RCMAIL_CHARSET);
echo $out;
exit;