From b214f8d4d81a167cd651a0c021a7f1486600c1f4 Mon Sep 17 00:00:00 2001 From: alecpl <alec@alec.pl> Date: Tue, 14 Oct 2008 09:32:48 -0400 Subject: [PATCH] #1485398, #1485441: fix (p)spell checking CRLF/multibyte issues --- program/steps/mail/spell_pspell.inc | 21 ++++++++++++++++----- 1 files changed, 16 insertions(+), 5 deletions(-) diff --git a/program/steps/mail/spell_pspell.inc b/program/steps/mail/spell_pspell.inc index 4d15604..bf696f2 100644 --- a/program/steps/mail/spell_pspell.inc +++ b/program/steps/mail/spell_pspell.inc @@ -29,19 +29,29 @@ exit; } +// read input $data = file_get_contents('php://input'); -$xml = simplexml_load_string($data); -$text = (string)$xml->text; + +// parse data (simplexml_load_string breaks CRLFs) +$left = strpos($data, '<text>'); +$right = strrpos($data, '</text>'); +$text = substr($data, $left+6, $right-($left+6)); + +// tokenize $words = preg_split('/[ !"#$%&()*+\\,-.\/\n:;<=>?@\[\]^_{|}]+/', $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE ); -$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8'); + +// init spellchecker +$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8', PSPELL_FAST); + +// send output $out = '<?xml version="1.0" encoding="UTF-8"?><spellresult charschecked="'.rc_strlen($text).'">'; $diff = 0; foreach ($words as $w) { - $word = $w[0]; + $word = trim($w[0]); $pos = $w[1] - $diff; $len = rc_strlen($word); - if ($plink && !pspell_check($plink, $word)) { + if ($word && $plink && !pspell_check($plink, $word)) { $suggestions = pspell_suggest($plink, $word); $out .= '<c o="'.$pos.'" l="'.$len.'">'; $out .= implode("\t", $suggestions); @@ -49,6 +59,7 @@ } $diff += (strlen($word) - $len); } + $out .= '</spellresult>'; header("Content-Type: text/xml"); -- Gitblit v1.9.1