From b214f8d4d81a167cd651a0c021a7f1486600c1f4 Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Tue, 14 Oct 2008 09:32:48 -0400
Subject: [PATCH] #1485398, #1485441: fix (p)spell checking CRLF/multibyte issues

---
 program/steps/mail/spell_pspell.inc |   21 ++++++++++++++++-----
 1 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/program/steps/mail/spell_pspell.inc b/program/steps/mail/spell_pspell.inc
index 4d15604..bf696f2 100644
--- a/program/steps/mail/spell_pspell.inc
+++ b/program/steps/mail/spell_pspell.inc
@@ -29,19 +29,29 @@
     exit;
 }
 
+// read input
 $data = file_get_contents('php://input');
-$xml = simplexml_load_string($data);
-$text = (string)$xml->text;
+
+// extract the raw <text> payload by hand (simplexml_load_string would normalize CRLF to LF)
+$left = strpos($data, '<text>');
+$right = strrpos($data, '</text>');
+$text = substr($data, $left+6, $right-($left+6));
+
+// tokenize
 $words = preg_split('/[ !"#$%&()*+\\,-.\/\n:;<=>?@\[\]^_{|}]+/', $text, NULL,  PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE );
-$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8');
+
+// init spellchecker
+$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8', PSPELL_FAST);
+
+// send output
 $out = '<?xml version="1.0" encoding="UTF-8"?><spellresult charschecked="'.rc_strlen($text).'">';
 
 $diff = 0;
 foreach ($words as $w) {
-    $word = $w[0];
+    $word = trim($w[0]);
     $pos  = $w[1] - $diff;
     $len  = rc_strlen($word);
-    if ($plink && !pspell_check($plink, $word)) {
+    if ($word && $plink && !pspell_check($plink, $word)) {
         $suggestions = pspell_suggest($plink, $word);
         $out .= '<c o="'.$pos.'" l="'.$len.'">';
         $out .= implode("\t", $suggestions);
@@ -49,6 +59,7 @@
     }
     $diff += (strlen($word) - $len);
 }
+
 $out .= '</spellresult>';
 
 header("Content-Type: text/xml");

--
Gitblit v1.9.1