Thomas Bruederli
2013-08-21 eef9eb1146cf9903e58743291ca27c68340aea2c
program/lib/Roundcube/rcube_spellchecker.php
@@ -2,8 +2,6 @@
/*
 +-----------------------------------------------------------------------+
 | program/include/rcube_spellchecker.php                                |
 |                                                                       |
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2011, Kolab Systems AG                                  |
 | Copyright (C) 2008-2011, The Roundcube Dev Team                       |
@@ -14,13 +12,11 @@
 |                                                                       |
 | PURPOSE:                                                              |
 |   Spellchecking using different backends                              |
 |                                                                       |
 +-----------------------------------------------------------------------+
 | Author: Aleksander Machniak <machniak@kolabsys.com>                   |
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 +-----------------------------------------------------------------------+
*/
/**
 * Helper class for spellchecking with Googiespell, PSpell and Enchant support.
@@ -35,7 +31,7 @@
    private $lang;
    private $rc;
    private $error;
    private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/';
    private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.](?=\W|$)/';
    private $options = array();
    private $dict;
    private $have_dict;
@@ -88,6 +84,9 @@
        if ($this->engine == 'pspell') {
            $this->matches = $this->_pspell_check($this->content);
        }
        else if ($this->engine == 'enchant') {
            $this->matches = $this->_enchant_check($this->content);
        }
        else {
            $this->matches = $this->_googie_check($this->content);
        }
@@ -119,6 +118,9 @@
        if ($this->engine == 'pspell') {
            return $this->_pspell_suggestions($word);
        }
        else if ($this->engine == 'enchant') {
            return $this->_enchant_suggestions($word);
        }
        return $this->_googie_suggestions($word);
    }
@@ -137,6 +139,9 @@
        if ($this->engine == 'pspell') {
            return $this->_pspell_words($text, $is_html);
        }
        else if ($this->engine == 'enchant') {
            return $this->_enchant_words($text, $is_html);
        }
        return $this->_googie_words($text, $is_html);
    }
@@ -150,7 +155,7 @@
    function get_xml()
    {
        // send output
        $out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';
        $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">';
        foreach ($this->matches as $item) {
            $out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
@@ -178,7 +183,7 @@
                $word = $item[0];
            }
            else {
                $word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET);
                $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET);
            }
            $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
        }
@@ -318,20 +323,150 @@
        if (!$this->plink) {
            if (!extension_loaded('pspell')) {
                $this->error = "Pspell extension not available";
                rcube::raise_error(array(
                    'code' => 500, 'type' => 'php',
                    'file' => __FILE__, 'line' => __LINE__,
                    'message' => $this->error), true, false);
                return;
            }
            $this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST);
            $this->plink = pspell_new($this->lang, null, null, RCUBE_CHARSET, PSPELL_FAST);
        }
        if (!$this->plink) {
            $this->error = "Unable to load Pspell engine for selected language";
        }
    }
    /**
     * Checks the text using enchant
     *
     * @param string $text Text content for spellchecking
     */
    private function _enchant_check($text)
    {
        // init spellchecker
        $this->_enchant_init();
        if (!$this->enchant_dictionary) {
            return array();
        }
        // tokenize
        $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
        $diff       = 0;
        $matches    = array();
        foreach ($text as $w) {
            $word = trim($w[0]);
            $pos  = $w[1] - $diff;
            $len  = mb_strlen($word);
            // skip exceptions
            if ($this->is_exception($word)) {
            }
            else if (!enchant_dict_check($this->enchant_dictionary, $word)) {
                $suggestions = enchant_dict_suggest($this->enchant_dictionary, $word);
                if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
                    $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
                }
                $matches[] = array($word, $pos, $len, null, $suggestions);
            }
            $diff += (strlen($word) - $len);
        }
        return $matches;
    }
    /**
     * Returns the misspelled words
     */
    private function _enchant_words($text = null, $is_html=false)
    {
        $result = array();
        if ($text) {
            // init spellchecker
            $this->_enchant_init();
            if (!$this->enchant_dictionary) {
                return array();
            }
            // With Enchant we don't need to get suggestions to return misspelled words
            if ($is_html) {
                $text = $this->html2text($text);
            }
            $text = preg_split($this->separator, $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);
            foreach ($text as $w) {
                $word = trim($w[0]);
                // skip exceptions
                if ($this->is_exception($word)) {
                    continue;
                }
                if (!enchant_dict_check($this->enchant_dictionary, $word)) {
                    $result[] = $word;
                }
            }
            return $result;
        }
        foreach ($this->matches as $m) {
            $result[] = $m[0];
        }
        return $result;
    }
    /**
     * Returns suggestions for misspelled word
     */
    private function _enchant_suggestions($word)
    {
        // init spellchecker
        $this->_enchant_init();
        if (!$this->enchant_dictionary) {
            return array();
        }
        $suggestions = enchant_dict_suggest($this->enchant_dictionary, $word);
        if (sizeof($suggestions) > self::MAX_SUGGESTIONS)
            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        return is_array($suggestions) ? $suggestions : array();
    }
    /**
     * Initializes PSpell dictionary
     */
    private function _enchant_init()
    {
        if (!$this->enchant_broker) {
            if (!extension_loaded('enchant')) {
                $this->error = "Enchant extension not available";
                return;
            }
            $this->enchant_broker = enchant_broker_init();
        }
        if (!enchant_broker_dict_exists($this->enchant_broker, $this->lang)) {
            $this->error = "Unable to load dictionary for selected language using Enchant";
            return;
        }
        $this->enchant_dictionary = enchant_broker_request_dict($this->enchant_broker, $this->lang);
    }
@@ -376,8 +511,18 @@
            fclose($fp);
        }
        // parse HTTP response
        if (preg_match('!^HTTP/1.\d (\d+)(.+)!', $store, $m)) {
            $http_status = $m[1];
            if ($http_status != '200')
                $this->error = 'HTTP ' . $m[1] . $m[2];
        }
        if (!$store) {
            $this->error = "Empty result from spelling engine";
        }
        else if (preg_match('/<spellresult error="([^"]+)"/', $store, $m) && $m[1]) {
            $this->error = "Error code $m[1] returned";
        }
        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER);
@@ -387,7 +532,7 @@
            || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary'])
        ) {
            foreach ($matches as $idx => $m) {
                $word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
                $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);
                // skip  exceptions
                if ($this->is_exception($word)) {
                    unset($matches[$idx]);
@@ -416,7 +561,7 @@
        $result = array();
        foreach ($matches as $m) {
            $result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET);
            $result[] = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);
        }
        return $result;
@@ -447,7 +592,7 @@
    private function html2text($text)
    {
        $h2t = new html2text($text, false, true, 0);
        $h2t = new rcube_html2text($text, false, true, 0);
        return $h2t->get_text();
    }
@@ -592,7 +737,7 @@
        if (empty($plugin['abort'])) {
            $dict = array();
            $this->rc->db->query(
            $sql_result = $this->rc->db->query(
                "SELECT data FROM ".$this->rc->db->table_name('dictionary')
                ." WHERE user_id ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
                    ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?",