| | |
| | | |
| | | /* |
| | | +-----------------------------------------------------------------------+ |
| | | | program/include/rcube_spellchecker.php | |
| | | | | |
| | | | This file is part of the Roundcube Webmail client | |
| | | | Copyright (C) 2011, Kolab Systems AG | |
| | | | Copyright (C) 2008-2011, The Roundcube Dev Team | |
| | |
| | | | | |
| | | | PURPOSE: | |
| | | | Spellchecking using different backends | |
| | | | | |
| | | +-----------------------------------------------------------------------+ |
| | | | Author: Aleksander Machniak <machniak@kolabsys.com> | |
| | | | Author: Thomas Bruederli <roundcube@gmail.com> | |
| | | +-----------------------------------------------------------------------+ |
| | | */ |
| | | |
| | | |
| | | /** |
| | | * Helper class for spellchecking with Googiespell, PSpell and Enchant support. |
| | |
| | | private $lang; |
| | | private $rc; |
| | | private $error; |
| | | private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.]([^\w]|$)/'; |
| | | private $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.](?=\W|$)/'; |
| | | private $options = array(); |
| | | private $dict; |
| | | private $have_dict; |
| | |
| | | if ($this->engine == 'pspell') { |
| | | $this->matches = $this->_pspell_check($this->content); |
| | | } |
| | | else if ($this->engine == 'enchant') { |
| | | $this->matches = $this->_enchant_check($this->content); |
| | | } |
| | | else { |
| | | $this->matches = $this->_googie_check($this->content); |
| | | } |
| | |
| | | if ($this->engine == 'pspell') { |
| | | return $this->_pspell_suggestions($word); |
| | | } |
| | | else if ($this->engine == 'enchant') { |
| | | return $this->_enchant_suggestions($word); |
| | | } |
| | | |
| | | return $this->_googie_suggestions($word); |
| | | } |
| | |
| | | if ($this->engine == 'pspell') { |
| | | return $this->_pspell_words($text, $is_html); |
| | | } |
| | | else if ($this->engine == 'enchant') { |
| | | return $this->_enchant_words($text, $is_html); |
| | | } |
| | | |
| | | return $this->_googie_words($text, $is_html); |
| | | } |
| | |
| | | function get_xml() |
| | | { |
| | | // send output |
| | | $out = '<?xml version="1.0" encoding="'.RCMAIL_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">'; |
| | | $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen($this->content).'">'; |
| | | |
| | | foreach ($this->matches as $item) { |
| | | $out .= '<c o="'.$item[1].'" l="'.$item[2].'">'; |
| | |
| | | $word = $item[0]; |
| | | } |
| | | else { |
| | | $word = mb_substr($this->content, $item[1], $item[2], RCMAIL_CHARSET); |
| | | $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET); |
| | | } |
| | | $result[$word] = is_array($item[4]) ? implode("\t", $item[4]) : $item[4]; |
| | | } |
| | |
| | | if (!$this->plink) { |
| | | if (!extension_loaded('pspell')) { |
| | | $this->error = "Pspell extension not available"; |
| | | rcube::raise_error(array( |
| | | 'code' => 500, 'type' => 'php', |
| | | 'file' => __FILE__, 'line' => __LINE__, |
| | | 'message' => $this->error), true, false); |
| | | |
| | | return; |
| | | } |
| | | |
| | | $this->plink = pspell_new($this->lang, null, null, RCMAIL_CHARSET, PSPELL_FAST); |
| | | $this->plink = pspell_new($this->lang, null, null, RCUBE_CHARSET, PSPELL_FAST); |
| | | } |
| | | |
| | | if (!$this->plink) { |
| | | $this->error = "Unable to load Pspell engine for selected language"; |
| | | } |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Checks the text using enchant |
| | | * |
| | | * The text is split into words with $this->separator; every word which is |
| | | * not an exception and is rejected by the Enchant dictionary is reported. |
| | | * |
| | | * @param string $text Text content for spellchecking |
| | | * |
| | | * @return array List of entries: array(word, offset, length, null, suggestions). |
| | | * Empty when the Enchant dictionary could not be loaded. |
| | | */ |
| | | private function _enchant_check($text) |
| | | { |
| | | // init spellchecker |
| | | $this->_enchant_init(); |
| | | |
| | | if (!$this->enchant_dictionary) { |
| | | return array(); |
| | | } |
| | | |
| | | // tokenize (-1 means "no limit"; passing NULL as the limit is deprecated since PHP 8.1) |
| | | $text = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); |
| | | |
| | | $diff = 0; |
| | | $matches = array(); |
| | | |
| | | foreach ($text as $w) { |
| | | $word = trim($w[0]); |
| | | // preg_split() reports byte offsets; $diff holds the surplus bytes |
| | | // (strlen - mb_strlen) accumulated over the words seen so far |
| | | $pos = $w[1] - $diff; |
| | | $len = mb_strlen($word); |
| | | |
| | | // exceptions are never reported, but still have to be counted in $diff below |
| | | if (!$this->is_exception($word) && !enchant_dict_check($this->enchant_dictionary, $word)) { |
| | | $suggestions = enchant_dict_suggest($this->enchant_dictionary, $word); |
| | | |
| | | // the result is not guaranteed to be an array, so check before counting |
| | | if (is_array($suggestions) && count($suggestions) > self::MAX_SUGGESTIONS) { |
| | | $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); |
| | | } |
| | | |
| | | $matches[] = array($word, $pos, $len, null, $suggestions); |
| | | } |
| | | |
| | | $diff += (strlen($word) - $len); |
| | | } |
| | | |
| | | return $matches; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Returns the misspelled words |
| | | * |
| | | * @param string $text Text to check. When empty, the words collected |
| | | * in $this->matches are returned instead. |
| | | * @param bool $is_html When true, $text is converted with html2text() first |
| | | * |
| | | * @return array List of misspelled words |
| | | */ |
| | | private function _enchant_words($text = null, $is_html=false) |
| | | { |
| | | $result = array(); |
| | | |
| | | if ($text) { |
| | | // init spellchecker |
| | | $this->_enchant_init(); |
| | | |
| | | if (!$this->enchant_dictionary) { |
| | | return array(); |
| | | } |
| | | |
| | | // With Enchant we don't need to get suggestions to return misspelled words |
| | | if ($is_html) { |
| | | $text = $this->html2text($text); |
| | | } |
| | | |
| | | // -1 means "no limit"; passing NULL as the limit is deprecated since PHP 8.1 |
| | | $text = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE); |
| | | |
| | | foreach ($text as $w) { |
| | | $word = trim($w[0]); |
| | | |
| | | // skip exceptions |
| | | if ($this->is_exception($word)) { |
| | | continue; |
| | | } |
| | | |
| | | if (!enchant_dict_check($this->enchant_dictionary, $word)) { |
| | | $result[] = $word; |
| | | } |
| | | } |
| | | |
| | | return $result; |
| | | } |
| | | |
| | | // no text given: report the words from the stored matches |
| | | foreach ($this->matches as $m) { |
| | | $result[] = $m[0]; |
| | | } |
| | | |
| | | return $result; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Returns suggestions for misspelled word |
| | | * |
| | | * @param string $word The word to get suggestions for |
| | | * |
| | | * @return array At most self::MAX_SUGGESTIONS suggestions; empty array when the |
| | | * dictionary is not available or nothing was suggested |
| | | */ |
| | | private function _enchant_suggestions($word) |
| | | { |
| | | // init spellchecker |
| | | $this->_enchant_init(); |
| | | |
| | | if (!$this->enchant_dictionary) { |
| | | return array(); |
| | | } |
| | | |
| | | $suggestions = enchant_dict_suggest($this->enchant_dictionary, $word); |
| | | |
| | | // validate the type before counting: counting a non-array value fails on newer PHP |
| | | if (!is_array($suggestions)) { |
| | | return array(); |
| | | } |
| | | |
| | | if (count($suggestions) > self::MAX_SUGGESTIONS) { |
| | | $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS); |
| | | } |
| | | |
| | | return $suggestions; |
| | | } |
| | | |
| | | |
| | | /** |
| | | * Initializes Enchant broker and dictionary |
| | | * |
| | | * On success $this->enchant_dictionary holds the dictionary requested for |
| | | * $this->lang. On failure $this->error is set and the method returns early. |
| | | */ |
| | | private function _enchant_init() |
| | | { |
| | | if (!$this->enchant_broker) { |
| | | if (!extension_loaded('enchant')) { |
| | | $this->error = "Enchant extension not available"; |
| | | return; |
| | | } |
| | | |
| | | $this->enchant_broker = enchant_broker_init(); |
| | | |
| | | // enchant_broker_init() returns false on failure; the calls below need a valid broker |
| | | if (!$this->enchant_broker) { |
| | | $this->error = "Unable to initialize Enchant broker"; |
| | | return; |
| | | } |
| | | } |
| | | |
| | | if (!enchant_broker_dict_exists($this->enchant_broker, $this->lang)) { |
| | | $this->error = "Unable to load dictionary for selected language using Enchant"; |
| | | return; |
| | | } |
| | | |
| | | $this->enchant_dictionary = enchant_broker_request_dict($this->enchant_broker, $this->lang); |
| | | } |
| | | |
| | | |
| | |
| | | fclose($fp); |
| | | } |
| | | |
| | | // parse HTTP response |
| | | if (preg_match('!^HTTP/1.\d (\d+)(.+)!', $store, $m)) { |
| | | $http_status = $m[1]; |
| | | if ($http_status != '200') |
| | | $this->error = 'HTTP ' . $m[1] . $m[2]; |
| | | } |
| | | |
| | | if (!$store) { |
| | | $this->error = "Empty result from spelling engine"; |
| | | } |
| | | else if (preg_match('/<spellresult error="([^"]+)"/', $store, $m) && $m[1]) { |
| | | $this->error = "Error code $m[1] returned"; |
| | | } |
| | | |
| | | preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $matches, PREG_SET_ORDER); |
| | |
| | | || !empty($this->options['ignore_caps']) || !empty($this->options['dictionary']) |
| | | ) { |
| | | foreach ($matches as $idx => $m) { |
| | | $word = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET); |
| | | $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET); |
| | | // skip exceptions |
| | | if ($this->is_exception($word)) { |
| | | unset($matches[$idx]); |
| | |
| | | $result = array(); |
| | | |
| | | foreach ($matches as $m) { |
| | | $result[] = mb_substr($text, $m[1], $m[2], RCMAIL_CHARSET); |
| | | $result[] = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET); |
| | | } |
| | | |
| | | return $result; |
| | |
| | | |
| | | /** |
| | | * Converts HTML content to plain text using rcube_html2text |
| | | * |
| | | * @param string $text HTML content |
| | | * |
| | | * @return mixed Result of rcube_html2text::get_text() |
| | | */ |
| | | private function html2text($text) |
| | | { |
| | | // a single converter instance is enough; the former html2text object was discarded right after creation |
| | | $h2t = new rcube_html2text($text, false, true, 0); |
| | | return $h2t->get_text(); |
| | | } |
| | | |
| | |
| | | |
| | | if (empty($plugin['abort'])) { |
| | | $dict = array(); |
| | | $this->rc->db->query( |
| | | $sql_result = $this->rc->db->query( |
| | | "SELECT data FROM ".$this->rc->db->table_name('dictionary') |
| | | ." WHERE user_id ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL") |
| | | ." AND " . $this->rc->db->quoteIdentifier('language') . " = ?", |