Refactor spellchecker class into backend subclasses for better extensibility
parent
c856b73c86
commit
bc0a470157
@ -0,0 +1,164 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
+-----------------------------------------------------------------------+
|
||||
| This file is part of the Roundcube Webmail client |
|
||||
| |
|
||||
| Copyright (C) 2011-2013, Kolab Systems AG |
|
||||
| Copyright (C) 2011-2013, The Roundcube Dev Team |
|
||||
| |
|
||||
| Licensed under the GNU General Public License version 3 or |
|
||||
| any later version with exceptions for skins & plugins. |
|
||||
| See the README file for a full license statement. |
|
||||
| |
|
||||
| PURPOSE: |
|
||||
| Spellchecking backend implementation to work with Enchant |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Author: Aleksander Machniak <machniak@kolabsys.com> |
|
||||
+-----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/**
|
||||
* Spellchecking backend implementation to work with Enchant
|
||||
*
|
||||
* @package Framework
|
||||
* @subpackage Utils
|
||||
*/
|
||||
class rcube_spellcheck_enchant extends rcube_spellcheck_engine
{
    private $enchant_broker;
    private $enchant_dictionary;
    private $matches = array();

    /**
     * Initializes the Enchant broker and loads the dictionary for $this->lang.
     *
     * The broker and the dictionary are created once and reused on subsequent
     * calls. On failure $this->error is set and $this->enchant_dictionary
     * stays empty, which the public methods treat as "engine unavailable".
     */
    private function init()
    {
        // dictionary already loaded by a previous call, nothing to do
        if ($this->enchant_dictionary) {
            return;
        }

        if (!$this->enchant_broker) {
            if (!extension_loaded('enchant')) {
                $this->error = "Enchant extension not available";
                return;
            }

            $this->enchant_broker = enchant_broker_init();
        }

        // broker initialization failed, or the language is not supported
        if (!$this->enchant_broker
            || !enchant_broker_dict_exists($this->enchant_broker, $this->lang)
        ) {
            $this->error = "Unable to load dictionary for selected language using Enchant";
            return;
        }

        $this->enchant_dictionary = enchant_broker_request_dict($this->enchant_broker, $this->lang);

        if (!$this->enchant_dictionary) {
            $this->error = "Unable to load dictionary for selected language using Enchant";
        }
    }

    /**
     * Splits text into word tokens using the engine's separator pattern.
     *
     * @param string $text Text to tokenize
     *
     * @return array List of array(token, byte offset) pairs
     */
    private function tokenize($text)
    {
        // -1 means "no limit" (passing NULL here is deprecated since PHP 8.1)
        $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

        return is_array($tokens) ? $tokens : array();
    }

    /**
     * Fetches suggestions for a word, limited to MAX_SUGGESTIONS entries.
     *
     * @param string $word The word
     *
     * @return array Suggestions list (empty if Enchant returned no array)
     */
    private function limited_suggestions($word)
    {
        $suggestions = enchant_dict_suggest($this->enchant_dictionary, $word);

        // guard before counting: the result is not guaranteed to be an array
        if (!is_array($suggestions)) {
            return array();
        }

        if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        }

        return $suggestions;
    }

    /**
     * Set content and check spelling
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of misspellings, each one being
     *               array(word, position, length, null, suggestions)
     *
     * @see rcube_spellcheck_engine::check()
     */
    public function check($text)
    {
        $this->init();

        if (!$this->enchant_dictionary) {
            return array();
        }

        $diff    = 0;
        $matches = array();

        foreach ($this->tokenize($text) as $w) {
            $word = trim($w[0]);
            // preg_split() reports byte offsets; subtract the surplus bytes of
            // the multibyte characters seen so far to get a character position
            $pos  = $w[1] - $diff;
            $len  = mb_strlen($word);

            // report the word unless it is a known exception or spelled correctly
            if (!$this->dictionary->is_exception($word)
                && !enchant_dict_check($this->enchant_dictionary, $word)
            ) {
                $matches[] = array($word, $pos, $len, null, $this->limited_suggestions($word));
            }

            $diff += (strlen($word) - $len);
        }

        $this->matches = $matches;

        return $matches;
    }

    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list
     *
     * @see rcube_spellcheck_engine::get_suggestions()
     */
    public function get_suggestions($word)
    {
        $this->init();

        if (!$this->enchant_dictionary) {
            return array();
        }

        return $this->limited_suggestions($word);
    }

    /**
     * Returns misspelled words
     *
     * @param string $text The content for spellchecking. If empty, the
     *                     words found by the last check() call are returned.
     *
     * @return array List of misspelled words
     *
     * @see rcube_spellcheck_engine::get_words()
     */
    public function get_words($text = null)
    {
        $result = array();

        if ($text) {
            // init spellchecker
            $this->init();

            if (!$this->enchant_dictionary) {
                return array();
            }

            // With Enchant we don't need to get suggestions to return misspelled words
            foreach ($this->tokenize($text) as $w) {
                $word = trim($w[0]);

                // skip exceptions
                if ($this->dictionary->is_exception($word)) {
                    continue;
                }

                if (!enchant_dict_check($this->enchant_dictionary, $word)) {
                    $result[] = $word;
                }
            }

            return $result;
        }

        // no text given: reuse the result of the last check() call
        foreach ($this->matches as $m) {
            $result[] = $m[0];
        }

        return $result;
    }
}
|
||||
|
@ -0,0 +1,84 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
+-----------------------------------------------------------------------+
|
||||
| This file is part of the Roundcube Webmail client |
|
||||
| |
|
||||
| Copyright (C) 2011-2013, Kolab Systems AG |
|
||||
| Copyright (C) 2008-2013, The Roundcube Dev Team |
|
||||
| |
|
||||
| Licensed under the GNU General Public License version 3 or |
|
||||
| any later version with exceptions for skins & plugins. |
|
||||
| See the README file for a full license statement. |
|
||||
| |
|
||||
| PURPOSE: |
|
||||
| Interface class for a spell-checking backend |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Author: Thomas Bruederli <roundcube@gmail.com> |
|
||||
+-----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/**
|
||||
* Interface class for a spell-checking backend
|
||||
*
|
||||
* @package Framework
|
||||
* @subpackage Utils
|
||||
*/
|
||||
abstract class rcube_spellcheck_engine
{
    /** Maximum number of suggestions returned for a single word */
    const MAX_SUGGESTIONS = 10;

    /** @var string Language passed to the constructor */
    protected $lang;

    /** @var string Last error message (null if no error was recorded) */
    protected $error;

    /** @var mixed Dictionary object passed to the constructor; backends call its is_exception() method */
    protected $dictionary;

    /**
     * @var string Word separator pattern: runs of whitespace, parentheses,
     *             slashes, brackets, braces, angle brackets and double quotes,
     *             or one of :;?!,. when followed by a non-word character or
     *             the end of the string
     */
    protected $separator = '/[\s\r\n\t\(\)\/\[\]{}<>\\"]+|[:;?!,\.](?=\W|$)/';

    /**
     * Default constructor
     *
     * @param mixed  $dict Dictionary object (stored in $this->dictionary)
     * @param string $lang Language (stored in $this->lang)
     */
    public function __construct($dict, $lang)
    {
        $this->dictionary = $dict;
        $this->lang       = $lang;
    }

    /**
     * Set content and check spelling
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of misspellings found, empty when no misspelling
     *               was found (or the backend is not available)
     */
    abstract public function check($text);

    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list
     */
    abstract public function get_suggestions($word);

    /**
     * Returns misspelled words
     *
     * @param string $text The content for spellchecking. If empty content
     *                     used for check() method will be used.
     *
     * @return array List of misspelled words
     */
    abstract public function get_words($text = null);

    /**
     * Returns error message
     *
     * @return string Error message
     */
    public function error()
    {
        return $this->error;
    }
}
|
||||
|
@ -0,0 +1,158 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
+-----------------------------------------------------------------------+
|
||||
| This file is part of the Roundcube Webmail client |
|
||||
| |
|
||||
| Copyright (C) 2008-2013, The Roundcube Dev Team |
|
||||
| |
|
||||
| Licensed under the GNU General Public License version 3 or |
|
||||
| any later version with exceptions for skins & plugins. |
|
||||
| See the README file for a full license statement. |
|
||||
| |
|
||||
| PURPOSE: |
|
||||
| Spellchecking backend implementation to work with Googiespell |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Author: Aleksander Machniak <machniak@kolabsys.com> |
|
||||
| Author: Thomas Bruederli <roundcube@gmail.com> |
|
||||
+-----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/**
|
||||
* Spellchecking backend implementation to work with a Googiespell service
|
||||
*
|
||||
* @package Framework
|
||||
* @subpackage Utils
|
||||
*/
|
||||
class rcube_spellcheck_googie extends rcube_spellcheck_engine
{
    const GOOGLE_HOST = 'ssl://www.google.com';
    const GOOGLE_PORT = 443;

    private $matches = array();
    private $content;

    /**
     * Set content and check spelling
     *
     * Sends the text as an XML <spellrequest> to the service configured in
     * 'spellcheck_uri' (or to the default Google host when that option is
     * empty) and parses the <c> elements of the response.
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of misspellings; each item is a preg_match_all()
     *               set: [1] offset, [2] length, [3] the "s" attribute,
     *               [4] tab-separated suggestions
     *
     * @see rcube_spellcheck_engine::check()
     */
    public function check($text)
    {
        $this->content = $text;

        // spell check uri is configured
        $url = rcube::get_instance()->config->get('spellcheck_uri');

        if ($url) {
            $a_uri = parse_url($url);

            // parse_url() returns false on a seriously malformed URL
            if (!is_array($a_uri)) {
                $a_uri = array();
            }

            // every URL component is optional, do not assume the keys exist
            $scheme = isset($a_uri['scheme']) ? $a_uri['scheme'] : '';
            $ssl    = ($scheme == 'https' || $scheme == 'ssl');
            $port   = !empty($a_uri['port']) ? $a_uri['port'] : ($ssl ? 443 : 80);
            $host   = ($ssl ? 'ssl://' : '') . (isset($a_uri['host']) ? $a_uri['host'] : '');
            $path   = (isset($a_uri['path']) ? $a_uri['path'] : '')
                . (!empty($a_uri['query']) ? '?' . $a_uri['query'] : '')
                . $this->lang;
        }
        else {
            $host = self::GOOGLE_HOST;
            $port = self::GOOGLE_PORT;
            $path = '/tbproxy/spell?lang=' . $this->lang;
        }

        // Google has some problem with spaces, use \n instead
        $gtext = str_replace(' ', "\n", $text);

        // escape the text: a raw "<" or "&" would make the request malformed XML
        $gtext = '<?xml version="1.0" encoding="utf-8" ?>'
            .'<spellrequest textalreadyclipped="0" ignoredups="0" ignoredigits="1" ignoreallcaps="1">'
            .'<text>' . htmlspecialchars($gtext, ENT_QUOTES, RCUBE_CHARSET) . '</text>'
            .'</spellrequest>';

        $store = '';
        if ($fp = fsockopen($host, $port, $errno, $errstr, 30)) {
            $out = "POST $path HTTP/1.0\r\n";
            $out .= "Host: " . str_replace('ssl://', '', $host) . "\r\n";
            $out .= "Content-Length: " . strlen($gtext) . "\r\n";
            $out .= "Content-Type: application/x-www-form-urlencoded\r\n";
            $out .= "Connection: Close\r\n\r\n";
            $out .= $gtext;
            fwrite($fp, $out);

            while (!feof($fp)) {
                $line = fgets($fp, 128);

                // stop on read error/timeout instead of looping forever
                if ($line === false) {
                    break;
                }

                $store .= $line;
            }

            fclose($fp);
        }

        // parse HTTP response
        if (preg_match('!^HTTP/1.\d (\d+)(.+)!', $store, $m)) {
            $http_status = $m[1];
            if ($http_status != '200') {
                $this->error = 'HTTP ' . $m[1] . $m[2];
            }
        }

        if (!$store) {
            $this->error = "Empty result from spelling engine";
        }
        else if (preg_match('/<spellresult error="([^"]+)"/', $store, $m) && $m[1]) {
            $this->error = "Error code $m[1] returned";
        }

        preg_match_all('/<c o="([^"]*)" l="([^"]*)" s="([^"]*)">([^<]*)<\/c>/', $store, $found, PREG_SET_ORDER);

        // skip exceptions; collecting into a new list keeps the keys
        // sequential, so that index 0 is always the first reported match
        $matches = array();

        foreach ($found as $m) {
            $word = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);

            if (!$this->dictionary->is_exception($word)) {
                $matches[] = $m;
            }
        }

        $this->matches = $matches;

        return $matches;
    }

    /**
     * Returns suggestions for the specified word
     *
     * Note: when $word is not empty this runs check($word), which replaces
     * the content and matches stored by a previous check() call.
     *
     * @param string $word The word. If empty, the first match of the last
     *                     check() call is used.
     *
     * @return array Suggestions list (at most MAX_SUGGESTIONS entries)
     *
     * @see rcube_spellcheck_engine::get_suggestions()
     */
    public function get_suggestions($word)
    {
        $matches = $word ? $this->check($word) : $this->matches;

        // no match or no suggestions for the first match
        if (empty($matches[0][4])) {
            return array();
        }

        $suggestions = explode("\t", $matches[0][4]);

        if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        }

        return $suggestions;
    }

    /**
     * Returns misspelled words
     *
     * @param string $text The content for spellchecking. If empty, content
     *                     and matches of the last check() call are used.
     *
     * @return array List of misspelled words
     *
     * @see rcube_spellcheck_engine::get_words()
     */
    public function get_words($text = null)
    {
        if ($text) {
            $matches = $this->check($text);
        }
        else {
            $matches = $this->matches;
            $text    = $this->content;
        }

        $result = array();

        // the service reports offset/length only, cut the words out of the text
        foreach ($matches as $m) {
            $result[] = mb_substr($text, $m[1], $m[2], RCUBE_CHARSET);
        }

        return $result;
    }
}
|
||||
|
@ -0,0 +1,160 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
+-----------------------------------------------------------------------+
|
||||
| This file is part of the Roundcube Webmail client |
|
||||
| |
|
||||
| Copyright (C) 2008-2013, The Roundcube Dev Team |
|
||||
| |
|
||||
| Licensed under the GNU General Public License version 3 or |
|
||||
| any later version with exceptions for skins & plugins. |
|
||||
| See the README file for a full license statement. |
|
||||
| |
|
||||
| PURPOSE: |
|
||||
| Spellchecking backend implementation to work with Pspell |
|
||||
+-----------------------------------------------------------------------+
|
||||
| Author: Aleksander Machniak <machniak@kolabsys.com> |
|
||||
| Author: Thomas Bruederli <roundcube@gmail.com> |
|
||||
+-----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/**
|
||||
* Spellchecking backend implementation to work with Pspell
|
||||
*
|
||||
* @package Framework
|
||||
* @subpackage Utils
|
||||
*/
|
||||
class rcube_spellcheck_pspell extends rcube_spellcheck_engine
{
    private $plink;
    private $matches = array();

    /**
     * Initializes PSpell dictionary
     *
     * Creates the Pspell link for $this->lang on first use. On failure
     * $this->error is set and $this->plink stays empty, which the public
     * methods treat as "engine unavailable".
     */
    private function init()
    {
        if (!$this->plink) {
            if (!extension_loaded('pspell')) {
                $this->error = "Pspell extension not available";
                return;
            }

            // empty strings for the unused spelling/jargon arguments
            // (passing null to string parameters is deprecated since PHP 8.1)
            $this->plink = pspell_new($this->lang, '', '', RCUBE_CHARSET, PSPELL_FAST);
        }

        if (!$this->plink) {
            $this->error = "Unable to load Pspell engine for selected language";
        }
    }

    /**
     * Splits text into word tokens using the engine's separator pattern.
     *
     * @param string $text Text to tokenize
     *
     * @return array List of array(token, byte offset) pairs
     */
    private function tokenize($text)
    {
        // -1 means "no limit" (passing NULL here is deprecated since PHP 8.1)
        $tokens = preg_split($this->separator, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE);

        return is_array($tokens) ? $tokens : array();
    }

    /**
     * Fetches suggestions for a word, limited to MAX_SUGGESTIONS entries.
     *
     * @param string $word The word
     *
     * @return array Suggestions list (empty if Pspell returned no array)
     */
    private function limited_suggestions($word)
    {
        $suggestions = pspell_suggest($this->plink, $word);

        // guard before counting: pspell_suggest() may return false
        if (!is_array($suggestions)) {
            return array();
        }

        if (sizeof($suggestions) > self::MAX_SUGGESTIONS) {
            $suggestions = array_slice($suggestions, 0, self::MAX_SUGGESTIONS);
        }

        return $suggestions;
    }

    /**
     * Set content and check spelling
     *
     * @param string $text Text content for spellchecking
     *
     * @return array List of misspellings, each one being
     *               array(word, position, length, null, suggestions)
     *
     * @see rcube_spellcheck_engine::check()
     */
    public function check($text)
    {
        $this->init();

        if (!$this->plink) {
            return array();
        }

        $diff    = 0;
        $matches = array();

        foreach ($this->tokenize($text) as $w) {
            $word = trim($w[0]);
            // preg_split() reports byte offsets; subtract the surplus bytes of
            // the multibyte characters seen so far to get a character position
            $pos  = $w[1] - $diff;
            $len  = mb_strlen($word);

            // report the word unless it is a known exception or spelled correctly
            if (!$this->dictionary->is_exception($word)
                && !pspell_check($this->plink, $word)
            ) {
                $matches[] = array($word, $pos, $len, null, $this->limited_suggestions($word));
            }

            $diff += (strlen($word) - $len);
        }

        $this->matches = $matches;

        return $matches;
    }

    /**
     * Returns suggestions for the specified word
     *
     * @param string $word The word
     *
     * @return array Suggestions list
     *
     * @see rcube_spellcheck_engine::get_suggestions()
     */
    public function get_suggestions($word)
    {
        $this->init();

        if (!$this->plink) {
            return array();
        }

        return $this->limited_suggestions($word);
    }

    /**
     * Returns misspelled words
     *
     * @param string $text The content for spellchecking. If empty, the
     *                     words found by the last check() call are returned.
     *
     * @return array List of misspelled words
     *
     * @see rcube_spellcheck_engine::get_words()
     */
    public function get_words($text = null)
    {
        $result = array();

        if ($text) {
            // init spellchecker
            $this->init();

            if (!$this->plink) {
                return array();
            }

            // With PSpell we don't need to get suggestions to return misspelled words
            foreach ($this->tokenize($text) as $w) {
                $word = trim($w[0]);

                // skip exceptions
                if ($this->dictionary->is_exception($word)) {
                    continue;
                }

                if (!pspell_check($this->plink, $word)) {
                    $result[] = $word;
                }
            }

            return $result;
        }

        // no text given: reuse the result of the last check() call
        foreach ($this->matches as $m) {
            $result[] = $m[0];
        }

        return $result;
    }
}
|
||||
|
Loading…
Reference in New Issue