#1485398, #1485441: fix pspell spell-checking issues with CRLF line endings and multibyte characters

release-0.6
alecpl 16 years ago
parent 32eb29fb99
commit b214f8d4d8

@ -29,19 +29,29 @@ if (!extension_loaded('pspell')) {
exit; exit;
} }
// read input
$data = file_get_contents('php://input'); $data = file_get_contents('php://input');
$xml = simplexml_load_string($data);
$text = (string)$xml->text; // parse data (simplexml_load_string breaks CRLFs)
$left = strpos($data, '<text>');
$right = strrpos($data, '</text>');
$text = substr($data, $left+6, $right-($left+6));
// tokenize
$words = preg_split('/[ !"#$%&()*+\\,-.\/\n:;<=>?@\[\]^_{|}]+/', $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE ); $words = preg_split('/[ !"#$%&()*+\\,-.\/\n:;<=>?@\[\]^_{|}]+/', $text, NULL, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE );
$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8');
// init spellchecker
$plink = pspell_new(get_input_value('lang', RCUBE_INPUT_GET), null, null, 'utf-8', PSPELL_FAST);
// send output
$out = '<?xml version="1.0" encoding="UTF-8"?><spellresult charschecked="'.rc_strlen($text).'">'; $out = '<?xml version="1.0" encoding="UTF-8"?><spellresult charschecked="'.rc_strlen($text).'">';
$diff = 0; $diff = 0;
foreach ($words as $w) { foreach ($words as $w) {
$word = $w[0]; $word = trim($w[0]);
$pos = $w[1] - $diff; $pos = $w[1] - $diff;
$len = rc_strlen($word); $len = rc_strlen($word);
if ($plink && !pspell_check($plink, $word)) { if ($word && $plink && !pspell_check($plink, $word)) {
$suggestions = pspell_suggest($plink, $word); $suggestions = pspell_suggest($plink, $word);
$out .= '<c o="'.$pos.'" l="'.$len.'">'; $out .= '<c o="'.$pos.'" l="'.$len.'">';
$out .= implode("\t", $suggestions); $out .= implode("\t", $suggestions);
@ -49,6 +59,7 @@ foreach ($words as $w) {
} }
$diff += (strlen($word) - $len); $diff += (strlen($word) - $len);
} }
$out .= '</spellresult>'; $out .= '</spellresult>';
header("Content-Type: text/xml"); header("Content-Type: text/xml");

Loading…
Cancel
Save