Move rcube_addressbook::normalize_string() to rcube_utils::normalize_string() for general-purpose use

pull/6/head
Thomas Bruederli 12 years ago
parent 92be3ee4a4
commit ceb5b56c3b

@ -447,30 +447,13 @@ abstract class rcube_addressbook
*
* @param string Input string (UTF-8)
* @return string Normalized string
* @deprecated since 0.9-beta
*/
protected static function normalize_string($str)
{
    // Deprecated thin wrapper kept so existing callers of this class keep
    // working. The implementation now lives in rcube_utils::normalize_string();
    // delegate to it instead of carrying a second copy of the algorithm here.
    return rcube_utils::normalize_string($str);
}
/**
* Compose a valid display name from the given structured contact data
*

@ -313,7 +313,7 @@ class rcube_contacts extends rcube_addressbook
// fulltext search in all fields
else if ($col == '*') {
$words = array();
foreach (explode($WS, self::normalize_string($value)) as $word) {
foreach (explode($WS, rcube_utils::normalize_string($value)) as $word) {
switch ($mode) {
case 1: // strict
$words[] = '(' . $this->db->ilike('words', $word . '%')
@ -352,7 +352,7 @@ class rcube_contacts extends rcube_addressbook
// vCard field
else {
if (in_array($col, $this->fulltext_cols)) {
foreach (explode(" ", self::normalize_string($val)) as $word) {
foreach (rcube_utils::normalize_string($val, true) as $word) {
switch ($mode) {
case 1: // strict
$words[] = '(' . $this->db->ilike('words', $word . $WS . '%')
@ -728,9 +728,9 @@ class rcube_contacts extends rcube_addressbook
if (isset($value))
$vcard->set($field, $value, $section);
if ($fulltext && is_array($value))
$words .= ' ' . self::normalize_string(join(" ", $value));
$words .= ' ' . rcube_utils::normalize_string(join(" ", $value));
else if ($fulltext && strlen($value) >= 3)
$words .= ' ' . self::normalize_string($value);
$words .= ' ' . rcube_utils::normalize_string($value);
}
}
$out['vcard'] = $vcard->export(false);

@ -790,4 +790,45 @@ class rcube_utils
return $at ? $user . '@' . $domain : $domain;
}
/**
* Split the given string into word tokens
*
* @param string Input to tokenize
* @return array List of tokens
*/
public static function tokenize_string($str)
{
    // Applied in order:
    //  1. runs of whitespace and the separators ; / + - become one space
    //  2. digits separated by '-', '.' or whitespace are glued together
    //  3. a word of 1-3 characters surrounded by whitespace is dropped
    $replacements = array(' ', '\\1\\2', ' ');

    $result = preg_replace(
        array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u'),
        $replacements,
        $str);

    // The /u modifier makes preg_replace() return NULL when the input is not
    // valid UTF-8, which would discard the whole string. Retry byte-wise so
    // such input still yields tokens.
    if ($result === null) {
        $result = preg_replace(
            array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/'),
            $replacements,
            $str);
    }

    // Last resort: never hand NULL to explode()
    if ($result === null) {
        $result = '';
    }

    return explode(" ", $result);
}
/**
* Normalize the given string for fulltext search.
* Currently only optimized for Latin-1 characters; to be extended
*
* @param string Input string (UTF-8)
* @param boolean True to return list of words as array
* @return mixed Normalized string or a list of normalized tokens
*/
public static function normalize_string($str, $as_array = false)
{
    // Accented Latin-1 letters and the ASCII letter each one is folded to.
    // This is an array map applied to the UTF-8 token itself: the previous
    // 3-argument strtr() worked byte by byte and therefore only produced
    // correct results when this file was saved as ISO-8859-1.
    // NOTE: the literals below require this file to be saved as UTF-8.
    static $accents = array(
        'Ç' => 'c', 'ç' => 'c',
        'ä' => 'a', 'â' => 'a', 'à' => 'a', 'å' => 'a', 'Å' => 'a', 'á' => 'a',
        'Á' => 'a', 'Â' => 'a', 'À' => 'a', 'ã' => 'a', 'Ã' => 'a',
        'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'è' => 'e',
        'É' => 'e', 'Ê' => 'e', 'Ë' => 'e', 'È' => 'e',
        'ï' => 'i', 'î' => 'i', 'ì' => 'i', 'í' => 'i', 'Í' => 'i', 'Î' => 'i', 'Ï' => 'i',
        'ö' => 'o', 'ô' => 'o', 'ò' => 'o', 'ø' => 'o', 'Ø' => 'o', 'ó' => 'o',
        'Ó' => 'o', 'Ô' => 'o', 'õ' => 'o', 'Õ' => 'o',
        'ü' => 'u', 'û' => 'u', 'ù' => 'u', 'ú' => 'u', 'Ú' => 'u', 'Û' => 'u', 'Ù' => 'u',
        'ÿ' => 'y', 'ý' => 'y', 'Ý' => 'y',
        'ñ' => 'n', 'Ñ' => 'n',
    );

    // Replacements applied after lower-casing
    static $digraphs = array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u');

    // split by words
    $arr = self::tokenize_string($str);

    foreach ($arr as $i => $part) {
        // is latin-1 ? True when the token is valid UTF-8 and every code point
        // is <= U+00FF. Replaces the utf8_encode(utf8_decode()) round trip,
        // which is deprecated as of PHP 8.2.
        if (preg_match('/^[\x{00}-\x{FF}]*$/Du', $part)) {
            // Fold accents first, then lower-case, then collapse digraphs.
            // This is the same order as before, so e.g. "AE" still becomes "a".
            $arr[$i] = strtr(strtolower(strtr($part, $accents)), $digraphs);
        }
        else {
            // Anything outside Latin-1 (or not valid UTF-8) is only lower-cased
            $arr[$i] = mb_strtolower($part);
        }
    }

    return $as_array ? $arr : join(" ", $arr);
}
}

Loading…
Cancel
Save