From ceb5b56c3b8db37425338e9c2661c5c4bc4ac069 Mon Sep 17 00:00:00 2001
From: Thomas Bruederli <thomas@roundcube.net>
Date: Tue, 15 May 2012 07:48:13 -0400
Subject: [PATCH] Move rcube_addressbook::normalize_string() to rcube_utils::normalize_string() for general purpose
---
program/include/rcube_utils.php | 41 +++++++++++++++++++++++++++++++++++++++++
1 files changed, 41 insertions(+), 0 deletions(-)
diff --git a/program/include/rcube_utils.php b/program/include/rcube_utils.php
index 5b31537..c6d4805 100644
--- a/program/include/rcube_utils.php
+++ b/program/include/rcube_utils.php
@@ -790,4 +790,45 @@
return $at ? $user . '@' . $domain : $domain;
}
+ /**
+ * Split the given string into word tokens
+ *
+ * @param string Input to tokenize
+ * @return array List of tokens
+ */
+ public static function tokenize_string($str)
+ {
+ return explode(" ", preg_replace(
+ array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u'),
+ array(' ', '\\1\\2', ' '),
+ $str));
+ }
+
+ /**
+ * Normalize the given string for fulltext search.
+ * Currently only optimized for Latin-1 characters; to be extended
+ *
+ * @param string Input string (UTF-8)
+ * @param boolean True to return list of words as array
+ * @return mixed Normalized string or a list of normalized tokens
+ */
+ public static function normalize_string($str, $as_array = false)
+ {
+ // split by words
+ $arr = self::tokenize_string($str);
+
+ foreach ($arr as $i => $part) {
+ if (utf8_encode(utf8_decode($part)) == $part) { // is latin-1 ?
+ $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
+ 'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
+ 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
+ array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+ }
+ else
+ $arr[$i] = mb_strtolower($part);
+ }
+
+ return $as_array ? $arr : join(" ", $arr);
+ }
+
}
--
Gitblit v1.9.1