From ceb5b56c3b8db37425338e9c2661c5c4bc4ac069 Mon Sep 17 00:00:00 2001
From: Thomas Bruederli <thomas@roundcube.net>
Date: Tue, 15 May 2012 07:48:13 -0400
Subject: [PATCH] Move rcube_addressbook::normalize_string() to rcube_utils::normalize_string() for general purpose

---
 program/include/rcube_utils.php |   41 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/program/include/rcube_utils.php b/program/include/rcube_utils.php
index 5b31537..c6d4805 100644
--- a/program/include/rcube_utils.php
+++ b/program/include/rcube_utils.php
@@ -790,4 +790,45 @@
         return $at ? $user . '@' . $domain : $domain;
     }
 
+    /**
+     * Break a string into word tokens: separator runs collapse to one space, digits
+     * split by '-', '.' or whitespace are joined, enclosed 1-3 character words are dropped.
+     * @param string Input to tokenize
+     * @return array List of tokens
+     */
+    public static function tokenize_string($str)
+    {
+        $search  = array('/[\s;\/+-]+/i', '/(\d)[-.\s]+(\d)/', '/\s\w{1,3}\s/u');
+        $replace = array(' ', '\\1\\2', ' ');
+
+        return explode(" ", preg_replace($search, $replace, $str));
+    }
+
+    /**
+     * Normalize the given string for fulltext search: tokenize, lower-case, and for
+     * tokens representable in Latin-1 fold accents, 'ß' and ae/oe/ue to plain ASCII.
+     *
+     * @param string  Input string (UTF-8)
+     * @param boolean True to return list of words as array
+     * @return mixed  Normalized string (tokens joined by spaces) or a list of normalized tokens
+     */
+    public static function normalize_string($str, $as_array = false)
+    {
+        // Per-character folding table; built from whole UTF-8 characters because
+        // the three-argument strtr() maps single bytes and cannot handle these.
+        preg_match_all('/./u', 'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ', $chars);
+        $fold = array_combine($chars[0], str_split('ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy'));
+        $arr  = self::tokenize_string($str);
+
+        foreach ($arr as $i => $part) {
+            $arr[$i] = mb_strtolower($part);
+            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ?
+                $arr[$i] = strtr(strtr($arr[$i], $fold),
+                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+            }
+        }
+
+        return $as_array ? $arr : join(" ", $arr);
+    }
+
 }

--
Gitblit v1.9.1