From 49dad5f669965ca6149c2d759ede2a91fa571149 Mon Sep 17 00:00:00 2001
From: Aleksander Machniak <alec@alec.pl>
Date: Tue, 27 May 2014 05:39:31 -0400
Subject: [PATCH] Fix broken normalize_string(), add support for ISO-8859-2

---
 tests/Framework/Utils.php             |    3 ++-
 program/lib/Roundcube/rcube_utils.php |   34 +++++++++++++++++++++++++---------
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/program/lib/Roundcube/rcube_utils.php b/program/lib/Roundcube/rcube_utils.php
index c2009ce..00999ba 100644
--- a/program/lib/Roundcube/rcube_utils.php
+++ b/program/lib/Roundcube/rcube_utils.php
@@ -928,7 +928,7 @@
 
     /**
      * Normalize the given string for fulltext search.
-     * Currently only optimized for Latin-1 characters; to be extended
+     * Currently only optimized for ISO-8859-1 and ISO-8859-2 characters; to be extended
      *
      * @param string  Input string (UTF-8)
      * @param boolean True to return list of words as array
@@ -949,15 +949,32 @@
         // split by words
         $arr = self::tokenize_string($str);
 
+        // detect character set
+        if (utf8_encode(utf8_decode($str)) == $str) {
+            // ISO-8859-1 (or ASCII)
+            preg_match_all('/./u', 'äâàåáãæçéêëèïîìíñöôòøõóüûùúýÿ', $keys);
+            preg_match_all('/./',  'aaaaaaaceeeeiiiinoooooouuuuyy', $values);
+
+            $mapping = array_combine($keys[0], $values[0]);
+            $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+        }
+        else if (rcube_charset::convert(rcube_charset::convert($str, 'UTF-8', 'ISO-8859-2'), 'ISO-8859-2', 'UTF-8') == $str) {
+            // ISO-8859-2
+            preg_match_all('/./u', 'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżý', $keys);
+            preg_match_all('/./',  'aaaaccceeeeiilllnnooorrsssttuuuuzzzy', $values);
+
+            $mapping = array_combine($keys[0], $values[0]);
+            $mapping = array_merge($mapping, array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u'));
+        }
+
         foreach ($arr as $i => $part) {
-            if (utf8_encode(utf8_decode($part)) == $part) {  // is latin-1 ?
-                $arr[$i] = utf8_encode(strtr(strtolower(strtr(utf8_decode($part),
-                    'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ',
-                    'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy')),
-                    array('ß' => 'ss', 'ae' => 'a', 'oe' => 'o', 'ue' => 'u')));
+            $part = mb_strtolower($part);
+
+            if (!empty($mapping)) {
+                $part = strtr($part, $mapping);
             }
-            else
-                $arr[$i] = mb_strtolower($part);
+
+            $arr[$i] = $part;
         }
 
         return $as_array ? $arr : join(" ", $arr);
@@ -1038,7 +1055,6 @@
             return $password;
         }
     }
-
 
     /**
      * Find out if the string content means true or false
diff --git a/tests/Framework/Utils.php b/tests/Framework/Utils.php
index 82b8ebb..560a8bd 100644
--- a/tests/Framework/Utils.php
+++ b/tests/Framework/Utils.php
@@ -327,7 +327,8 @@
         $test = array(
             ''        => '',
             'abc def' => 'abc def',
-            'ÇçäâàåéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ' => 'ccaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy',
+            'ÇçäâàåæéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ' => 'ccaaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy',
+            'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżýĄŚŻŹĆ' => 'aaaaccceeeeiilllnnooorrsssttuuuuzzzyaszzc',
             'ß'  => 'ss',
             'ae' => 'a',
             'oe' => 'o',

--
Gitblit v1.9.1