githubFork/roundcubemail.git

			@@ -912,14 +912,21 @@
			* Split the given string into word tokens
			*
			* @param string Input to tokenize
			* @param integer Minimum length of a single token
			* @return array List of tokens
			*/
			public static function tokenize_string($str, $minlen = 2)
			{
				// Words shorter than $minlen characters are removed from the result;
				// a value of 1 (or less) disables that filtering entirely.
				$minlen = (int) $minlen;

				// 1) collapse runs of whitespace and separators (; / + -) into one space
				// 2) glue digit groups split by dash, dot or whitespace ("12-34" => "1234")
				$expr = array('/[\s;\/+-]+/ui', '/(\d)[-.\s]+(\d)/u');
				$repl = array(' ', '\\1\\2');

				if ($minlen > 1) {
					// 3) replace stand-alone words of up to ($minlen - 1) characters,
					//    anchored on string boundaries or surrounding whitespace
					$maxlen = $minlen - 1;
					$expr[] = "/(^|\s+)\w{1,$maxlen}(\s+|$)/u";
					$repl[] = ' ';
				}

				$normalized = preg_replace($expr, $repl, $str);

				// preg_replace() yields null on failure (e.g. malformed UTF-8 with /u)
				if ($normalized === null) {
					return array();
				}

				// Drop empty fragments only ('strlen' keeps a literal "0" token, which a
				// callback-less array_filter() would discard) and re-index the list so
				// callers always get consecutive keys starting at 0.
				return array_values(array_filter(explode(' ', $normalized), 'strlen'));
			}

			/**
			@@ -928,10 +935,11 @@
			*
			* @param string Input string (UTF-8)
			* @param boolean True to return list of words as array
			* @param integer Minimum length of tokens
			*
			* @return mixed Normalized string or a list of normalized tokens
			*/
			public static function normalize_string($str, $as_array = false)
			public static function normalize_string($str, $as_array = false, $minlen = 2)
			{
			// replace 4-byte unicode characters with '?' character,
			// these are not supported in default utf-8 charset on mysql,
			@@ -943,7 +951,7 @@
			. ')/', '?', $str);

			// split by words
			$arr = self::tokenize_string($str);
			$arr = self::tokenize_string($str, $minlen);

			// detect character set
			if (utf8_encode(utf8_decode($str)) == $str) {

			@@ -344,6 +344,25 @@
			}

			/**
			* rcube_utils::tokenize_string()
			*/
			function test_tokenize_string()
			{
				// Input string => tokens expected with the default minimum length (2):
				// single-character words are dropped, separators (; -) split tokens,
				// and multi-byte words are kept intact.
				$test = array(
					'' => array(),
					'abc d' => array('abc'),
					'abc de' => array('abc','de'),
					'äàé;êöü-xyz' => array('äàé','êöü','xyz'),
					'日期格式' => array('日期格式'),
				);

				foreach ($test as $input => $output) {
					$result = rcube_utils::tokenize_string($input);
					// Name the offending input on failure, as test_normalize_string() does
					$this->assertSame($output, $result, "Error tokenizing '$input'");
				}

				// With a minimum length of 1 no short-word filtering takes place
				$result = rcube_utils::tokenize_string('abc d', 1);
				$this->assertSame(array('abc', 'd'), $result, "Error tokenizing 'abc d' with minlen = 1");
			}

			/**
			* rcube_utils::normalize_string()
			*/
			function test_normalize_string()
			@@ -353,15 +372,18 @@
			'abc def' => 'abc def',
			'ÇçäâàåæéêëèïîìÅÉöôòüûùÿøØáíóúñÑÁÂÀãÃÊËÈÍÎÏÓÔõÕÚÛÙýÝ' => 'ccaaaaaeeeeiiiaeooouuuyooaiounnaaaaaeeeiiioooouuuyy',
			'ąáâäćçčéęëěíîłľĺńňóôöŕřśšşťţůúűüźžżýĄŚŻŹĆ' => 'aaaaccceeeeiilllnnooorrsssttuuuuzzzyaszzc',
			'ß' => 'ss',
			'ae' => 'a',
			'oe' => 'o',
			'ue' => 'u',
			'ß' => '',
			'ßs' => 'sss',
			'Xae' => 'xa',
			'Xoe' => 'xo',
			'Xue' => 'xu',
			'项目' => '项目',
			'日' => '', // FIXME: this should not be stripped although minlen = 2
			);

			foreach ($test as $input => $output) {
			$result = rcube_utils::normalize_string($input);
			$this->assertSame($output, $result);
			$this->assertSame($output, $result, "Error normalizing '$input'");
			}
			}

	program/lib/Roundcube/rcube_utils.php	22 ●●●●● patch | view | raw | blame | history
	tests/Framework/Utils.php	32 ●●●●● patch | view | raw | blame | history