From 4d7acb3cb293dbe3933c23282cdb6d1b04dea759 Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Thu, 09 Sep 2010 07:34:35 -0400
Subject: [PATCH] - Fix handling of charsets with LATIN-* label
---
CHANGELOG | 1 +
program/include/main.inc | 31 ++++++++++++++++++++++++++-----
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
index 4ce835e..cd3fcce 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -19,6 +19,7 @@
- Show disabled checkboxes for protected folders instead of dots (#1485498)
- Added fieldsets in Identity form, added 'identity_form' hook
- Re-added 'Close' button in upload form (#1486930, #1486823)
+- Fix handling of charsets with LATIN-* label
RELEASE 0.4
-----------
diff --git a/program/include/main.inc b/program/include/main.inc
index 04992fd..9b6668e 100644
--- a/program/include/main.inc
+++ b/program/include/main.inc
@@ -334,9 +334,10 @@
return $charsets[$input];
$charset = preg_replace(array(
- '/^[^0-9A-Z]+/', // e.g. _ISO-8859-JP$SIO
- '/\$.*$/', // e.g. _ISO-8859-JP$SIO
- '/UNICODE-1-1-*/', // RFC1641/1642
+ '/^[^0-9A-Z]+/', // e.g. _ISO-8859-JP$SIO
+ '/\$.*$/', // e.g. _ISO-8859-JP$SIO
+ '/UNICODE-1-1-*/', // RFC1641/1642
+ '/^X-/', // X- prefix (e.g. X-ROMAN8 => ROMAN8)
), '', $charset);
# Aliases: some of them from HTML5 spec.
@@ -367,8 +368,8 @@
'128' => 'SHIFT-JIS'
);
- // allow a-z and 0-9 only and remove X- prefix (e.g. X-ROMAN8 => ROMAN8)
- $str = preg_replace(array('/[^A-Z0-9]/', '/^X+/'), '', $charset);
+ // allow A-Z and 0-9 only
+ $str = preg_replace('/[^A-Z0-9]/', '', $charset);
if (isset($aliases[$str]))
$result = $aliases[$str];
@@ -386,6 +387,26 @@
else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
$result = 'WINDOWS-' . $m[2];
}
+ // LATIN
+ else if (preg_match('/(CSISOLATIN|LATIN)(.*)/', $str, $m)) {
+ $aliases = array('2' => 2, '3' => 3, '4' => 4, '5' => 9, '6' => 10,
+ '7' => 13, '8' => 14, '9' => 15, '10' => 16,
+ 'ARABIC' => 6, 'CYRILLIC' => 5, 'GREEK' => 7, 'HEBREW' => 8);
+
+ // some clients send windows-1252 text as latin1,
+ // it is safe to use windows-1252 for all latin1
+ if ($m[2] == 1) {
+ $result = 'WINDOWS-1252';
+ }
+ // if iconv is not supported we need ISO labels, it's also safe for iconv
+ else if (!empty($aliases[$m[2]])) {
+ $result = 'ISO-8859-'.$aliases[$m[2]];
+ }
+ // iconv requires conversion of e.g. LATIN-1 to LATIN1
+ else {
+ $result = $str;
+ }
+ }
else {
$result = $charset;
}
--
Gitblit v1.9.1