From 4d7acb3cb293dbe3933c23282cdb6d1b04dea759 Mon Sep 17 00:00:00 2001
From: alecpl <alec@alec.pl>
Date: Thu, 09 Sep 2010 07:34:35 -0400
Subject: [PATCH] - Fix handling of charsets with LATIN-* label

---
 CHANGELOG                |    1 +
 program/include/main.inc |   31 ++++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 4ce835e..cd3fcce 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -19,6 +19,7 @@
 - Show disabled checkboxes for protected folders instead of dots (#1485498)
 - Added fieldsets in Identity form, added 'identity_form' hook
 - Re-added 'Close' button in upload form (#1486930, #1486823)
+- Fix handling of charsets with LATIN-* label
 
 RELEASE 0.4
 -----------
diff --git a/program/include/main.inc b/program/include/main.inc
index 04992fd..9b6668e 100644
--- a/program/include/main.inc
+++ b/program/include/main.inc
@@ -334,9 +334,10 @@
     return $charsets[$input];
 
   $charset = preg_replace(array(
-    '/^[^0-9A-Z]+/',	// e.g. _ISO-8859-JP$SIO
-    '/\$.*$/',		// e.g. _ISO-8859-JP$SIO
-    '/UNICODE-1-1-*/',	// RFC1641/1642
+    '/^[^0-9A-Z]+/',    // e.g. _ISO-8859-JP$SIO
+    '/\$.*$/',          // e.g. _ISO-8859-JP$SIO
+    '/UNICODE-1-1-*/',  // RFC1641/1642
+    '/^X-/',            // X- prefix (e.g. X-ROMAN8 => ROMAN8)
     ), '', $charset);
 
   # Aliases: some of them from HTML5 spec.
@@ -367,8 +368,8 @@
     '128'           => 'SHIFT-JIS'
   );
 
-  // allow a-z and 0-9 only and remove X- prefix (e.g. X-ROMAN8 => ROMAN8)
-  $str = preg_replace(array('/[^A-Z0-9]/', '/^X+/'), '', $charset);
+  // allow A-Z and 0-9 only
+  $str = preg_replace('/[^A-Z0-9]/', '', $charset);
 
   if (isset($aliases[$str]))
     $result = $aliases[$str];
@@ -386,6 +387,26 @@
   else if (preg_match('/(WIN|WINDOWS)([0-9]+)/', $str, $m)) {
     $result = 'WINDOWS-' . $m[2];
     }
+  // LATIN
+  else if (preg_match('/(CSISOLATIN|LATIN)(.*)/', $str, $m)) {
+    $aliases = array('2' => 2, '3' => 3, '4' => 4, '5' => 9, '6' => 10,
+        '7' => 13, '8' => 14, '9' => 15, '10' => 16,
+        'ARABIC' => 6, 'CYRILLIC' => 5, 'GREEK' => 7, 'HEBREW' => 8);
+
+    // some clients send windows-1252 text as latin1,
+    // it is safe to use windows-1252 for all latin1
+    if ($m[2] == 1) {
+      $result = 'WINDOWS-1252';
+      }
+    // if iconv is not supported we need ISO labels, it's also safe for iconv
+    else if (!empty($aliases[$m[2]])) {
+      $result = 'ISO-8859-'.$aliases[$m[2]];
+      }
+    // iconv requires conversion of e.g. LATIN-1 to LATIN1
+    else {
+      $result = $str;
+      }
+    }
   else {
     $result = $charset;
     }

--
Gitblit v1.9.1