SPECS: spec_utf8 Handle language codes with charset info (like pl_PL.ISO8859-2)

wiget wiget at pld-linux.org
Mon Jun 12 03:07:12 CEST 2006


Author: wiget                        Date: Mon Jun 12 01:07:12 2006 GMT
Module: SPECS                         Tag: HEAD
---- Log message:
Handle language codes with charset info (like pl_PL.ISO8859-2).
Dialects (like de_DE@euro) are still not supported.

---- Files affected:
SPECS:
   spec_utf8 (1.2 -> 1.3) 

---- Diffs:

================================================================
Index: SPECS/spec_utf8
diff -u SPECS/spec_utf8:1.2 SPECS/spec_utf8:1.3
--- SPECS/spec_utf8:1.2	Fri Jun  9 18:15:16 2006
+++ SPECS/spec_utf8	Mon Jun 12 03:07:07 2006
@@ -41,9 +41,22 @@
     'zh_TW':'BIG5',
     0:0}
 
+def find_encoding(lang):
+  r = re.match("^([^.]+)(\.[^@]+)?$", lang)
+  pure_lang = r.group(1)
+  if r.group(2) == None:
+    try:
+      enc = langs[lang]
+    except KeyError:
+      enc = None
+  else:
+    # strip dot
+    enc = r.group(2)[1:]
+  return (enc, pure_lang)
+
 def parse_spec(infile, outfile):
   re_summary = re.compile("^Summary\(([^\)]+)\):\t+(.*)$")
-  re_utf = re.compile(".utf-8$", re.I)
+  re_utf = re.compile("^utf-8$", re.I)
   re_desc = re.compile("^(%description.*\s)-l\s+([\S]+)($|\s.*$)")
   re_proc = re.compile("^%")
   in_desc = False
@@ -52,43 +65,35 @@
       outline = l
       r = re_summary.match(l)
       if r:
-        lang = r.group(1)
-        if re_utf.search(lang):
-          outfile.write(l)
-          continue
-        if lang in langs.keys():
+        (enc, pure_lang) = find_encoding(r.group(1))
+        if enc == None:
+          outfile.write("#unknow lang code Summary(%s)\n" % (lang))
+        elif not re_utf.search(enc):
           try:
-            desc = unicode(r.group(2), langs[lang]).encode("UTF-8")
-            l = "Summary(%s.UTF-8):   %s\n" % (lang, desc)
+            desc = unicode(r.group(2), enc).encode("UTF-8")
+            l = "Summary(%s.UTF-8):   %s\n" % (pure_lang, desc)
           except UnicodeDecodeError:
             outfile.write("#transcoding error Summary(%s)\n" % (lang))
-        else:
-          outfile.write("#unknow lang code Summary(%s)\n" % (lang))
       if in_desc:
         if re_proc.search(l):
           in_desc = False
         else:
-          if not langs[lang] == 'UTF-8':
+          if not re_utf.search(enc):
             try:
-              l = unicode(l, langs[lang]).encode("UTF-8")
+              l = unicode(l, enc).encode("UTF-8")
             except UnicodeDecodeError:
               outfile.write("#transcoding error %%description -l %s\n" % (lang))
 
       r = re_desc.match(l)
       if r:
-        lang = r.group(2)
-        if re_utf.search(lang):
-          outfile.write(l)
-          continue
-        in_desc = True
-        if not lang in langs.keys():
+        (enc, pure_lang) = find_encoding(r.group(2))
+        if enc == None:
           outfile.write("#unknow lang code in %%description -l %s\n" % (lang))
-        else:
-          l = "%s-l %s.UTF-8%s\n" % (r.group(1), lang, r.group(3))
+        elif not re_utf.search(enc):
+          in_desc = True
+          l = "%s-l %s.UTF-8%s\n" % (r.group(1), pure_lang, r.group(3))
 
       outfile.write(l)
-
-
 
 def main():
   parse_spec(sys.stdin, sys.stdout)
================================================================

---- CVS-web:
    http://cvs.pld-linux.org/SPECS/spec_utf8?r1=1.2&r2=1.3&f=u



More information about the pld-cvs-commit mailing list