# Patch for tools/strip-icu.py. It replaces Python 2-only constructs (print
# statement, dict.iteritems()/itervalues(), per-line decode()/encode() calls)
# with forms that run on Python 3. Summary of the hunks below:
#   * Usage comment: the run directory changes from $ICU_ROOT/source/data to
#     $ICU_ROOT/icu4c/source/data.
#   * delete_matching, parse_txt, write_file and the misc/misclocal.mk writer:
#     open() now passes encoding='utf-8', and the .decode('utf-8') calls on
#     each line read are removed.
#   * parse_txt: the `print line` branch for lines that do not match
#     (.*){"(.*)"} is removed; such lines are now skipped without any output.
#   * remove_sections, remove_languages, remove_scripts, write_file and
#     remove_countries: itervalues()/iteritems() become values()/items().
#   * write_dict: .encode('utf-8') is dropped from the out.write() calls, so
#     str values are written directly.
# NOTE(review): parse_txt still iterates open(...) directly with no `with`
# block, so the handle is not explicitly closed -- confirm this is acceptable.
# NOTE(review): with `print line` gone, unparsed input lines are no longer
# reported anywhere -- confirm that losing this diagnostic is intended.
# NOTE(review): line breaks inside the hunks appear to have been collapsed
# into spaces in this copy, so the original indentation and blank lines are
# not visible here -- verify against the upstream commit before applying.
diff --git a/tools/strip-icu.py b/tools/strip-icu.py index e26e40c17..915a615ed 100644 --- a/tools/strip-icu.py +++ b/tools/strip-icu.py @@ -15,7 +15,7 @@ # Aegisub Project http://www.aegisub.org/ # A script to strip all of the data we don't use out of ICU's data files -# Run from $ICU_ROOT/source/data +# Run from $ICU_ROOT/icu4c/source/data from __future__ import unicode_literals import re @@ -25,10 +25,10 @@ import os def delete_matching(filename, strs): exprs = [re.compile(s) for s in strs] - with open(filename) as f: - lines = [line for line in f if not any(r.match(line.decode('utf-8')) for r in exprs)] + with open(filename, encoding='utf-8') as f: + lines = [line for line in f if not any(r.match(line) for r in exprs)] - with open(filename, 'w') as f: + with open(filename, 'w', encoding='utf-8') as f: for line in lines: f.write(line) @@ -36,7 +36,7 @@ REMOVE_SUBDIRS=['LOCSRCDIR', 'CURRSRCDIR', 'ZONESRCDIR', 'COLSRCDIR', 'RBNFSRCDI delete_matching('Makefile.in', ['^-include .*%s' % s for s in REMOVE_SUBDIRS]) delete_matching('Makefile.in', ['^CNV_FILES']) -with open('misc/misclocal.mk', 'w') as f: +with open('misc/misclocal.mk', 'w', encoding='utf-8') as f: f.write('MISC_SOURCE = supplementalData.txt likelySubtags.txt icuver.txt icustd.txt metadata.txt') # Remove data we don't need from the lang and region files @@ -45,8 +45,7 @@ def parse_txt(filename): cur = root stack = [root] comment = False - for line in open(filename): - line = line.decode('utf-8') + for line in open(filename, encoding='utf-8'): line = line.strip() if len(line) == 0: continue @@ -72,15 +71,13 @@ def parse_txt(filename): continue m = re.match('(.*){"(.*)"}', line) - if not m: - print line - else: + if m: cur[m.group(1)] = m.group(2) return root def remove_sections(root): - for child in root.itervalues(): + for child in root.values(): child.pop('Keys', None) child.pop('LanguagesShort', None) child.pop('Types', None) @@ -91,7 +88,7 @@ def remove_sections(root): 
child.pop('Scripts%stand-alone', None) def remove_languages(root): - for lang, child in root.iteritems(): + for lang, child in root.items(): # We only care about a language's name in that language lang = lang.split('_')[0] trimmed = {} @@ -103,7 +100,7 @@ def remove_languages(root): # Scripts which are actually used by stuff SCRIPTS = ['Cyrl', 'Latn', 'Arab', 'Vaii', 'Hans', 'Hant'] def remove_scripts(root): - for lang, child in root.iteritems(): + for lang, child in root.items(): v = child.get('Scripts') if not v: continue @@ -121,20 +118,20 @@ def write_dict(name, value, out, indent): child_indent = indent + ' ' out.write(indent) - out.write(name.encode('utf-8')) + out.write(name) out.write('{\n') for k in sorted(value.keys()): v = value[k] if type(v) == dict: write_dict(k, v, out, child_indent) else: - out.write(('%s%s{"%s"}\n' % (child_indent, k, v)).encode('utf-8')) + out.write(('%s%s{"%s"}\n' % (child_indent, k, v))) out.write(indent) out.write('}\n') def write_file(root, filename): - with open(filename, 'w') as f: - for k, v in root.iteritems(): + with open(filename, 'w', encoding='utf-8') as f: + for k, v in root.items(): write_dict(k, v, f, '') def minify_lang(filename): @@ -287,7 +284,7 @@ def gather_regions(): REGIONS = gather_regions() def remove_countries(root): - for lang, child in root.iteritems(): + for lang, child in root.items(): v = child.get('Countries', {}) if not v: continue