diff --git a/build/icu/icu.vcxproj b/build/icu/icu.vcxproj
index 4f589b953..b7194b08c 100644
--- a/build/icu/icu.vcxproj
+++ b/build/icu/icu.vcxproj
@@ -81,12 +81,11 @@
Outputs="$(AegisubObjectDir)build.timestamp"
>
-
-
+
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# Aegisub Project http://www.aegisub.org/
+
+# A script to strip all of the data we don't use out of ICU's data files
+# Run from $ICU_ROOT/source/data
+
+from __future__ import unicode_literals
+import re
+import os
+
+# Remove stuff we don't use at all from the Makefile
+def delete_matching(filename, strs):
+    """Rewrite ``filename`` in place, dropping every line matched by ``strs``.
+
+    ``strs`` is an iterable of regular-expression sources. Each pattern is
+    applied with ``re.match``, so it is anchored at the start of the line.
+    The file is read and written as UTF-8 and line endings are preserved.
+
+    Raises ``UnicodeDecodeError`` if the file is not valid UTF-8.
+    """
+    # Imported locally so the function does not depend on a module-level
+    # import being added elsewhere in the file.
+    import io
+
+    exprs = [re.compile(s) for s in strs]
+
+    # io.open() decodes on read and encodes on write on both Python 2 and
+    # Python 3; the builtin open() yields str lines on Python 3, which have
+    # no .decode(). newline='\n' turns newline translation off so the kept
+    # lines are written back exactly as they were read.
+    with io.open(filename, encoding='utf-8', newline='\n') as f:
+        lines = [line for line in f if not any(r.match(line) for r in exprs)]
+
+    with io.open(filename, 'w', encoding='utf-8', newline='\n') as f:
+        f.writelines(lines)
+
+REMOVE_SUBDIRS = [
+    'LOCSRCDIR',
+    'CURRSRCDIR',
+    'ZONESRCDIR',
+    'COLSRCDIR',
+    'RBNFSRCDIR',
+    'TRANSLITSRCDIR',
+]
+
+# Drop the "-include" lines that mention any of the REMOVE_SUBDIRS names,
+# then the lines that start with CNV_FILES.
+delete_matching('Makefile.in', ['^-include .*' + s for s in REMOVE_SUBDIRS])
+delete_matching('Makefile.in', ['^CNV_FILES'])
+
+# Remove data we don't need from the lang and region files
+def parse_txt(filename):
+    """Parse the UTF-8 resource text file ``filename`` into nested dicts.
+
+    A line ending in ``{`` opens a table stored under the text before the
+    brace, a line consisting of ``}`` closes the current table, and a line of
+    the form ``key{"value"}`` stores a string. Blank lines, lines containing
+    ``//`` and ``/* ... */`` block comments are skipped. Any other line is
+    printed and otherwise ignored.
+
+    Returns the root dict. Raises ``IndexError`` if the file has more closing
+    braces than opening ones.
+    """
+    # Imported locally so the function does not depend on a module-level
+    # import being added elsewhere in the file.
+    import io
+
+    root = {}
+    cur = root
+    stack = [root]
+    comment = False
+    # io.open() decodes for us on Python 2 and 3, and the with-block closes
+    # the file once parsing is done.
+    with io.open(filename, encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+
+            # Inside a block comment only the terminator matters. This is
+            # checked before '//' so that a comment line containing a URL
+            # and the closing '*/' still ends the comment.
+            if comment:
+                if '*/' in line:
+                    comment = False
+                continue
+            if '//' in line:
+                continue
+            if '/*' in line:
+                # A comment opened and closed on the same line must not
+                # leave the parser in comment state.
+                comment = '*/' not in line.split('/*', 1)[1]
+                continue
+
+            if line == '}':
+                stack.pop()
+                cur = stack[-1]
+                continue
+            if line.endswith('{'):
+                obj = {}
+                cur[line[:-1]] = obj
+                cur = obj
+                stack.append(obj)
+                continue
+
+            m = re.match('(.*){"(.*)"}', line)
+            if not m:
+                # Report lines we do not understand; they are dropped from
+                # the parsed data.
+                print(line)
+            else:
+                cur[m.group(1)] = m.group(2)
+
+    return root
+
+def remove_sections(root):
+    """Delete a fixed set of tables from every top-level entry of ``root``.
+
+    ``root`` maps names to dicts (as returned by ``parse_txt``). The dicts
+    are modified in place; tables that are not present are ignored.
+    """
+    unused = (
+        'Keys',
+        'LanguagesShort',
+        'Types',
+        'Variants',
+        'codePatterns',
+        'localeDisplayPattern',
+        'CountriesShort',
+        'Scripts%stand-alone',
+    )
+    # values() exists on both Python 2 and 3; itervalues() is Python 2 only.
+    for child in root.values():
+        for section in unused:
+            child.pop(section, None)
+
+def remove_languages(root):
+    """Reduce each entry's 'Languages' table to the entry's own language.
+
+    The language code is the part of the top-level key before the first
+    ``_``. Every entry ends up with a 'Languages' dict holding at most that
+    one code; it is empty when the code has no (non-empty) name. ``root`` is
+    modified in place.
+    """
+    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
+    for locale, child in root.items():
+        # We only care about a language's name in that language
+        lang = locale.split('_')[0]
+        own_name = child.get('Languages', {}).get(lang)
+        child['Languages'] = {lang: own_name} if own_name else {}
+
+# Scripts which are actually used by stuff
+SCRIPTS = ['Cyrl', 'Latn', 'Arab', 'Vaii', 'Hans', 'Hant']
+def remove_scripts(root):
+    """Reduce each entry's 'Scripts' table to the codes listed in SCRIPTS.
+
+    Entries without a 'Scripts' table, or with an empty one, are left alone.
+    Codes whose name is missing or empty are dropped. ``root`` is modified
+    in place.
+    """
+    # values() exists on both Python 2 and 3; iteritems() is Python 2 only
+    # and the key was never used here.
+    for child in root.values():
+        v = child.get('Scripts')
+        if not v:
+            continue
+        child['Scripts'] = {
+            script: v[script] for script in SCRIPTS if v.get(script)
+        }
+
+def write_dict(name, value, out, indent):
+    """Write the table ``value`` to ``out`` under the key ``name``.
+
+    ``out`` must be a text stream that accepts unicode strings (for example
+    one returned by ``io.open``). Keys are written in sorted order, nested
+    dicts recurse with a deeper indent, and every other value is written as
+    ``key{"value"}``. Empty tables produce no output at all.
+    """
+    if not value:
+        return
+
+    child_indent = indent + ' '
+
+    # Text is written as-is and the stream does the encoding. Writing
+    # pre-encoded bytes to a text-mode file fails on Python 3.
+    out.write('%s%s{\n' % (indent, name))
+    for k in sorted(value):
+        v = value[k]
+        if isinstance(v, dict):
+            write_dict(k, v, out, child_indent)
+        else:
+            out.write('%s%s{"%s"}\n' % (child_indent, k, v))
+    out.write('%s}\n' % indent)
+
+def write_file(root, filename):
+    """Write every top-level table of ``root`` to ``filename`` as UTF-8.
+
+    The file is overwritten. Tables are written in the dict's own iteration
+    order.
+    """
+    # Imported locally so the function does not depend on a module-level
+    # import being added elsewhere in the file.
+    import io
+
+    with io.open(filename, 'w', encoding='utf-8') as f:
+        for k, v in root.items():
+            write_dict(k, v, f, '')
+
+def minify_lang(filename):
+    """Parse ``filename``, trim its sections, languages and scripts, and
+    write the result back over the same file."""
+    tree = parse_txt(filename)
+    for trim in (remove_sections, remove_languages, remove_scripts):
+        trim(tree)
+    write_file(tree, filename)
+
+# Minify every .txt file in lang/ in place.
+for name in os.listdir('lang'):
+    if name.endswith('.txt'):
+        minify_lang('lang/' + name)
+
+# gather information about which language+region combinations actually exist,
+# so that we can drop all others
+def gather_regions():
+    """Return a dict mapping a language code to a list of region codes.
+
+    Starts from the built-in table below, then adds the suffixes of every
+    ``<lang>_<suffix>[_<suffix>...].txt`` file name found in ``region/``.
+    """
+    langs = {
+        'af': ['ZA'],
+        'am': ['ET'],
+        'ar': ['AE', 'BH', 'DZ', 'EG', 'IQ', 'JO', 'KW', 'LB', 'LY', 'MA', 'OM', 'QA', 'SA', 'SY', 'TN', 'YE'],
+        'arn': ['CL'],
+        'as': ['IN'],
+        'az': ['AZ', 'AZ'],
+        'ba': ['RU'],
+        'be': ['BY'],
+        'bg': ['BG'],
+        'bn': ['BD', 'IN'],
+        'bo': ['CN'],
+        'br': ['FR'],
+        'bs': ['BA', 'BA'],
+        'ca': ['ES'],
+        'co': ['FR'],
+        'cs': ['CZ'],
+        'cy': ['GB'],
+        'da': ['DK'],
+        'de': ['AT', 'CH', 'DE', 'LI', 'LU'],
+        'div': ['MV'],
+        'el': ['GR'],
+        'en': ['029', 'AU', 'BZ', 'CA', 'GB', 'IE', 'IN', 'JM', 'MY', 'NZ', 'PH', 'SG', 'TT', 'US', 'ZA', 'ZW'],
+        'es': ['AR', 'BO', 'CL', 'CO', 'CR', 'DO', 'EC', 'ES', 'GT', 'HN', 'MX', 'NI', 'PA', 'PE', 'PR', 'PY', 'SV', 'US', 'UY', 'VE'],
+        'et': ['EE'],
+        'eu': ['ES'],
+        'fa': ['IR'],
+        'fi': ['FI'],
+        'fil': ['PH'],
+        'fo': ['FO'],
+        'fr': ['BE', 'CA', 'CH', 'FR', 'LU', 'MC'],
+        'fy': ['NL'],
+        'ga': ['IE'],
+        'gl': ['ES'],
+        'gsw': ['FR'],
+        'gu': ['IN'],
+        'ha': ['NG'],
+        'he': ['IL'],
+        'hi': ['IN'],
+        'hr': ['BA', 'HR'],
+        'hu': ['HU'],
+        'hy': ['AM'],
+        'id': ['ID'],
+        'ig': ['NG'],
+        'ii': ['CN'],
+        'is': ['IS'],
+        'it': ['CH', 'IT'],
+        'iu': ['CA', 'CA'],
+        'ja': ['JP'],
+        'ka': ['GE'],
+        'kk': ['KZ'],
+        'kl': ['GL'],
+        'km': ['KH'],
+        'kn': ['IN'],
+        'ko': ['KR'],
+        'kok': ['IN'],
+        'ky': ['KG'],
+        'lb': ['LU'],
+        'lo': ['LA'],
+        'lt': ['LT'],
+        'lv': ['LV'],
+        'mi': ['NZ'],
+        'mk': ['MK'],
+        'ml': ['IN'],
+        'mn': ['CN', 'MN'],
+        'moh': ['CA'],
+        'mr': ['IN'],
+        'ms': ['BN', 'MY'],
+        'mt': ['MT'],
+        'nb': ['NO'],
+        'ne': ['NP'],
+        'nl': ['BE', 'NL'],
+        'nn': ['NO'],
+        'nso': ['ZA'],
+        'oc': ['FR'],
+        'or': ['IN'],
+        'pa': ['IN'],
+        'pl': ['PL'],
+        'prs': ['AF'],
+        'ps': ['AF'],
+        'pt': ['BR', 'PT'],
+        'qut': ['GT'],
+        'quz': ['BO', 'EC', 'PE'],
+        'rm': ['CH'],
+        'ro': ['RO'],
+        'ru': ['RU'],
+        'rw': ['RW'],
+        'sa': ['IN'],
+        'sah': ['RU'],
+        'se': ['FI', 'NO', 'SE'],
+        'si': ['LK'],
+        'sk': ['SK'],
+        'sl': ['SI'],
+        'sma': ['NO', 'SE'],
+        'smj': ['NO', 'SE'],
+        'smn': ['FI'],
+        'sms': ['FI'],
+        'sq': ['AL'],
+        'sr': ['BA', 'BA', 'SP', 'YU'],
+        'sv': ['FI', 'SE'],
+        'sw': ['KE', 'TZ'],
+        'syr': ['SY'],
+        'ta': ['IN'],
+        'te': ['IN'],
+        'tg': ['TJ'],
+        'th': ['TH'],
+        'tk': ['TM'],
+        'tn': ['ZA'],
+        'tr': ['TR'],
+        'tt': ['RU'],
+        'tzm': ['DZ'],
+        'ug': ['CN'],
+        'uk': ['UA'],
+        'ur': ['PK'],
+        'uz': ['UZ', 'UZ'],
+        'vi': ['VN'],
+        'wee': ['DE'],
+        'wen': ['DE'],
+        'wo': ['SN'],
+        'xh': ['ZA'],
+        'yo': ['NG'],
+        'zh': ['CN', 'HK', 'MO', 'SG', 'TW'],
+        'zu': ['ZA']
+    }
+    # Merge in whatever the file names in region/ tell us: everything after
+    # the first '_' in "<lang>_..." is appended to that language's list.
+    for name in os.listdir('region'):
+        if not name.endswith('.txt'):
+            continue
+        parts = name[:-4].split('_')
+        if len(parts) > 1:
+            langs.setdefault(parts[0], []).extend(parts[1:])
+    return langs
+
+# Language code -> list of region codes, computed once when the script runs
+# (gather_regions() lists the region/ directory). Read by remove_countries().
+REGIONS = gather_regions()
+def remove_countries(root, regions_by_lang=None):
+    """Reduce each entry's 'Countries' table to the regions of its language.
+
+    The language code is the part of the top-level key before the first
+    ``_``. ``regions_by_lang`` maps a language code to a list of region
+    codes and defaults to the module-level ``REGIONS`` table. Entries
+    without a 'Countries' table, or with an empty one, are left alone. When
+    the language has no regions the whole table is deleted. ``root`` is
+    modified in place.
+    """
+    if regions_by_lang is None:
+        regions_by_lang = REGIONS
+
+    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
+    for locale, child in root.items():
+        v = child.get('Countries', {})
+        if not v:
+            continue
+
+        # We only care about the names for regions in the languages used in
+        # those regions
+        lang = locale.split('_')[0]
+        regions = regions_by_lang.get(lang)
+        if not regions:
+            del child['Countries']
+            continue
+
+        trimmed = {}
+        for region in regions:
+            name = v.get(region)
+            if name:
+                trimmed[region] = name
+        child['Countries'] = trimmed
+
+def minify_region(filename):
+    """Parse ``filename``, trim its sections and countries, and write the
+    result back over the same file."""
+    tree = parse_txt(filename)
+    for trim in (remove_sections, remove_countries):
+        trim(tree)
+    write_file(tree, filename)
+
+# Minify every .txt file in region/ in place.
+for name in os.listdir('region'):
+    if name.endswith('.txt'):
+        minify_region('region/' + name)
+