summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rwxr-xr-xtools/localedata/extract_icu_data.py142
1 files changed, 112 insertions, 30 deletions
diff --git a/tools/localedata/extract_icu_data.py b/tools/localedata/extract_icu_data.py
index 8f67fa87adb5..ec531275af1c 100755
--- a/tools/localedata/extract_icu_data.py
+++ b/tools/localedata/extract_icu_data.py
@@ -121,7 +121,7 @@ def pack_to_uint32(locale):
def dump_script_codes(all_scripts):
"""Dump the SCRIPT_CODES table."""
- print('const char SCRIPT_CODES[][4] = {')
+ print('constexpr const char SCRIPT_CODES[][4] = {')
for index, script in enumerate(all_scripts):
print(" /* %-2d */ {'%c', '%c', '%c', '%c'}," % (
index, script[0], script[1], script[2], script[3]))
@@ -132,15 +132,33 @@ def dump_script_codes(all_scripts):
def dump_script_data(likely_script_dict, all_scripts):
"""Dump the script data."""
print()
- print('const std::unordered_map<uint32_t, uint8_t> LIKELY_SCRIPTS({')
+ print('const char* lookupLikelyScript(uint32_t packed_lang_region) {')
+ print(' switch(packed_lang_region) {')
+
+ # partition the mapping by the script code
+ parts = {}
for locale in sorted(likely_script_dict.keys()):
script = likely_script_dict[locale]
- print(' {0x%08Xu, %2du}, // %s -> %s' % (
- pack_to_uint32(locale),
- all_scripts.index(script),
- locale.replace('_', '-'),
- script))
- print('});')
+ if script in parts:
+ l = parts[script]
+ else:
+ l = []
+ parts[script] = l
+ l.append(locale)
+
+ for script in sorted(parts.keys()):
+ locales = parts[script]
+ for locale in locales:
+ print(' case 0x%08Xu: // %s -> %s' % (
+ pack_to_uint32(locale),
+ locale.replace('_', '-'),
+ script))
+ print(' return SCRIPT_CODES[%2du];' %
+ all_scripts.index(script))
+ print(' default:')
+ print(' return nullptr;')
+ print(' }')
+ print('}')
def pack_to_uint64(locale):
@@ -152,16 +170,32 @@ def pack_to_uint64(locale):
(ord(script[2]) << 8) |
ord(script[3]))
+def pack_script_to_uint32(script):
+ """Pack a 4-letter script code into a 32-bit unsigned integer."""
+ return ((ord(script[0]) << 24) |
+ (ord(script[1]) << 16) |
+ (ord(script[2]) << 8) |
+ ord(script[3]))
+
def dump_representative_locales(representative_locales):
"""Dump the set of representative locales."""
print()
- print('std::unordered_set<uint64_t> REPRESENTATIVE_LOCALES({')
+ print('bool isLocaleRepresentative(uint32_t language_and_region, const char* script) {')
+ print(' const uint64_t packed_locale =')
+ print(' ((static_cast<uint64_t>(language_and_region)) << 32u) |')
+ print(' (static_cast<uint64_t>(packScript(script)));')
+ print(' switch(packed_locale) {')
for locale in sorted(representative_locales):
- print(' 0x%08XLLU, // %s' % (
+ print(' case 0x%08XLLU: // %s' % (
pack_to_uint64(locale),
locale))
- print('});')
+
+ print(' return true;')
+ print(' default:')
+ print(' return false;')
+ print(' }')
+ print('}')
def read_and_dump_likely_data(cldr_source_dir):
@@ -182,7 +216,7 @@ def read_and_dump_likely_data(cldr_source_dir):
def escape_script_variable_name(script):
"""Escape characters, e.g. '~', in a C++ variable name"""
- return script.replace("~", "_")
+ return script.replace("~", "0")
def read_parent_data(icu_data_dir):
"""Read locale parent data from ICU data files."""
@@ -225,29 +259,52 @@ def dump_parent_data(script_organized_dict):
"""Dump information for parents of locales."""
sorted_scripts = sorted(script_organized_dict.keys())
print()
+
for script in sorted_scripts:
parent_dict = script_organized_dict[script]
- print ('const std::unordered_map<uint32_t, uint32_t> %s_PARENTS({'
- % escape_script_variable_name(script.upper()))
+
+ # partition the mapping by the parent's value
+ parts = {}
for locale in sorted(parent_dict.keys()):
parent = parent_dict[locale]
- print(' {0x%08Xu, 0x%08Xu}, // %s -> %s' % (
- pack_to_uint32(locale),
- pack_to_uint32(parent),
- locale.replace('_', '-'),
- parent.replace('_', '-')))
- print('});')
+ if parent in parts:
+ l = parts[parent]
+ else:
+ l = []
+ parts[parent] = l
+ l.append(locale)
+
+ print('static uint32_t find%sParent(uint32_t packed_lang_region) {' % escape_script_variable_name(script))
+ print(' switch(packed_lang_region) {')
+ for parent in sorted(parts.keys()):
+ locales = parts[parent]
+ for locale in locales:
+ print(' case 0x%08Xu: // %s -> %s' % (
+ pack_to_uint32(locale),
+ locale.replace('_', '-'),
+ parent.replace('_', '-')))
+
+ print(' return 0x%08Xu;' % pack_to_uint32(parent))
+
+ print(' default:')
+ print(' return 0;')
+ print(' }')
+ print('}')
print()
- print('const struct {')
- print(' const char script[4];')
- print(' const std::unordered_map<uint32_t, uint32_t>* map;')
- print('} SCRIPT_PARENTS[] = {')
+ print('uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region) {')
+ print(' uint32_t packedScript = packScript(script);')
+ print(' switch (packedScript) {')
+
for script in sorted_scripts:
- print(" {{'%c', '%c', '%c', '%c'}, &%s_PARENTS}," % (
- script[0], script[1], script[2], script[3],
- escape_script_variable_name(script.upper())))
- print('};')
+ print(' case 0x%08Xu: // %s' % (pack_script_to_uint32(script), script))
+ print(' return find%sParent(packed_lang_region);' %
+ escape_script_variable_name(script))
+
+ print(' default:')
+ print(' return 0;')
+ print(' }')
+ print('}')
def dump_parent_tree_depth(parent_dict):
@@ -261,7 +318,9 @@ def dump_parent_tree_depth(parent_dict):
max_depth = max(max_depth, depth)
assert max_depth < 5 # Our algorithms assume small max_depth
print()
- print('const size_t MAX_PARENT_DEPTH = %d;' % max_depth)
+ print('uint32_t getMaxAncestorTreeDepth() {')
+ print(' return %d;' % max_depth)
+ print('}')
def read_and_dump_parent_data(icu_data_dir, likely_script_dict):
@@ -286,10 +345,33 @@ def main():
'external', 'icu', 'icu4c', 'source', 'data')
cldr_source_dir = os.path.join(source_root, 'external', 'cldr')
+ print('''/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+''')
print('// Auto-generated by %s' % sys.argv[0])
- print()
+ print('''
+#include <androidfw/LocaleDataLookup.h>
+
+namespace android {
+''')
likely_script_dict = read_and_dump_likely_data(cldr_source_dir)
read_and_dump_parent_data(icu_data_dir, likely_script_dict)
+ print()
+ print('} // namespace android')
if __name__ == '__main__':