diff options
Diffstat (limited to 'tools')
| -rwxr-xr-x | tools/localedata/extract_icu_data.py | 142 |
1 files changed, 112 insertions, 30 deletions
diff --git a/tools/localedata/extract_icu_data.py b/tools/localedata/extract_icu_data.py index 8f67fa87adb5..ec531275af1c 100755 --- a/tools/localedata/extract_icu_data.py +++ b/tools/localedata/extract_icu_data.py @@ -121,7 +121,7 @@ def pack_to_uint32(locale): def dump_script_codes(all_scripts): """Dump the SCRIPT_CODES table.""" - print('const char SCRIPT_CODES[][4] = {') + print('constexpr const char SCRIPT_CODES[][4] = {') for index, script in enumerate(all_scripts): print(" /* %-2d */ {'%c', '%c', '%c', '%c'}," % ( index, script[0], script[1], script[2], script[3])) @@ -132,15 +132,33 @@ def dump_script_codes(all_scripts): def dump_script_data(likely_script_dict, all_scripts): """Dump the script data.""" print() - print('const std::unordered_map<uint32_t, uint8_t> LIKELY_SCRIPTS({') + print('const char* lookupLikelyScript(uint32_t packed_lang_region) {') + print(' switch(packed_lang_region) {') + + # partition the mapping by the script code + parts = {} for locale in sorted(likely_script_dict.keys()): script = likely_script_dict[locale] - print(' {0x%08Xu, %2du}, // %s -> %s' % ( - pack_to_uint32(locale), - all_scripts.index(script), - locale.replace('_', '-'), - script)) - print('});') + if script in parts: + l = parts[script] + else: + l = [] + parts[script] = l + l.append(locale) + + for script in sorted(parts.keys()): + locales = parts[script] + for locale in locales: + print(' case 0x%08Xu: // %s -> %s' % ( + pack_to_uint32(locale), + locale.replace('_', '-'), + script)) + print(' return SCRIPT_CODES[%2du];' % + all_scripts.index(script)) + print(' default:') + print(' return nullptr;') + print(' }') + print('}') def pack_to_uint64(locale): @@ -152,16 +170,32 @@ def pack_to_uint64(locale): (ord(script[2]) << 8) | ord(script[3])) +def pack_script_to_uint32(script): + """Pack a 4-letter script code into a 32-bit unsigned integer.""" + return ((ord(script[0]) << 24) | + (ord(script[1]) << 16) | + (ord(script[2]) << 8) | + ord(script[3])) + def dump_representative_locales(representative_locales): """Dump the set of representative locales.""" print() - print('std::unordered_set<uint64_t> REPRESENTATIVE_LOCALES({') + print('bool isLocaleRepresentative(uint32_t language_and_region, const char* script) {') + print(' const uint64_t packed_locale =') + print(' ((static_cast<uint64_t>(language_and_region)) << 32u) |') + print(' (static_cast<uint64_t>(packScript(script)));') + print(' switch(packed_locale) {') for locale in sorted(representative_locales): - print(' 0x%08XLLU, // %s' % ( + print(' case 0x%08XLLU: // %s' % ( pack_to_uint64(locale), locale)) - print('});') + + print(' return true;') + print(' default:') + print(' return false;') + print(' }') + print('}') def read_and_dump_likely_data(cldr_source_dir): @@ -182,7 +216,7 @@ def read_and_dump_likely_data(cldr_source_dir): def escape_script_variable_name(script): """Escape characters, e.g. '~', in a C++ variable name""" - return script.replace("~", "_") + return script.replace("~", "0") def read_parent_data(icu_data_dir): """Read locale parent data from ICU data files.""" @@ -225,29 +259,52 @@ def dump_parent_data(script_organized_dict): """Dump information for parents of locales.""" sorted_scripts = sorted(script_organized_dict.keys()) print() + for script in sorted_scripts: parent_dict = script_organized_dict[script] - print ('const std::unordered_map<uint32_t, uint32_t> %s_PARENTS({' - % escape_script_variable_name(script.upper())) + + # partition the mapping by the parent's value + parts = {} for locale in sorted(parent_dict.keys()): parent = parent_dict[locale] - print(' {0x%08Xu, 0x%08Xu}, // %s -> %s' % ( - pack_to_uint32(locale), - pack_to_uint32(parent), - locale.replace('_', '-'), - parent.replace('_', '-'))) - print('});') + if parent in parts: + l = parts[parent] + else: + l = [] + parts[parent] = l + l.append(locale) + + print('static uint32_t find%sParent(uint32_t packed_lang_region) {' % escape_script_variable_name(script)) + print(' switch(packed_lang_region) {') + for parent in sorted(parts.keys()): + locales = parts[parent] + for locale in locales: + print(' case 0x%08Xu: // %s -> %s' % ( + pack_to_uint32(locale), + locale.replace('_', '-'), + parent.replace('_', '-'))) + + print(' return 0x%08Xu;' % pack_to_uint32(parent)) + + print(' default:') + print(' return 0;') + print(' }') + print('}') print() - print('const struct {') - print(' const char script[4];') - print(' const std::unordered_map<uint32_t, uint32_t>* map;') - print('} SCRIPT_PARENTS[] = {') + print('uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region) {') + print(' uint32_t packedScript = packScript(script);') + print(' switch (packedScript) {') + for script in sorted_scripts: - print(" {{'%c', '%c', '%c', '%c'}, &%s_PARENTS}," % ( - script[0], script[1], script[2], script[3], - escape_script_variable_name(script.upper()))) - print('};') + print(' case 0x%08Xu: // %s' % (pack_script_to_uint32(script), script)) + print(' return find%sParent(packed_lang_region);' % + escape_script_variable_name(script)) + + print(' default:') + print(' return 0;') + print(' }') + print('}') def dump_parent_tree_depth(parent_dict): @@ -261,7 +318,9 @@ def dump_parent_tree_depth(parent_dict): max_depth = max(max_depth, depth) assert max_depth < 5 # Our algorithms assume small max_depth print() - print('const size_t MAX_PARENT_DEPTH = %d;' % max_depth) + print('uint32_t getMaxAncestorTreeDepth() {') + print(' return %d;' % max_depth) + print('}') def read_and_dump_parent_data(icu_data_dir, likely_script_dict): @@ -286,10 +345,33 @@ def main(): 'external', 'icu', 'icu4c', 'source', 'data') cldr_source_dir = os.path.join(source_root, 'external', 'cldr') + print('''/* + * Copyright (C) 2025 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +''') print('// Auto-generated by %s' % sys.argv[0]) - print() + print(''' +#include <androidfw/LocaleDataLookup.h> + +namespace android { +''') likely_script_dict = read_and_dump_likely_data(cldr_source_dir) read_and_dump_parent_data(icu_data_dir, likely_script_dict) + print() + print('} // namespace android') if __name__ == '__main__': |