diff options
author | 2024-12-27 14:04:08 +0000 | |
---|---|---|
committer | 2024-12-31 14:16:09 +0000 | |
commit | a490e2cf1ef7257b40bcb45df6bde5972df2ac13 (patch) | |
tree | 73b8fe2890777badb1c9b69b4ad8d3324b00f030 /libs/androidfw | |
parent | 7b64171769bd4c75518bff8d9e01158f3d467cec (diff) |
Extract implementation of script and locale matching into LocaleDataLookup.h
This makes it easier to add new unit tests and to fix correctness and performance
bugs later.
Bug: 386340812
Test: atest libandroidfw_tests
Change-Id: I4d3ee1333637d2cd22d5fdfad730935951feeccb
Diffstat (limited to 'libs/androidfw')
-rw-r--r-- | libs/androidfw/Android.bp | 2 | ||||
-rw-r--r-- | libs/androidfw/LocaleData.cpp | 58 | ||||
-rw-r--r-- | libs/androidfw/LocaleDataLookup.cpp | 64 | ||||
-rw-r--r-- | libs/androidfw/include/androidfw/LocaleDataLookup.h | 79 | ||||
-rw-r--r-- | libs/androidfw/tests/LocaleDataLookup_test.cpp | 108 |
5 files changed, 266 insertions, 45 deletions
diff --git a/libs/androidfw/Android.bp b/libs/androidfw/Android.bp index 1bc15d72bacc..a13dd78a23a1 100644 --- a/libs/androidfw/Android.bp +++ b/libs/androidfw/Android.bp @@ -80,6 +80,7 @@ cc_library { "LoadedArsc.cpp", "Locale.cpp", "LocaleData.cpp", + "LocaleDataLookup.cpp", "misc.cpp", "NinePatch.cpp", "ObbFile.cpp", @@ -224,6 +225,7 @@ cc_test { "tests/Idmap_test.cpp", "tests/LoadedArsc_test.cpp", "tests/Locale_test.cpp", + "tests/LocaleDataLookup_test.cpp", "tests/NinePatch_test.cpp", "tests/ResourceTimer_test.cpp", "tests/ResourceUtils_test.cpp", diff --git a/libs/androidfw/LocaleData.cpp b/libs/androidfw/LocaleData.cpp index 020cef6012e9..1b23d90c5ab3 100644 --- a/libs/androidfw/LocaleData.cpp +++ b/libs/androidfw/LocaleData.cpp @@ -23,39 +23,18 @@ #include <unordered_set> #include <androidfw/LocaleData.h> +#include <androidfw/LocaleDataLookup.h> namespace android { -#include "LocaleDataTables.cpp" - -inline uint32_t packLocale(const char* language, const char* region) { - return (((uint8_t) language[0]) << 24u) | (((uint8_t) language[1]) << 16u) | - (((uint8_t) region[0]) << 8u) | ((uint8_t) region[1]); -} - -inline uint32_t dropRegion(uint32_t packed_locale) { - return packed_locale & 0xFFFF0000LU; -} - -inline bool hasRegion(uint32_t packed_locale) { - return (packed_locale & 0x0000FFFFLU) != 0; -} - -const size_t SCRIPT_LENGTH = 4; -const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]); const uint32_t PACKED_ROOT = 0; // to represent the root locale +const uint32_t MAX_PARENT_DEPTH = getMaxAncestorTreeDepth(); uint32_t findParent(uint32_t packed_locale, const char* script) { if (hasRegion(packed_locale)) { - for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) { - if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) { - auto map = SCRIPT_PARENTS[i].map; - auto lookup_result = map->find(packed_locale); - if (lookup_result != map->end()) { - return lookup_result->second; - } - break; - } + auto parent_key = 
findParentLocalePackedKey(script, packed_locale); + if (parent_key != 0) { + return parent_key; } return dropRegion(packed_locale); } @@ -111,17 +90,6 @@ size_t findDistance(uint32_t supported, return supported_ancestor_count + request_ancestors_index - 1; } -inline bool isRepresentative(uint32_t language_and_region, const char* script) { - const uint64_t packed_locale = ( - (((uint64_t) language_and_region) << 32u) | - (((uint64_t) script[0]) << 24u) | - (((uint64_t) script[1]) << 16u) | - (((uint64_t) script[2]) << 8u) | - ((uint64_t) script[3])); - - return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0); -} - const uint32_t US_SPANISH = 0x65735553LU; // es-US const uint32_t MEXICAN_SPANISH = 0x65734D58LU; // es-MX const uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424LU; // es-419 @@ -185,8 +153,8 @@ int localeDataCompareRegions( // If we are here, left and right are equidistant from the request. We will // try and see if any of them is a representative locale. - const bool left_is_representative = isRepresentative(left, requested_script); - const bool right_is_representative = isRepresentative(right, requested_script); + const bool left_is_representative = isLocaleRepresentative(left, requested_script); + const bool right_is_representative = isLocaleRepresentative(right, requested_script); if (left_is_representative != right_is_representative) { return (int) left_is_representative - (int) right_is_representative; } @@ -204,14 +172,14 @@ void localeDataComputeScript(char out[4], const char* language, const char* regi return; } uint32_t lookup_key = packLocale(language, region); - auto lookup_result = LIKELY_SCRIPTS.find(lookup_key); - if (lookup_result == LIKELY_SCRIPTS.end()) { + auto lookup_result = lookupLikelyScript(lookup_key); + if (lookup_result == nullptr) { // We couldn't find the locale. 
Let's try without the region if (region[0] != '\0') { lookup_key = dropRegion(lookup_key); - lookup_result = LIKELY_SCRIPTS.find(lookup_key); - if (lookup_result != LIKELY_SCRIPTS.end()) { - memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH); + lookup_result = lookupLikelyScript(lookup_key); + if (lookup_result != nullptr) { + memcpy(out, lookup_result, SCRIPT_LENGTH); return; } } @@ -220,7 +188,7 @@ void localeDataComputeScript(char out[4], const char* language, const char* regi return; } else { // We found the locale. - memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH); + memcpy(out, lookup_result, SCRIPT_LENGTH); } } diff --git a/libs/androidfw/LocaleDataLookup.cpp b/libs/androidfw/LocaleDataLookup.cpp new file mode 100644 index 000000000000..5441e2258900 --- /dev/null +++ b/libs/androidfw/LocaleDataLookup.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <unordered_map> +#include <unordered_set> + +#include <androidfw/LocaleDataLookup.h> + +namespace android { + +#include "LocaleDataTables.cpp" + +const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]); + +const char* lookupLikelyScript(uint32_t packed_lang_region) { + + auto lookup_result = LIKELY_SCRIPTS.find(packed_lang_region); + if (lookup_result == LIKELY_SCRIPTS.end()) { + return nullptr; + } else { + return SCRIPT_CODES[lookup_result->second]; + } +} + +uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region) { + for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) { + if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) { + auto map = SCRIPT_PARENTS[i].map; + auto lookup_result = map->find(packed_lang_region); + if (lookup_result != map->end()) { + return lookup_result->second; + } + break; + } + } + return 0; +} + +uint32_t getMaxAncestorTreeDepth() { + return MAX_PARENT_DEPTH; +} + +namespace hidden { + +bool isRepresentative(uint64_t packed_locale) { + return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0); +} + +} // namespace hidden + +} // namespace android diff --git a/libs/androidfw/include/androidfw/LocaleDataLookup.h b/libs/androidfw/include/androidfw/LocaleDataLookup.h new file mode 100644 index 000000000000..7fde7123ed0b --- /dev/null +++ b/libs/androidfw/include/androidfw/LocaleDataLookup.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <stddef.h> +#include <stdint.h> + + +namespace android { + +namespace hidden { + bool isRepresentative(uint64_t packed_locale); +} + +constexpr size_t SCRIPT_LENGTH = 4; + +constexpr inline uint32_t packLocale(const char* language, const char* region) { + const unsigned char* lang = reinterpret_cast<const unsigned char*>(language); + const unsigned char* reg = reinterpret_cast<const unsigned char*>(region); + return (static_cast<uint32_t>(lang[0]) << 24u) | + (static_cast<uint32_t>(lang[1]) << 16u) | + (static_cast<uint32_t>(reg[0]) << 8u) | + static_cast<uint32_t>(reg[1]); +} + +constexpr inline uint32_t dropRegion(uint32_t packed_locale) { + return packed_locale & 0xFFFF0000LU; +} + +constexpr inline bool hasRegion(uint32_t packed_locale) { + return (packed_locale & 0x0000FFFFLU) != 0; +} + +/** + * Return nullptr if the key isn't found. The input packed_lang_region can be computed + * by android::packLocale. + * Note that the returned char* is either nullptr or a 4-byte char sequence, but isn't + * a null-terminated string. + */ +const char* lookupLikelyScript(uint32_t packed_lang_region); +/** + * Return false if the locale isn't representative. The input language_and_region can be computed + * by android::packLocale. + */ +bool inline isLocaleRepresentative(uint32_t language_and_region, const char* script) { + const unsigned char* s = reinterpret_cast<const unsigned char*>(script); + const uint64_t packed_locale = ( + ((static_cast<uint64_t>(language_and_region)) << 32u) | + (static_cast<uint64_t>(s[0]) << 24u) | + (static_cast<uint64_t>(s[1]) << 16u) | + (static_cast<uint64_t>(s[2]) << 8u) | + static_cast<uint64_t>(s[3])); + + return hidden::isRepresentative(packed_locale); +} + +/** + * Return a parent packed key for a given script and child packed key. Return 0 if + * no parent is found. 
+ */ +uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region); + +uint32_t getMaxAncestorTreeDepth(); + +} // namespace android diff --git a/libs/androidfw/tests/LocaleDataLookup_test.cpp b/libs/androidfw/tests/LocaleDataLookup_test.cpp new file mode 100644 index 000000000000..26b220d63169 --- /dev/null +++ b/libs/androidfw/tests/LocaleDataLookup_test.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "androidfw/LocaleDataLookup.h" + +#include <cstddef> +#include <string> + +#include "gtest/gtest.h" +#include "gmock/gmock.h" + + +namespace android { + +constexpr const char NULL_SCRIPT[4] = {'\0', '\0', '\0','\0' }; + +#define EXPECT_SCEIPT_EQ(ex, s) EXPECT_EQ(0, s == nullptr ? 
-1 : memcmp(ex, s, 4)) + +// Similar to packLanguageOrRegion() in ResourceTypes.cpp +static uint32_t encodeLanguageOrRegionLiteral(const char* in, const char base) { + size_t len = strlen(in); + if (len <= 1) { + return 0; + } + + if (len == 2) { + return (((uint8_t) in[0]) << 8) | ((uint8_t) in[1]); + } + uint8_t first = (in[0] - base) & 0x007f; + uint8_t second = (in[1] - base) & 0x007f; + uint8_t third = (in[2] - base) & 0x007f; + + return ((uint8_t) (0x80 | (third << 2) | (second >> 3)) << 8) | ((second << 5) | first); +} + +static uint32_t encodeLocale(const char* language, const char* region) { + return (encodeLanguageOrRegionLiteral(language, 'a') << 16) | + encodeLanguageOrRegionLiteral(region, '0'); +} + +TEST(LocaleDataLookupTest, lookupLikelyScript) { + EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("", ""))); + EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("en", ""))); + EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("en", "US"))); + EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("en", "GB"))); + EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("fr", ""))); + EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("fr", "FR"))); + + + EXPECT_SCEIPT_EQ("~~~A", lookupLikelyScript(encodeLocale("en", "XA"))); + EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("ha", ""))); + EXPECT_SCEIPT_EQ("Arab", lookupLikelyScript(encodeLocale("ha", "SD"))); + EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("ha", "Sd"))); // case sensitive + EXPECT_SCEIPT_EQ("Hans", lookupLikelyScript(encodeLocale("zh", ""))); + EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("zh", "CN"))); + EXPECT_SCEIPT_EQ("Hant", lookupLikelyScript(encodeLocale("zh", "HK"))); + + EXPECT_SCEIPT_EQ("Nshu", lookupLikelyScript(encodeLocale("zhx", ""))); + EXPECT_SCEIPT_EQ("Nshu", lookupLikelyScript(0xDCF90000u)); // encoded "zhx" +} + +TEST(LocaleDataLookupTest, isLocaleRepresentative) { + EXPECT_TRUE(isLocaleRepresentative(encodeLocale("en", "US"), "Latn")); + 
EXPECT_TRUE(isLocaleRepresentative(encodeLocale("en", "GB"), "Latn")); + EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", "US"), NULL_SCRIPT)); + EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", ""), "Latn")); + EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", ""), NULL_SCRIPT)); + EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", "US"), "Arab")); + + EXPECT_TRUE(isLocaleRepresentative(encodeLocale("fr", "FR"), "Latn")); + + EXPECT_TRUE(isLocaleRepresentative(encodeLocale("zh", "CN"), "Hans")); + EXPECT_FALSE(isLocaleRepresentative(encodeLocale("zh", "TW"), "Hans")); + EXPECT_FALSE(isLocaleRepresentative(encodeLocale("zhx", "CN"), "Hans")); + EXPECT_FALSE(isLocaleRepresentative(0xDCF9434E, "Hans")); + EXPECT_TRUE(isLocaleRepresentative(encodeLocale("zhx", "CN"), "Nshu")); + EXPECT_TRUE(isLocaleRepresentative(0xDCF9434E, "Nshu")); +} + +TEST(LocaleDataLookupTest, findParentLocalePackedKey) { + EXPECT_EQ(encodeLocale("en", "001"), findParentLocalePackedKey("Latn", encodeLocale("en", "GB"))); + EXPECT_EQ(0x656E8400u, findParentLocalePackedKey("Latn", encodeLocale("en", "GB"))); + + EXPECT_EQ(encodeLocale("en", "IN"), findParentLocalePackedKey("Deva", encodeLocale("hi", ""))); + + EXPECT_EQ(encodeLocale("ar", "015"), findParentLocalePackedKey("Arab", encodeLocale("ar", "AE"))); + EXPECT_EQ(0x61729420u, findParentLocalePackedKey("Arab", encodeLocale("ar", "AE"))); + + EXPECT_EQ(encodeLocale("ar", "015"), findParentLocalePackedKey("~~~B", encodeLocale("ar", "XB"))); + EXPECT_EQ(0x61729420u, findParentLocalePackedKey("Arab", encodeLocale("ar", "AE"))); + + EXPECT_EQ(encodeLocale("zh", "HK"), findParentLocalePackedKey("Hant", encodeLocale("zh", "MO"))); +} + +} // namespace android |