summaryrefslogtreecommitdiff
path: root/libs/androidfw
diff options
context:
space:
mode:
author Victor Chang <vichang@google.com> 2024-12-27 14:04:08 +0000
committer Victor Chang <vichang@google.com> 2024-12-31 14:16:09 +0000
commita490e2cf1ef7257b40bcb45df6bde5972df2ac13 (patch)
tree73b8fe2890777badb1c9b69b4ad8d3324b00f030 /libs/androidfw
parent7b64171769bd4c75518bff8d9e01158f3d467cec (diff)
Extract implementation of script and locale matching into LocaleDataLookup.h
This makes it easier to add new unit tests and to fix correctness and performance bugs later. Bug: 386340812 Test: atest libandroidfw_tests Change-Id: I4d3ee1333637d2cd22d5fdfad730935951feeccb
Diffstat (limited to 'libs/androidfw')
-rw-r--r--libs/androidfw/Android.bp2
-rw-r--r--libs/androidfw/LocaleData.cpp58
-rw-r--r--libs/androidfw/LocaleDataLookup.cpp64
-rw-r--r--libs/androidfw/include/androidfw/LocaleDataLookup.h79
-rw-r--r--libs/androidfw/tests/LocaleDataLookup_test.cpp108
5 files changed, 266 insertions, 45 deletions
diff --git a/libs/androidfw/Android.bp b/libs/androidfw/Android.bp
index 1bc15d72bacc..a13dd78a23a1 100644
--- a/libs/androidfw/Android.bp
+++ b/libs/androidfw/Android.bp
@@ -80,6 +80,7 @@ cc_library {
"LoadedArsc.cpp",
"Locale.cpp",
"LocaleData.cpp",
+ "LocaleDataLookup.cpp",
"misc.cpp",
"NinePatch.cpp",
"ObbFile.cpp",
@@ -224,6 +225,7 @@ cc_test {
"tests/Idmap_test.cpp",
"tests/LoadedArsc_test.cpp",
"tests/Locale_test.cpp",
+ "tests/LocaleDataLookup_test.cpp",
"tests/NinePatch_test.cpp",
"tests/ResourceTimer_test.cpp",
"tests/ResourceUtils_test.cpp",
diff --git a/libs/androidfw/LocaleData.cpp b/libs/androidfw/LocaleData.cpp
index 020cef6012e9..1b23d90c5ab3 100644
--- a/libs/androidfw/LocaleData.cpp
+++ b/libs/androidfw/LocaleData.cpp
@@ -23,39 +23,18 @@
#include <unordered_set>
#include <androidfw/LocaleData.h>
+#include <androidfw/LocaleDataLookup.h>
namespace android {
-#include "LocaleDataTables.cpp"
-
-inline uint32_t packLocale(const char* language, const char* region) {
- return (((uint8_t) language[0]) << 24u) | (((uint8_t) language[1]) << 16u) |
- (((uint8_t) region[0]) << 8u) | ((uint8_t) region[1]);
-}
-
-inline uint32_t dropRegion(uint32_t packed_locale) {
- return packed_locale & 0xFFFF0000LU;
-}
-
-inline bool hasRegion(uint32_t packed_locale) {
- return (packed_locale & 0x0000FFFFLU) != 0;
-}
-
-const size_t SCRIPT_LENGTH = 4;
-const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
const uint32_t PACKED_ROOT = 0; // to represent the root locale
+const uint32_t MAX_PARENT_DEPTH = getMaxAncestorTreeDepth();
uint32_t findParent(uint32_t packed_locale, const char* script) {
if (hasRegion(packed_locale)) {
- for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
- if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
- auto map = SCRIPT_PARENTS[i].map;
- auto lookup_result = map->find(packed_locale);
- if (lookup_result != map->end()) {
- return lookup_result->second;
- }
- break;
- }
+ auto parent_key = findParentLocalePackedKey(script, packed_locale);
+ if (parent_key != 0) {
+ return parent_key;
}
return dropRegion(packed_locale);
}
@@ -111,17 +90,6 @@ size_t findDistance(uint32_t supported,
return supported_ancestor_count + request_ancestors_index - 1;
}
-inline bool isRepresentative(uint32_t language_and_region, const char* script) {
- const uint64_t packed_locale = (
- (((uint64_t) language_and_region) << 32u) |
- (((uint64_t) script[0]) << 24u) |
- (((uint64_t) script[1]) << 16u) |
- (((uint64_t) script[2]) << 8u) |
- ((uint64_t) script[3]));
-
- return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
-}
-
const uint32_t US_SPANISH = 0x65735553LU; // es-US
const uint32_t MEXICAN_SPANISH = 0x65734D58LU; // es-MX
const uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424LU; // es-419
@@ -185,8 +153,8 @@ int localeDataCompareRegions(
// If we are here, left and right are equidistant from the request. We will
// try and see if any of them is a representative locale.
- const bool left_is_representative = isRepresentative(left, requested_script);
- const bool right_is_representative = isRepresentative(right, requested_script);
+ const bool left_is_representative = isLocaleRepresentative(left, requested_script);
+ const bool right_is_representative = isLocaleRepresentative(right, requested_script);
if (left_is_representative != right_is_representative) {
return (int) left_is_representative - (int) right_is_representative;
}
@@ -204,14 +172,14 @@ void localeDataComputeScript(char out[4], const char* language, const char* regi
return;
}
uint32_t lookup_key = packLocale(language, region);
- auto lookup_result = LIKELY_SCRIPTS.find(lookup_key);
- if (lookup_result == LIKELY_SCRIPTS.end()) {
+ auto lookup_result = lookupLikelyScript(lookup_key);
+ if (lookup_result == nullptr) {
// We couldn't find the locale. Let's try without the region
if (region[0] != '\0') {
lookup_key = dropRegion(lookup_key);
- lookup_result = LIKELY_SCRIPTS.find(lookup_key);
- if (lookup_result != LIKELY_SCRIPTS.end()) {
- memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
+ lookup_result = lookupLikelyScript(lookup_key);
+ if (lookup_result != nullptr) {
+ memcpy(out, lookup_result, SCRIPT_LENGTH);
return;
}
}
@@ -220,7 +188,7 @@ void localeDataComputeScript(char out[4], const char* language, const char* regi
return;
} else {
// We found the locale.
- memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
+ memcpy(out, lookup_result, SCRIPT_LENGTH);
}
}
diff --git a/libs/androidfw/LocaleDataLookup.cpp b/libs/androidfw/LocaleDataLookup.cpp
new file mode 100644
index 000000000000..5441e2258900
--- /dev/null
+++ b/libs/androidfw/LocaleDataLookup.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unordered_map>
+#include <unordered_set>
+
+#include <androidfw/LocaleDataLookup.h>
+
+namespace android {
+
+#include "LocaleDataTables.cpp"
+
+const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
+
+const char* lookupLikelyScript(uint32_t packed_lang_region) {
+
+ auto lookup_result = LIKELY_SCRIPTS.find(packed_lang_region);
+ if (lookup_result == LIKELY_SCRIPTS.end()) {
+ return nullptr;
+ } else {
+ return SCRIPT_CODES[lookup_result->second];
+ }
+}
+
+uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region) {
+ for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
+ if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
+ auto map = SCRIPT_PARENTS[i].map;
+ auto lookup_result = map->find(packed_lang_region);
+ if (lookup_result != map->end()) {
+ return lookup_result->second;
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+uint32_t getMaxAncestorTreeDepth() {
+ return MAX_PARENT_DEPTH;
+}
+
+namespace hidden {
+
+bool isRepresentative(uint64_t packed_locale) {
+ return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
+}
+
+} // namespace hidden
+
+} // namespace android
diff --git a/libs/androidfw/include/androidfw/LocaleDataLookup.h b/libs/androidfw/include/androidfw/LocaleDataLookup.h
new file mode 100644
index 000000000000..7fde7123ed0b
--- /dev/null
+++ b/libs/androidfw/include/androidfw/LocaleDataLookup.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+
+namespace android {
+
+namespace hidden {
+ bool isRepresentative(uint64_t packed_locale);
+}
+
+constexpr size_t SCRIPT_LENGTH = 4;
+
+constexpr inline uint32_t packLocale(const char* language, const char* region) {
+ const unsigned char* lang = reinterpret_cast<const unsigned char*>(language);
+ const unsigned char* reg = reinterpret_cast<const unsigned char*>(region);
+ return (static_cast<uint32_t>(lang[0]) << 24u) |
+ (static_cast<uint32_t>(lang[1]) << 16u) |
+ (static_cast<uint32_t>(reg[0]) << 8u) |
+ static_cast<uint32_t>(reg[1]);
+}
+
+constexpr inline uint32_t dropRegion(uint32_t packed_locale) {
+ return packed_locale & 0xFFFF0000LU;
+}
+
+constexpr inline bool hasRegion(uint32_t packed_locale) {
+ return (packed_locale & 0x0000FFFFLU) != 0;
+}
+
+/**
+ * Return nullptr if the key isn't found. The input packed_lang_region can be computed
+ * by android::packLocale.
+ * Note that the returned char* is either nullptr or a 4-byte char sequence, but isn't
+ * a null-terminated string.
+ */
+const char* lookupLikelyScript(uint32_t packed_lang_region);
+/**
+ * Return true if the locale is representative, false otherwise. language_and_region can be
+ * computed by android::packLocale; exactly 4 bytes are read from script (no terminator needed).
+ */
+bool inline isLocaleRepresentative(uint32_t language_and_region, const char* script) {
+ const unsigned char* s = reinterpret_cast<const unsigned char*>(script);
+ const uint64_t packed_locale = (
+ ((static_cast<uint64_t>(language_and_region)) << 32u) |
+ (static_cast<uint64_t>(s[0]) << 24u) |
+ (static_cast<uint64_t>(s[1]) << 16u) |
+ (static_cast<uint64_t>(s[2]) << 8u) |
+ static_cast<uint64_t>(s[3]));
+
+ return hidden::isRepresentative(packed_locale);
+}
+
+/**
+ * Return a parent packed key for a given script and child packed key. Return 0 if
+ * no parent is found.
+ */
+uint32_t findParentLocalePackedKey(const char* script, uint32_t packed_lang_region);
+
+uint32_t getMaxAncestorTreeDepth();
+
+} // namespace android
diff --git a/libs/androidfw/tests/LocaleDataLookup_test.cpp b/libs/androidfw/tests/LocaleDataLookup_test.cpp
new file mode 100644
index 000000000000..26b220d63169
--- /dev/null
+++ b/libs/androidfw/tests/LocaleDataLookup_test.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "androidfw/LocaleDataLookup.h"
+
+#include <cstddef>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+
+namespace android {
+
+constexpr const char NULL_SCRIPT[4] = {'\0', '\0', '\0','\0' };
+
+#define EXPECT_SCEIPT_EQ(ex, s) EXPECT_EQ(0, s == nullptr ? -1 : memcmp(ex, s, 4))
+
+// Similar to packLanguageOrRegion() in ResourceTypes.cpp
+static uint32_t encodeLanguageOrRegionLiteral(const char* in, const char base) {
+ size_t len = strlen(in);
+ if (len <= 1) {
+ return 0;
+ }
+
+ if (len == 2) {
+ return (((uint8_t) in[0]) << 8) | ((uint8_t) in[1]);
+ }
+ uint8_t first = (in[0] - base) & 0x007f;
+ uint8_t second = (in[1] - base) & 0x007f;
+ uint8_t third = (in[2] - base) & 0x007f;
+
+ return ((uint8_t) (0x80 | (third << 2) | (second >> 3)) << 8) | ((second << 5) | first);
+}
+
+static uint32_t encodeLocale(const char* language, const char* region) {
+ return (encodeLanguageOrRegionLiteral(language, 'a') << 16) |
+ encodeLanguageOrRegionLiteral(region, '0');
+}
+
+TEST(LocaleDataLookupTest, lookupLikelyScript) {
+ EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("", "")));
+ EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("en", "")));
+ EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("en", "US")));
+ EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("en", "GB")));
+ EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("fr", "")));
+ EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("fr", "FR")));
+
+
+ EXPECT_SCEIPT_EQ("~~~A", lookupLikelyScript(encodeLocale("en", "XA")));
+ EXPECT_SCEIPT_EQ("Latn", lookupLikelyScript(encodeLocale("ha", "")));
+ EXPECT_SCEIPT_EQ("Arab", lookupLikelyScript(encodeLocale("ha", "SD")));
+ EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("ha", "Sd"))); // case sensitive
+ EXPECT_SCEIPT_EQ("Hans", lookupLikelyScript(encodeLocale("zh", "")));
+ EXPECT_EQ(nullptr, lookupLikelyScript(encodeLocale("zh", "CN")));
+ EXPECT_SCEIPT_EQ("Hant", lookupLikelyScript(encodeLocale("zh", "HK")));
+
+ EXPECT_SCEIPT_EQ("Nshu", lookupLikelyScript(encodeLocale("zhx", "")));
+ EXPECT_SCEIPT_EQ("Nshu", lookupLikelyScript(0xDCF90000u)); // encoded "zhx"
+}
+
+TEST(LocaleDataLookupTest, isLocaleRepresentative) {
+ EXPECT_TRUE(isLocaleRepresentative(encodeLocale("en", "US"), "Latn"));
+ EXPECT_TRUE(isLocaleRepresentative(encodeLocale("en", "GB"), "Latn"));
+ EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", "US"), NULL_SCRIPT));
+ EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", ""), "Latn"));
+ EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", ""), NULL_SCRIPT));
+ EXPECT_FALSE(isLocaleRepresentative(encodeLocale("en", "US"), "Arab"));
+
+ EXPECT_TRUE(isLocaleRepresentative(encodeLocale("fr", "FR"), "Latn"));
+
+ EXPECT_TRUE(isLocaleRepresentative(encodeLocale("zh", "CN"), "Hans"));
+ EXPECT_FALSE(isLocaleRepresentative(encodeLocale("zh", "TW"), "Hans"));
+ EXPECT_FALSE(isLocaleRepresentative(encodeLocale("zhx", "CN"), "Hans"));
+ EXPECT_FALSE(isLocaleRepresentative(0xDCF9434E, "Hans"));
+ EXPECT_TRUE(isLocaleRepresentative(encodeLocale("zhx", "CN"), "Nshu"));
+ EXPECT_TRUE(isLocaleRepresentative(0xDCF9434E, "Nshu"));
+}
+
+TEST(LocaleDataLookupTest, findParentLocalePackedKey) {
+ EXPECT_EQ(encodeLocale("en", "001"), findParentLocalePackedKey("Latn", encodeLocale("en", "GB")));
+ EXPECT_EQ(0x656E8400u, findParentLocalePackedKey("Latn", encodeLocale("en", "GB"))); // en-001
+
+ EXPECT_EQ(encodeLocale("en", "IN"), findParentLocalePackedKey("Deva", encodeLocale("hi", "")));
+
+ EXPECT_EQ(encodeLocale("ar", "015"), findParentLocalePackedKey("Arab", encodeLocale("ar", "AE")));
+ EXPECT_EQ(0x61729420u, findParentLocalePackedKey("Arab", encodeLocale("ar", "AE"))); // ar-015
+ // Same parent is expected for script "~~~B" with ar-XB; check it in both forms as well.
+ EXPECT_EQ(encodeLocale("ar", "015"), findParentLocalePackedKey("~~~B", encodeLocale("ar", "XB")));
+ EXPECT_EQ(0x61729420u, findParentLocalePackedKey("~~~B", encodeLocale("ar", "XB"))); // ar-015
+
+ EXPECT_EQ(encodeLocale("zh", "HK"), findParentLocalePackedKey("Hant", encodeLocale("zh", "MO")));
+}
+
+} // namespace android