1*d57664e9SAndroid Build Coastguard Worker /*
2*d57664e9SAndroid Build Coastguard Worker * Copyright (C) 2016 The Android Open Source Project
3*d57664e9SAndroid Build Coastguard Worker *
4*d57664e9SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*d57664e9SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*d57664e9SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*d57664e9SAndroid Build Coastguard Worker *
8*d57664e9SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*d57664e9SAndroid Build Coastguard Worker *
10*d57664e9SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*d57664e9SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*d57664e9SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*d57664e9SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*d57664e9SAndroid Build Coastguard Worker * limitations under the License.
15*d57664e9SAndroid Build Coastguard Worker */
16*d57664e9SAndroid Build Coastguard Worker
17*d57664e9SAndroid Build Coastguard Worker #include <array>
18*d57664e9SAndroid Build Coastguard Worker #include <cstdint>
19*d57664e9SAndroid Build Coastguard Worker #include <cstdlib>
20*d57664e9SAndroid Build Coastguard Worker #include <cstring>
21*d57664e9SAndroid Build Coastguard Worker #include <string>
22*d57664e9SAndroid Build Coastguard Worker #include <unordered_map>
23*d57664e9SAndroid Build Coastguard Worker #include <unordered_set>
24*d57664e9SAndroid Build Coastguard Worker
25*d57664e9SAndroid Build Coastguard Worker #include <androidfw/LocaleData.h>
26*d57664e9SAndroid Build Coastguard Worker
27*d57664e9SAndroid Build Coastguard Worker namespace android {
28*d57664e9SAndroid Build Coastguard Worker
29*d57664e9SAndroid Build Coastguard Worker #include "LocaleDataTables.cpp"
30*d57664e9SAndroid Build Coastguard Worker
// Packs a two-byte language code and a two-byte region code into one 32-bit
// key: language[0] occupies bits 31..24, language[1] bits 23..16, region[0]
// bits 15..8 and region[1] bits 7..0.
//
// Exactly two bytes are read from each argument; neither needs to be
// NUL-terminated.
//
// Each byte is widened to uint32_t *before* it is shifted. Shifting the
// promoted (signed) int instead would push bytes >= 0x80 into the sign bit,
// and such bytes do occur (the packed es-419 constant in this file contains
// 0xA4).
inline uint32_t packLocale(const char* language, const char* region) {
    const uint32_t language_hi = static_cast<uint8_t>(language[0]);
    const uint32_t language_lo = static_cast<uint8_t>(language[1]);
    const uint32_t region_hi = static_cast<uint8_t>(region[0]);
    const uint32_t region_lo = static_cast<uint8_t>(region[1]);
    return (language_hi << 24u) | (language_lo << 16u) | (region_hi << 8u) | region_lo;
}
35*d57664e9SAndroid Build Coastguard Worker
// Returns the packed locale with its region half (the low 16 bits) cleared,
// leaving only the language half (the high 16 bits).
inline uint32_t dropRegion(uint32_t packed_locale) {
    const uint32_t region_bits = packed_locale & 0xFFFFu;
    return packed_locale ^ region_bits;
}
39*d57664e9SAndroid Build Coastguard Worker
// Tells whether the region half (the low 16 bits) of a packed locale holds
// anything other than zero.
inline bool hasRegion(uint32_t packed_locale) {
    const uint32_t region_bits = packed_locale & 0xFFFFu;
    return region_bits != 0u;
}
43*d57664e9SAndroid Build Coastguard Worker
44*d57664e9SAndroid Build Coastguard Worker const size_t SCRIPT_LENGTH = 4;
45*d57664e9SAndroid Build Coastguard Worker const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
46*d57664e9SAndroid Build Coastguard Worker const uint32_t PACKED_ROOT = 0; // to represent the root locale
47*d57664e9SAndroid Build Coastguard Worker
findParent(uint32_t packed_locale,const char * script)48*d57664e9SAndroid Build Coastguard Worker uint32_t findParent(uint32_t packed_locale, const char* script) {
49*d57664e9SAndroid Build Coastguard Worker if (hasRegion(packed_locale)) {
50*d57664e9SAndroid Build Coastguard Worker for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
51*d57664e9SAndroid Build Coastguard Worker if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
52*d57664e9SAndroid Build Coastguard Worker auto map = SCRIPT_PARENTS[i].map;
53*d57664e9SAndroid Build Coastguard Worker auto lookup_result = map->find(packed_locale);
54*d57664e9SAndroid Build Coastguard Worker if (lookup_result != map->end()) {
55*d57664e9SAndroid Build Coastguard Worker return lookup_result->second;
56*d57664e9SAndroid Build Coastguard Worker }
57*d57664e9SAndroid Build Coastguard Worker break;
58*d57664e9SAndroid Build Coastguard Worker }
59*d57664e9SAndroid Build Coastguard Worker }
60*d57664e9SAndroid Build Coastguard Worker return dropRegion(packed_locale);
61*d57664e9SAndroid Build Coastguard Worker }
62*d57664e9SAndroid Build Coastguard Worker return PACKED_ROOT;
63*d57664e9SAndroid Build Coastguard Worker }
64*d57664e9SAndroid Build Coastguard Worker
65*d57664e9SAndroid Build Coastguard Worker // Find the ancestors of a locale, and fill 'out' with it (assumes out has enough
66*d57664e9SAndroid Build Coastguard Worker // space). If any of the members of stop_list was seen, write it in the
67*d57664e9SAndroid Build Coastguard Worker // output but stop afterwards.
68*d57664e9SAndroid Build Coastguard Worker //
69*d57664e9SAndroid Build Coastguard Worker // This also outputs the index of the last written ancestor in the stop_list
70*d57664e9SAndroid Build Coastguard Worker // to stop_list_index, which will be -1 if it is not found in the stop_list.
71*d57664e9SAndroid Build Coastguard Worker //
72*d57664e9SAndroid Build Coastguard Worker // Returns the number of ancestors written in the output, which is always
73*d57664e9SAndroid Build Coastguard Worker // at least one.
74*d57664e9SAndroid Build Coastguard Worker //
75*d57664e9SAndroid Build Coastguard Worker // (If 'out' is nullptr, we do everything the same way but we simply don't write
76*d57664e9SAndroid Build Coastguard Worker // any results in 'out'.)
findAncestors(uint32_t * out,ssize_t * stop_list_index,uint32_t packed_locale,const char * script,const uint32_t * stop_list,size_t stop_set_length)77*d57664e9SAndroid Build Coastguard Worker size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
78*d57664e9SAndroid Build Coastguard Worker uint32_t packed_locale, const char* script,
79*d57664e9SAndroid Build Coastguard Worker const uint32_t* stop_list, size_t stop_set_length) {
80*d57664e9SAndroid Build Coastguard Worker uint32_t ancestor = packed_locale;
81*d57664e9SAndroid Build Coastguard Worker size_t count = 0;
82*d57664e9SAndroid Build Coastguard Worker do {
83*d57664e9SAndroid Build Coastguard Worker if (out != nullptr) out[count] = ancestor;
84*d57664e9SAndroid Build Coastguard Worker count++;
85*d57664e9SAndroid Build Coastguard Worker for (size_t i = 0; i < stop_set_length; i++) {
86*d57664e9SAndroid Build Coastguard Worker if (stop_list[i] == ancestor) {
87*d57664e9SAndroid Build Coastguard Worker *stop_list_index = (ssize_t) i;
88*d57664e9SAndroid Build Coastguard Worker return count;
89*d57664e9SAndroid Build Coastguard Worker }
90*d57664e9SAndroid Build Coastguard Worker }
91*d57664e9SAndroid Build Coastguard Worker ancestor = findParent(ancestor, script);
92*d57664e9SAndroid Build Coastguard Worker } while (ancestor != PACKED_ROOT);
93*d57664e9SAndroid Build Coastguard Worker *stop_list_index = (ssize_t) -1;
94*d57664e9SAndroid Build Coastguard Worker return count;
95*d57664e9SAndroid Build Coastguard Worker }
96*d57664e9SAndroid Build Coastguard Worker
findDistance(uint32_t supported,const char * script,const uint32_t * request_ancestors,size_t request_ancestors_count)97*d57664e9SAndroid Build Coastguard Worker size_t findDistance(uint32_t supported,
98*d57664e9SAndroid Build Coastguard Worker const char* script,
99*d57664e9SAndroid Build Coastguard Worker const uint32_t* request_ancestors,
100*d57664e9SAndroid Build Coastguard Worker size_t request_ancestors_count) {
101*d57664e9SAndroid Build Coastguard Worker ssize_t request_ancestors_index;
102*d57664e9SAndroid Build Coastguard Worker const size_t supported_ancestor_count = findAncestors(
103*d57664e9SAndroid Build Coastguard Worker nullptr, &request_ancestors_index,
104*d57664e9SAndroid Build Coastguard Worker supported, script,
105*d57664e9SAndroid Build Coastguard Worker request_ancestors, request_ancestors_count);
106*d57664e9SAndroid Build Coastguard Worker // Since both locales share the same root, there will always be a shared
107*d57664e9SAndroid Build Coastguard Worker // ancestor, so the distance in the parent tree is the sum of the distance
108*d57664e9SAndroid Build Coastguard Worker // of 'supported' to the lowest common ancestor (number of ancestors
109*d57664e9SAndroid Build Coastguard Worker // written for 'supported' minus 1) plus the distance of 'request' to the
110*d57664e9SAndroid Build Coastguard Worker // lowest common ancestor (the index of the ancestor in request_ancestors).
111*d57664e9SAndroid Build Coastguard Worker return supported_ancestor_count + request_ancestors_index - 1;
112*d57664e9SAndroid Build Coastguard Worker }
113*d57664e9SAndroid Build Coastguard Worker
isRepresentative(uint32_t language_and_region,const char * script)114*d57664e9SAndroid Build Coastguard Worker inline bool isRepresentative(uint32_t language_and_region, const char* script) {
115*d57664e9SAndroid Build Coastguard Worker const uint64_t packed_locale = (
116*d57664e9SAndroid Build Coastguard Worker (((uint64_t) language_and_region) << 32u) |
117*d57664e9SAndroid Build Coastguard Worker (((uint64_t) script[0]) << 24u) |
118*d57664e9SAndroid Build Coastguard Worker (((uint64_t) script[1]) << 16u) |
119*d57664e9SAndroid Build Coastguard Worker (((uint64_t) script[2]) << 8u) |
120*d57664e9SAndroid Build Coastguard Worker ((uint64_t) script[3]));
121*d57664e9SAndroid Build Coastguard Worker
122*d57664e9SAndroid Build Coastguard Worker return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
123*d57664e9SAndroid Build Coastguard Worker }
124*d57664e9SAndroid Build Coastguard Worker
// Packed language+region values for the Spanish variants that get special
// treatment when regions are compared.
const uint32_t US_SPANISH = 0x65735553LU;              // es-US
const uint32_t MEXICAN_SPANISH = 0x65734D58LU;         // es-MX
const uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424LU;  // es-419

// es-US and es-MX act as stand-ins for es-419: when es-419 itself is not
// among the candidates, either of them is treated as its equivalent.
// Returns true exactly for those two packed locales.
inline bool isSpecialSpanish(uint32_t language_and_region) {
    switch (language_and_region) {
        case US_SPANISH:
        case MEXICAN_SPANISH:
            return true;
        default:
            return false;
    }
}
134*d57664e9SAndroid Build Coastguard Worker
localeDataCompareRegions(const char * left_region,const char * right_region,const char * requested_language,const char * requested_script,const char * requested_region)135*d57664e9SAndroid Build Coastguard Worker int localeDataCompareRegions(
136*d57664e9SAndroid Build Coastguard Worker const char* left_region, const char* right_region,
137*d57664e9SAndroid Build Coastguard Worker const char* requested_language, const char* requested_script,
138*d57664e9SAndroid Build Coastguard Worker const char* requested_region) {
139*d57664e9SAndroid Build Coastguard Worker
140*d57664e9SAndroid Build Coastguard Worker if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
141*d57664e9SAndroid Build Coastguard Worker return 0;
142*d57664e9SAndroid Build Coastguard Worker }
143*d57664e9SAndroid Build Coastguard Worker uint32_t left = packLocale(requested_language, left_region);
144*d57664e9SAndroid Build Coastguard Worker uint32_t right = packLocale(requested_language, right_region);
145*d57664e9SAndroid Build Coastguard Worker const uint32_t request = packLocale(requested_language, requested_region);
146*d57664e9SAndroid Build Coastguard Worker
147*d57664e9SAndroid Build Coastguard Worker // If one and only one of the two locales is a special Spanish locale, we
148*d57664e9SAndroid Build Coastguard Worker // replace it with es-419. We don't do the replacement if the other locale
149*d57664e9SAndroid Build Coastguard Worker // is already es-419, or both locales are special Spanish locales (when
150*d57664e9SAndroid Build Coastguard Worker // es-US is being compared to es-MX).
151*d57664e9SAndroid Build Coastguard Worker const bool leftIsSpecialSpanish = isSpecialSpanish(left);
152*d57664e9SAndroid Build Coastguard Worker const bool rightIsSpecialSpanish = isSpecialSpanish(right);
153*d57664e9SAndroid Build Coastguard Worker if (leftIsSpecialSpanish && !rightIsSpecialSpanish && right != LATIN_AMERICAN_SPANISH) {
154*d57664e9SAndroid Build Coastguard Worker left = LATIN_AMERICAN_SPANISH;
155*d57664e9SAndroid Build Coastguard Worker } else if (rightIsSpecialSpanish && !leftIsSpecialSpanish && left != LATIN_AMERICAN_SPANISH) {
156*d57664e9SAndroid Build Coastguard Worker right = LATIN_AMERICAN_SPANISH;
157*d57664e9SAndroid Build Coastguard Worker }
158*d57664e9SAndroid Build Coastguard Worker
159*d57664e9SAndroid Build Coastguard Worker uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
160*d57664e9SAndroid Build Coastguard Worker ssize_t left_right_index;
161*d57664e9SAndroid Build Coastguard Worker // Find the parents of the request, but stop as soon as we saw left or right
162*d57664e9SAndroid Build Coastguard Worker const std::array<uint32_t, 2> left_and_right = {{left, right}};
163*d57664e9SAndroid Build Coastguard Worker const size_t ancestor_count = findAncestors(
164*d57664e9SAndroid Build Coastguard Worker request_ancestors, &left_right_index,
165*d57664e9SAndroid Build Coastguard Worker request, requested_script,
166*d57664e9SAndroid Build Coastguard Worker left_and_right.data(), left_and_right.size());
167*d57664e9SAndroid Build Coastguard Worker if (left_right_index == 0) { // We saw left earlier
168*d57664e9SAndroid Build Coastguard Worker return 1;
169*d57664e9SAndroid Build Coastguard Worker }
170*d57664e9SAndroid Build Coastguard Worker if (left_right_index == 1) { // We saw right earlier
171*d57664e9SAndroid Build Coastguard Worker return -1;
172*d57664e9SAndroid Build Coastguard Worker }
173*d57664e9SAndroid Build Coastguard Worker
174*d57664e9SAndroid Build Coastguard Worker // If we are here, neither left nor right are an ancestor of the
175*d57664e9SAndroid Build Coastguard Worker // request. This means that all the ancestors have been computed and
176*d57664e9SAndroid Build Coastguard Worker // the last ancestor is just the language by itself. We will use the
177*d57664e9SAndroid Build Coastguard Worker // distance in the parent tree for determining the better match.
178*d57664e9SAndroid Build Coastguard Worker const size_t left_distance = findDistance(
179*d57664e9SAndroid Build Coastguard Worker left, requested_script, request_ancestors, ancestor_count);
180*d57664e9SAndroid Build Coastguard Worker const size_t right_distance = findDistance(
181*d57664e9SAndroid Build Coastguard Worker right, requested_script, request_ancestors, ancestor_count);
182*d57664e9SAndroid Build Coastguard Worker if (left_distance != right_distance) {
183*d57664e9SAndroid Build Coastguard Worker return (int) right_distance - (int) left_distance; // smaller distance is better
184*d57664e9SAndroid Build Coastguard Worker }
185*d57664e9SAndroid Build Coastguard Worker
186*d57664e9SAndroid Build Coastguard Worker // If we are here, left and right are equidistant from the request. We will
187*d57664e9SAndroid Build Coastguard Worker // try and see if any of them is a representative locale.
188*d57664e9SAndroid Build Coastguard Worker const bool left_is_representative = isRepresentative(left, requested_script);
189*d57664e9SAndroid Build Coastguard Worker const bool right_is_representative = isRepresentative(right, requested_script);
190*d57664e9SAndroid Build Coastguard Worker if (left_is_representative != right_is_representative) {
191*d57664e9SAndroid Build Coastguard Worker return (int) left_is_representative - (int) right_is_representative;
192*d57664e9SAndroid Build Coastguard Worker }
193*d57664e9SAndroid Build Coastguard Worker
194*d57664e9SAndroid Build Coastguard Worker // We have no way of figuring out which locale is a better match. For
195*d57664e9SAndroid Build Coastguard Worker // the sake of stability, we consider the locale with the lower region
196*d57664e9SAndroid Build Coastguard Worker // code (in dictionary order) better, with two-letter codes before
197*d57664e9SAndroid Build Coastguard Worker // three-digit codes (since two-letter codes are more specific).
198*d57664e9SAndroid Build Coastguard Worker return (int64_t) right - (int64_t) left;
199*d57664e9SAndroid Build Coastguard Worker }
200*d57664e9SAndroid Build Coastguard Worker
localeDataComputeScript(char out[4],const char * language,const char * region)201*d57664e9SAndroid Build Coastguard Worker void localeDataComputeScript(char out[4], const char* language, const char* region) {
202*d57664e9SAndroid Build Coastguard Worker if (language[0] == '\0') {
203*d57664e9SAndroid Build Coastguard Worker memset(out, '\0', SCRIPT_LENGTH);
204*d57664e9SAndroid Build Coastguard Worker return;
205*d57664e9SAndroid Build Coastguard Worker }
206*d57664e9SAndroid Build Coastguard Worker uint32_t lookup_key = packLocale(language, region);
207*d57664e9SAndroid Build Coastguard Worker auto lookup_result = LIKELY_SCRIPTS.find(lookup_key);
208*d57664e9SAndroid Build Coastguard Worker if (lookup_result == LIKELY_SCRIPTS.end()) {
209*d57664e9SAndroid Build Coastguard Worker // We couldn't find the locale. Let's try without the region
210*d57664e9SAndroid Build Coastguard Worker if (region[0] != '\0') {
211*d57664e9SAndroid Build Coastguard Worker lookup_key = dropRegion(lookup_key);
212*d57664e9SAndroid Build Coastguard Worker lookup_result = LIKELY_SCRIPTS.find(lookup_key);
213*d57664e9SAndroid Build Coastguard Worker if (lookup_result != LIKELY_SCRIPTS.end()) {
214*d57664e9SAndroid Build Coastguard Worker memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
215*d57664e9SAndroid Build Coastguard Worker return;
216*d57664e9SAndroid Build Coastguard Worker }
217*d57664e9SAndroid Build Coastguard Worker }
218*d57664e9SAndroid Build Coastguard Worker // We don't know anything about the locale
219*d57664e9SAndroid Build Coastguard Worker memset(out, '\0', SCRIPT_LENGTH);
220*d57664e9SAndroid Build Coastguard Worker return;
221*d57664e9SAndroid Build Coastguard Worker } else {
222*d57664e9SAndroid Build Coastguard Worker // We found the locale.
223*d57664e9SAndroid Build Coastguard Worker memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
224*d57664e9SAndroid Build Coastguard Worker }
225*d57664e9SAndroid Build Coastguard Worker }
226*d57664e9SAndroid Build Coastguard Worker
227*d57664e9SAndroid Build Coastguard Worker const uint32_t ENGLISH_STOP_LIST[2] = {
228*d57664e9SAndroid Build Coastguard Worker 0x656E0000LU, // en
229*d57664e9SAndroid Build Coastguard Worker 0x656E8400LU, // en-001
230*d57664e9SAndroid Build Coastguard Worker };
231*d57664e9SAndroid Build Coastguard Worker const char ENGLISH_CHARS[2] = {'e', 'n'};
232*d57664e9SAndroid Build Coastguard Worker const char LATIN_CHARS[4] = {'L', 'a', 't', 'n'};
233*d57664e9SAndroid Build Coastguard Worker
localeDataIsCloseToUsEnglish(const char * region)234*d57664e9SAndroid Build Coastguard Worker bool localeDataIsCloseToUsEnglish(const char* region) {
235*d57664e9SAndroid Build Coastguard Worker const uint32_t locale = packLocale(ENGLISH_CHARS, region);
236*d57664e9SAndroid Build Coastguard Worker ssize_t stop_list_index;
237*d57664e9SAndroid Build Coastguard Worker findAncestors(nullptr, &stop_list_index, locale, LATIN_CHARS, ENGLISH_STOP_LIST, 2);
238*d57664e9SAndroid Build Coastguard Worker // A locale is like US English if we see "en" before "en-001" in its ancestor list.
239*d57664e9SAndroid Build Coastguard Worker return stop_list_index == 0; // 'en' is first in ENGLISH_STOP_LIST
240*d57664e9SAndroid Build Coastguard Worker }
241*d57664e9SAndroid Build Coastguard Worker
242*d57664e9SAndroid Build Coastguard Worker } // namespace android
243