1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker *
4*993b0882SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker *
8*993b0882SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker *
10*993b0882SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #include "utils/i18n/locale.h"
18*993b0882SAndroid Build Coastguard Worker
19*993b0882SAndroid Build Coastguard Worker #include <string>
20*993b0882SAndroid Build Coastguard Worker
21*993b0882SAndroid Build Coastguard Worker #include "utils/strings/split.h"
22*993b0882SAndroid Build Coastguard Worker
23*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
24*993b0882SAndroid Build Coastguard Worker
25*993b0882SAndroid Build Coastguard Worker namespace {
// Wildcard subtag: a supported-locale component equal to this string matches
// any value of the corresponding component.
constexpr char kAnyMatch[] = "*";

// Language subtag that BCP 47 reserves for "Undetermined Language".
constexpr char kUnknownLanguageCode[] = "und";
30*993b0882SAndroid Build Coastguard Worker
CheckLanguage(StringPiece language)31*993b0882SAndroid Build Coastguard Worker bool CheckLanguage(StringPiece language) {
32*993b0882SAndroid Build Coastguard Worker if (language.size() == 1 && language.data()[0] == '*') {
33*993b0882SAndroid Build Coastguard Worker return true;
34*993b0882SAndroid Build Coastguard Worker }
35*993b0882SAndroid Build Coastguard Worker
36*993b0882SAndroid Build Coastguard Worker if (language.size() != 2 && language.size() != 3) {
37*993b0882SAndroid Build Coastguard Worker return false;
38*993b0882SAndroid Build Coastguard Worker }
39*993b0882SAndroid Build Coastguard Worker
40*993b0882SAndroid Build Coastguard Worker // Needs to be all lowercase.
41*993b0882SAndroid Build Coastguard Worker for (int i = 0; i < language.size(); ++i) {
42*993b0882SAndroid Build Coastguard Worker if (!std::islower(language[i])) {
43*993b0882SAndroid Build Coastguard Worker return false;
44*993b0882SAndroid Build Coastguard Worker }
45*993b0882SAndroid Build Coastguard Worker }
46*993b0882SAndroid Build Coastguard Worker
47*993b0882SAndroid Build Coastguard Worker return true;
48*993b0882SAndroid Build Coastguard Worker }
49*993b0882SAndroid Build Coastguard Worker
CheckScript(StringPiece script)50*993b0882SAndroid Build Coastguard Worker bool CheckScript(StringPiece script) {
51*993b0882SAndroid Build Coastguard Worker if (script.size() != 4) {
52*993b0882SAndroid Build Coastguard Worker return false;
53*993b0882SAndroid Build Coastguard Worker }
54*993b0882SAndroid Build Coastguard Worker
55*993b0882SAndroid Build Coastguard Worker if (!std::isupper(script[0])) {
56*993b0882SAndroid Build Coastguard Worker return false;
57*993b0882SAndroid Build Coastguard Worker }
58*993b0882SAndroid Build Coastguard Worker
59*993b0882SAndroid Build Coastguard Worker // Needs to be all lowercase.
60*993b0882SAndroid Build Coastguard Worker for (int i = 1; i < script.size(); ++i) {
61*993b0882SAndroid Build Coastguard Worker if (!std::islower(script[i])) {
62*993b0882SAndroid Build Coastguard Worker return false;
63*993b0882SAndroid Build Coastguard Worker }
64*993b0882SAndroid Build Coastguard Worker }
65*993b0882SAndroid Build Coastguard Worker
66*993b0882SAndroid Build Coastguard Worker return true;
67*993b0882SAndroid Build Coastguard Worker }
68*993b0882SAndroid Build Coastguard Worker
CheckRegion(StringPiece region)69*993b0882SAndroid Build Coastguard Worker bool CheckRegion(StringPiece region) {
70*993b0882SAndroid Build Coastguard Worker if (region.size() == 2) {
71*993b0882SAndroid Build Coastguard Worker return std::isupper(region[0]) && std::isupper(region[1]);
72*993b0882SAndroid Build Coastguard Worker } else if (region.size() == 3) {
73*993b0882SAndroid Build Coastguard Worker return std::isdigit(region[0]) && std::isdigit(region[1]) &&
74*993b0882SAndroid Build Coastguard Worker std::isdigit(region[2]);
75*993b0882SAndroid Build Coastguard Worker } else {
76*993b0882SAndroid Build Coastguard Worker return false;
77*993b0882SAndroid Build Coastguard Worker }
78*993b0882SAndroid Build Coastguard Worker }
79*993b0882SAndroid Build Coastguard Worker
80*993b0882SAndroid Build Coastguard Worker } // namespace
81*993b0882SAndroid Build Coastguard Worker
FromBCP47(const std::string & locale_tag)82*993b0882SAndroid Build Coastguard Worker Locale Locale::FromBCP47(const std::string& locale_tag) {
83*993b0882SAndroid Build Coastguard Worker std::vector<StringPiece> parts = strings::Split(locale_tag, '-');
84*993b0882SAndroid Build Coastguard Worker if (parts.empty()) {
85*993b0882SAndroid Build Coastguard Worker return Locale::Invalid();
86*993b0882SAndroid Build Coastguard Worker }
87*993b0882SAndroid Build Coastguard Worker
88*993b0882SAndroid Build Coastguard Worker auto parts_it = parts.begin();
89*993b0882SAndroid Build Coastguard Worker StringPiece language = *parts_it;
90*993b0882SAndroid Build Coastguard Worker if (!CheckLanguage(language)) {
91*993b0882SAndroid Build Coastguard Worker return Locale::Invalid();
92*993b0882SAndroid Build Coastguard Worker }
93*993b0882SAndroid Build Coastguard Worker ++parts_it;
94*993b0882SAndroid Build Coastguard Worker
95*993b0882SAndroid Build Coastguard Worker StringPiece script;
96*993b0882SAndroid Build Coastguard Worker if (parts_it != parts.end()) {
97*993b0882SAndroid Build Coastguard Worker script = *parts_it;
98*993b0882SAndroid Build Coastguard Worker if (!CheckScript(script)) {
99*993b0882SAndroid Build Coastguard Worker script = "";
100*993b0882SAndroid Build Coastguard Worker } else {
101*993b0882SAndroid Build Coastguard Worker ++parts_it;
102*993b0882SAndroid Build Coastguard Worker }
103*993b0882SAndroid Build Coastguard Worker }
104*993b0882SAndroid Build Coastguard Worker
105*993b0882SAndroid Build Coastguard Worker StringPiece region;
106*993b0882SAndroid Build Coastguard Worker if (parts_it != parts.end()) {
107*993b0882SAndroid Build Coastguard Worker region = *parts_it;
108*993b0882SAndroid Build Coastguard Worker if (!CheckRegion(region)) {
109*993b0882SAndroid Build Coastguard Worker region = "";
110*993b0882SAndroid Build Coastguard Worker } else {
111*993b0882SAndroid Build Coastguard Worker ++parts_it;
112*993b0882SAndroid Build Coastguard Worker }
113*993b0882SAndroid Build Coastguard Worker }
114*993b0882SAndroid Build Coastguard Worker
115*993b0882SAndroid Build Coastguard Worker // NOTE: We don't parse the rest of the BCP47 tag here even if specified.
116*993b0882SAndroid Build Coastguard Worker
117*993b0882SAndroid Build Coastguard Worker return Locale(language.ToString(), script.ToString(), region.ToString());
118*993b0882SAndroid Build Coastguard Worker }
119*993b0882SAndroid Build Coastguard Worker
FromLanguageTag(const LanguageTag * language_tag)120*993b0882SAndroid Build Coastguard Worker Locale Locale::FromLanguageTag(const LanguageTag* language_tag) {
121*993b0882SAndroid Build Coastguard Worker if (language_tag == nullptr || language_tag->language() == nullptr) {
122*993b0882SAndroid Build Coastguard Worker return Locale::Invalid();
123*993b0882SAndroid Build Coastguard Worker }
124*993b0882SAndroid Build Coastguard Worker
125*993b0882SAndroid Build Coastguard Worker StringPiece language = language_tag->language()->c_str();
126*993b0882SAndroid Build Coastguard Worker if (!CheckLanguage(language)) {
127*993b0882SAndroid Build Coastguard Worker return Locale::Invalid();
128*993b0882SAndroid Build Coastguard Worker }
129*993b0882SAndroid Build Coastguard Worker
130*993b0882SAndroid Build Coastguard Worker StringPiece script;
131*993b0882SAndroid Build Coastguard Worker if (language_tag->script() != nullptr) {
132*993b0882SAndroid Build Coastguard Worker script = language_tag->script()->c_str();
133*993b0882SAndroid Build Coastguard Worker if (!CheckScript(script)) {
134*993b0882SAndroid Build Coastguard Worker script = "";
135*993b0882SAndroid Build Coastguard Worker }
136*993b0882SAndroid Build Coastguard Worker }
137*993b0882SAndroid Build Coastguard Worker
138*993b0882SAndroid Build Coastguard Worker StringPiece region;
139*993b0882SAndroid Build Coastguard Worker if (language_tag->region() != nullptr) {
140*993b0882SAndroid Build Coastguard Worker region = language_tag->region()->c_str();
141*993b0882SAndroid Build Coastguard Worker if (!CheckRegion(region)) {
142*993b0882SAndroid Build Coastguard Worker region = "";
143*993b0882SAndroid Build Coastguard Worker }
144*993b0882SAndroid Build Coastguard Worker }
145*993b0882SAndroid Build Coastguard Worker return Locale(language.ToString(), script.ToString(), region.ToString());
146*993b0882SAndroid Build Coastguard Worker }
147*993b0882SAndroid Build Coastguard Worker
IsUnknown() const148*993b0882SAndroid Build Coastguard Worker bool Locale::IsUnknown() const {
149*993b0882SAndroid Build Coastguard Worker return is_valid_ && language_ == kUnknownLanguageCode;
150*993b0882SAndroid Build Coastguard Worker }
151*993b0882SAndroid Build Coastguard Worker
IsLocaleSupported(const Locale & locale,const std::vector<Locale> & supported_locales,bool default_value)152*993b0882SAndroid Build Coastguard Worker bool Locale::IsLocaleSupported(const Locale& locale,
153*993b0882SAndroid Build Coastguard Worker const std::vector<Locale>& supported_locales,
154*993b0882SAndroid Build Coastguard Worker bool default_value) {
155*993b0882SAndroid Build Coastguard Worker if (!locale.IsValid()) {
156*993b0882SAndroid Build Coastguard Worker return false;
157*993b0882SAndroid Build Coastguard Worker }
158*993b0882SAndroid Build Coastguard Worker if (locale.IsUnknown()) {
159*993b0882SAndroid Build Coastguard Worker return default_value;
160*993b0882SAndroid Build Coastguard Worker }
161*993b0882SAndroid Build Coastguard Worker for (const Locale& supported_locale : supported_locales) {
162*993b0882SAndroid Build Coastguard Worker if (!supported_locale.IsValid()) {
163*993b0882SAndroid Build Coastguard Worker continue;
164*993b0882SAndroid Build Coastguard Worker }
165*993b0882SAndroid Build Coastguard Worker const bool language_matches =
166*993b0882SAndroid Build Coastguard Worker supported_locale.Language().empty() ||
167*993b0882SAndroid Build Coastguard Worker supported_locale.Language() == kAnyMatch ||
168*993b0882SAndroid Build Coastguard Worker supported_locale.Language() == locale.Language();
169*993b0882SAndroid Build Coastguard Worker const bool script_matches = supported_locale.Script().empty() ||
170*993b0882SAndroid Build Coastguard Worker supported_locale.Script() == kAnyMatch ||
171*993b0882SAndroid Build Coastguard Worker locale.Script().empty() ||
172*993b0882SAndroid Build Coastguard Worker supported_locale.Script() == locale.Script();
173*993b0882SAndroid Build Coastguard Worker const bool region_matches = supported_locale.Region().empty() ||
174*993b0882SAndroid Build Coastguard Worker supported_locale.Region() == kAnyMatch ||
175*993b0882SAndroid Build Coastguard Worker locale.Region().empty() ||
176*993b0882SAndroid Build Coastguard Worker supported_locale.Region() == locale.Region();
177*993b0882SAndroid Build Coastguard Worker if (language_matches && script_matches && region_matches) {
178*993b0882SAndroid Build Coastguard Worker return true;
179*993b0882SAndroid Build Coastguard Worker }
180*993b0882SAndroid Build Coastguard Worker }
181*993b0882SAndroid Build Coastguard Worker return false;
182*993b0882SAndroid Build Coastguard Worker }
183*993b0882SAndroid Build Coastguard Worker
IsAnyLocaleSupported(const std::vector<Locale> & locales,const std::vector<Locale> & supported_locales,bool default_value)184*993b0882SAndroid Build Coastguard Worker bool Locale::IsAnyLocaleSupported(const std::vector<Locale>& locales,
185*993b0882SAndroid Build Coastguard Worker const std::vector<Locale>& supported_locales,
186*993b0882SAndroid Build Coastguard Worker bool default_value) {
187*993b0882SAndroid Build Coastguard Worker if (locales.empty()) {
188*993b0882SAndroid Build Coastguard Worker return default_value;
189*993b0882SAndroid Build Coastguard Worker }
190*993b0882SAndroid Build Coastguard Worker if (supported_locales.empty()) {
191*993b0882SAndroid Build Coastguard Worker return default_value;
192*993b0882SAndroid Build Coastguard Worker }
193*993b0882SAndroid Build Coastguard Worker for (const Locale& locale : locales) {
194*993b0882SAndroid Build Coastguard Worker if (IsLocaleSupported(locale, supported_locales, default_value)) {
195*993b0882SAndroid Build Coastguard Worker return true;
196*993b0882SAndroid Build Coastguard Worker }
197*993b0882SAndroid Build Coastguard Worker }
198*993b0882SAndroid Build Coastguard Worker return false;
199*993b0882SAndroid Build Coastguard Worker }
200*993b0882SAndroid Build Coastguard Worker
operator ==(const Locale & locale) const201*993b0882SAndroid Build Coastguard Worker bool Locale::operator==(const Locale& locale) const {
202*993b0882SAndroid Build Coastguard Worker return language_ == locale.language_ && region_ == locale.region_ &&
203*993b0882SAndroid Build Coastguard Worker script_ == locale.script_;
204*993b0882SAndroid Build Coastguard Worker }
205*993b0882SAndroid Build Coastguard Worker
operator <(const Locale & locale) const206*993b0882SAndroid Build Coastguard Worker bool Locale::operator<(const Locale& locale) const {
207*993b0882SAndroid Build Coastguard Worker return std::tie(language_, region_, script_) <
208*993b0882SAndroid Build Coastguard Worker std::tie(locale.language_, locale.region_, locale.script_);
209*993b0882SAndroid Build Coastguard Worker }
210*993b0882SAndroid Build Coastguard Worker
operator !=(const Locale & locale) const211*993b0882SAndroid Build Coastguard Worker bool Locale::operator!=(const Locale& locale) const {
212*993b0882SAndroid Build Coastguard Worker return !(*this == locale);
213*993b0882SAndroid Build Coastguard Worker }
214*993b0882SAndroid Build Coastguard Worker
operator <<(logging::LoggingStringStream & stream,const Locale & locale)215*993b0882SAndroid Build Coastguard Worker logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
216*993b0882SAndroid Build Coastguard Worker const Locale& locale) {
217*993b0882SAndroid Build Coastguard Worker return stream << "Locale(language=" << locale.Language()
218*993b0882SAndroid Build Coastguard Worker << ", script=" << locale.Script()
219*993b0882SAndroid Build Coastguard Worker << ", region=" << locale.Region()
220*993b0882SAndroid Build Coastguard Worker << ", is_valid=" << locale.IsValid()
221*993b0882SAndroid Build Coastguard Worker << ", is_unknown=" << locale.IsUnknown() << ")";
222*993b0882SAndroid Build Coastguard Worker }
223*993b0882SAndroid Build Coastguard Worker
ParseLocales(StringPiece locales_list,std::vector<Locale> * locales)224*993b0882SAndroid Build Coastguard Worker bool ParseLocales(StringPiece locales_list, std::vector<Locale>* locales) {
225*993b0882SAndroid Build Coastguard Worker for (const auto& locale_str : strings::Split(locales_list, ',')) {
226*993b0882SAndroid Build Coastguard Worker const Locale locale = Locale::FromBCP47(locale_str.ToString());
227*993b0882SAndroid Build Coastguard Worker if (!locale.IsValid()) {
228*993b0882SAndroid Build Coastguard Worker TC3_LOG(ERROR) << "Invalid locale " << locale_str.ToString();
229*993b0882SAndroid Build Coastguard Worker return false;
230*993b0882SAndroid Build Coastguard Worker }
231*993b0882SAndroid Build Coastguard Worker locales->push_back(locale);
232*993b0882SAndroid Build Coastguard Worker }
233*993b0882SAndroid Build Coastguard Worker return true;
234*993b0882SAndroid Build Coastguard Worker }
235*993b0882SAndroid Build Coastguard Worker
236*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
237