/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compile/PseudolocaleGenerator.h"

#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <memory>
#include <optional>
#include <random>
#include <string>
#include <string_view>
#include <vector>

#include "ResourceTable.h"
#include "ResourceValues.h"
#include "ValueVisitor.h"
#include "androidfw/ResourceTypes.h"
#include "androidfw/Util.h"
#include "compile/Pseudolocalizer.h"
#include "util/Util.h"

using ::android::ConfigDescription;
using ::android::StringPiece;
using ::android::StringPiece16;

namespace aapt {

// The struct that represents both Span objects and UntranslatableSections.
struct UnifiedSpan {
  // Only present for Span objects. If not present, this was an UntranslatableSection.
  std::optional<std::string> tag;

  // The UTF-16 index into the string where this span starts.
  uint32_t first_char;

  // The UTF-16 index into the string where this span ends, inclusive.
  uint32_t last_char;
};

// Orders UnifiedSpans by first_char, breaking ties by last_char. The tag does not participate
// in the ordering.
inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
  if (left.first_char < right.first_char) {
    return true;
  } else if (left.first_char > right.first_char) {
    return false;
  } else if (left.last_char < right.last_char) {
    return true;
  }
  return false;
}

// Converts a StringPool span into a tagged UnifiedSpan with the same (inclusive) bounds.
inline static UnifiedSpan SpanToUnifiedSpan(const android::StringPool::Span& span) {
  return UnifiedSpan{*span.name, span.first_char, span.last_char};
}

// Converts an UntranslatableSection into an untagged UnifiedSpan. The section's `end` is
// converted to an inclusive last_char by subtracting one.
inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
  return UnifiedSpan{
      {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
}

// Merges the Spans and UntranslatableSections of this StyledString into a single vector of
// UnifiedSpans. Both inputs are converted and sorted before being merged.
static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
  // Ensure the Spans are sorted and converted.
  std::vector<UnifiedSpan> sorted_spans;
  sorted_spans.reserve(string.value->spans.size());
  std::transform(string.value->spans.begin(), string.value->spans.end(),
                 std::back_inserter(sorted_spans), SpanToUnifiedSpan);

  // Stable sort to ensure tag sequences like "<b><i>" are preserved.
  std::stable_sort(sorted_spans.begin(), sorted_spans.end());

  // Ensure the UntranslatableSections are sorted and converted.
  std::vector<UnifiedSpan> sorted_untranslatable_sections;
  sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
  std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
                 std::back_inserter(sorted_untranslatable_sections),
                 UntranslatableSectionToUnifiedSpan);
  std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());

  std::vector<UnifiedSpan> merged_spans;
  merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
  auto span_iter = sorted_spans.begin();
  auto untranslatable_iter = sorted_untranslatable_sections.begin();

  // Classic two-way merge. When neither element compares less than the other, the
  // untranslatable section is emitted before the span.
  while (span_iter != sorted_spans.end() &&
         untranslatable_iter != sorted_untranslatable_sections.end()) {
    if (*span_iter < *untranslatable_iter) {
      merged_spans.push_back(std::move(*span_iter));
      ++span_iter;
    } else {
      merged_spans.push_back(std::move(*untranslatable_iter));
      ++untranslatable_iter;
    }
  }

  // Drain whichever input still has elements.
  while (span_iter != sorted_spans.end()) {
    merged_spans.push_back(std::move(*span_iter));
    ++span_iter;
  }

  while (untranslatable_iter != sorted_untranslatable_sections.end()) {
    merged_spans.push_back(std::move(*untranslatable_iter));
    ++untranslatable_iter;
  }
  return merged_spans;
}

// Pseudolocalizes the text of `string` with the given `method`, leaving the content of
// untranslatable sections untouched, and rewrites the span indices so that they refer to the
// new text. The resulting styled string is interned in `pool`.
std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
                                                         Pseudolocalizer::Method method,
                                                         android::StringPool* pool) {
  Pseudolocalizer localizer(method);

  // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
  // This will effectively subdivide the string into multiple sections that can be individually
  // pseudolocalized, while keeping the span indices synchronized.
  std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);

  // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
  // runtime. So we will do all our processing in UTF-16, then convert back.
  const std::u16string text16 = android::util::Utf8ToUtf16(string->value->value);

  // Convenient wrapper around the text that allows us to work with StringPieces.
  const StringPiece16 text(text16);

  // The new string.
  std::string new_string = localizer.Start();

  // The stack that keeps track of what nested Span we're in (indices into merged_spans).
  std::vector<size_t> span_stack;

  // The current position in the original text.
  uint32_t cursor = 0u;

  // The current position in the new text.
  uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
                                             new_string.size(), false);

  // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
  bool translatable = true;
  size_t span_idx = 0u;
  while (span_idx < merged_spans.size() || !span_stack.empty()) {
    UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
    UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];

    if (span != nullptr) {
      if (parent_span == nullptr || parent_span->last_char > span->first_char) {
        // There is no parent, or this span is the child of the parent.
        // Pseudolocalize all the text until this span.
        const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
        cursor += substr.size();

        // Pseudolocalize the substring.
        std::string new_substr = android::util::Utf16ToUtf8(substr);
        if (translatable) {
          new_substr = localizer.Text(new_substr);
        }
        new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
                                           new_substr.size(), false);
        new_string += new_substr;

        // Rewrite the first_char.
        span->first_char = new_cursor;
        if (!span->tag) {
          // An untranslatable section has begun!
          translatable = false;
        }
        span_stack.push_back(span_idx);
        ++span_idx;
        continue;
      }
    }

    if (parent_span != nullptr) {
      // There is a parent, and either this span is not a child of it, or there are no more spans.
      // Pop this off the stack.
      const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
      cursor += substr.size();

      // Pseudolocalize the substring.
      std::string new_substr = android::util::Utf16ToUtf8(substr);
      if (translatable) {
        new_substr = localizer.Text(new_substr);
      }
      new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
                                         new_substr.size(), false);
      new_string += new_substr;

      // Rewrite the last_char (inclusive).
      parent_span->last_char = new_cursor - 1;
      if (!parent_span->tag) {
        // An end to an untranslatable section. Only untagged spans are untranslatable sections
        // (mirroring the `!span->tag` check where the section begins); closing a regular tagged
        // span must not change the translatable state.
        translatable = true;
      }
      span_stack.pop_back();
    }
  }

  // Finish the pseudolocalization at the end of the string.
  new_string += localizer.Text(android::util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
  new_string += localizer.End();

  android::StyleString localized;
  localized.str = std::move(new_string);

  // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
  for (UnifiedSpan& span : merged_spans) {
    if (span.tag) {
      localized.spans.push_back(
          android::Span{std::move(span.tag.value()), span.first_char, span.last_char});
    }
  }
  return util::make_unique<StyledString>(pool->MakeRef(localized));
}

namespace {

// Visitor that produces a pseudolocalized copy of the visited Plural, String or StyledString.
// Other value types leave both `value` and `item` unset.
class Visitor : public ValueVisitor {
 public:
  // Either value or item will be populated upon visiting the value.
  std::unique_ptr<Value> value;
  std::unique_ptr<Item> item;

  Visitor(android::StringPool* pool, Pseudolocalizer::Method method)
      : pool_(pool), method_(method), localizer_(method) {
  }

  // Pseudolocalizes every present quantity of the plural. Entries the sub-visitor does not
  // handle are cloned unchanged.
  void Visit(Plural* plural) override {
    CloningValueTransformer cloner(pool_);
    std::unique_ptr<Plural> localized = util::make_unique<Plural>();
    for (size_t i = 0; i < plural->values.size(); i++) {
      Visitor sub_visitor(pool_, method_);
      if (plural->values[i]) {
        plural->values[i]->Accept(&sub_visitor);
        if (sub_visitor.item) {
          localized->values[i] = std::move(sub_visitor.item);
        } else {
          localized->values[i] = plural->values[i]->Transform(cloner);
        }
      }
    }
    localized->SetSource(plural->GetSource());
    localized->SetWeak(true);
    value = std::move(localized);
  }

  // Pseudolocalizes a plain string, copying untranslatable sections verbatim.
  void Visit(String* string) override {
    const StringPiece original_string = *string->value;
    std::string result = localizer_.Start();

    // Pseudolocalize only the translatable sections.
    size_t start = 0u;
    for (const UntranslatableSection& section : string->untranslatable_sections) {
      // Pseudolocalize the content before the untranslatable section.
      const size_t len = section.start - start;
      if (len > 0u) {
        result += localizer_.Text(original_string.substr(start, len));
      }

      // Copy the untranslatable content.
      result += original_string.substr(section.start, section.end - section.start);
      start = section.end;
    }

    // Pseudolocalize the content after the last untranslatable section.
    if (start != original_string.size()) {
      const size_t len = original_string.size() - start;
      result += localizer_.Text(original_string.substr(start, len));
    }
    result += localizer_.End();

    std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
    localized->SetSource(string->GetSource());
    localized->SetWeak(true);
    item = std::move(localized);
  }

  // Pseudolocalizes a styled string, keeping its spans in sync with the new text.
  void Visit(StyledString* string) override {
    item = PseudolocalizeStyledString(string, method_, pool_);
    item->SetSource(string->GetSource());
    item->SetWeak(true);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(Visitor);

  android::StringPool* pool_;
  Pseudolocalizer::Method method_;
  Pseudolocalizer localizer_;
};

// Visitor that produces a copy of the visited Plural, String or StyledString whose text is
// prefixed with a marker for the configured grammatical gender.
class GrammaticalGenderVisitor : public ValueVisitor {
 public:
  // Either value or item will be populated upon visiting the value.
  std::unique_ptr<Value> value;
  std::unique_ptr<Item> item;

  GrammaticalGenderVisitor(android::StringPool* pool, uint8_t grammaticalInflection)
      : pool_(pool), grammaticalInflection_(grammaticalInflection) {
  }

  // Applies the gender prefix to every present quantity of the plural. Entries the sub-visitor
  // does not handle are cloned unchanged.
  void Visit(Plural* plural) override {
    CloningValueTransformer cloner(pool_);
    std::unique_ptr<Plural> grammatical_gendered = util::make_unique<Plural>();
    for (size_t i = 0; i < plural->values.size(); i++) {
      if (plural->values[i]) {
        GrammaticalGenderVisitor sub_visitor(pool_, grammaticalInflection_);
        plural->values[i]->Accept(&sub_visitor);
        if (sub_visitor.item) {
          grammatical_gendered->values[i] = std::move(sub_visitor.item);
        } else {
          grammatical_gendered->values[i] = plural->values[i]->Transform(cloner);
        }
      }
    }
    grammatical_gendered->SetSource(plural->GetSource());
    grammatical_gendered->SetWeak(true);
    value = std::move(grammatical_gendered);
  }

  // Returns `original_string` prefixed with "(M)", "(F)" or "(N)" for the masculine, feminine
  // and neuter inflections respectively; any other inflection returns the text unchanged.
  std::string AddGrammaticalGenderPrefix(const std::string_view& original_string) {
    std::string result;
    switch (grammaticalInflection_) {
      case android::ResTable_config::GRAMMATICAL_GENDER_MASCULINE:
        result = std::string("(M)") + std::string(original_string);
        break;
      case android::ResTable_config::GRAMMATICAL_GENDER_FEMININE:
        result = std::string("(F)") + std::string(original_string);
        break;
      case android::ResTable_config::GRAMMATICAL_GENDER_NEUTER:
        result = std::string("(N)") + std::string(original_string);
        break;
      default:
        result = std::string(original_string);
        break;
    }
    return result;
  }

  // Prefixes a plain string with the gender marker.
  void Visit(String* string) override {
    std::string prefixed_string = AddGrammaticalGenderPrefix(std::string(*string->value));
    std::unique_ptr<String> grammatical_gendered =
        util::make_unique<String>(pool_->MakeRef(prefixed_string));
    grammatical_gendered->SetSource(string->GetSource());
    grammatical_gendered->SetWeak(true);
    item = std::move(grammatical_gendered);
  }

  // Prefixes a styled string with the gender marker and shifts its spans so that they still
  // cover the same characters of the original text.
  void Visit(StyledString* string) override {
    const std::string original_string(string->value->value);
    std::string prefixed_string = AddGrammaticalGenderPrefix(original_string);

    // The prefix is pure ASCII, so its length in bytes equals its length in UTF-16 code units,
    // which is the unit the span indices are expressed in. It is zero when no prefix was added.
    const uint32_t prefix_length =
        static_cast<uint32_t>(prefixed_string.size() - original_string.size());

    android::StyleString new_string;
    new_string.str = std::move(prefixed_string);
    for (const android::StringPool::Span& span : string->value->spans) {
      new_string.spans.emplace_back(android::Span{*span.name, span.first_char + prefix_length,
                                                  span.last_char + prefix_length});
    }
    std::unique_ptr<StyledString> grammatical_gendered =
        util::make_unique<StyledString>(pool_->MakeRef(new_string));
    grammatical_gendered->SetSource(string->GetSource());
    grammatical_gendered->SetWeak(true);
    item = std::move(grammatical_gendered);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(GrammaticalGenderVisitor);
  android::StringPool* pool_;
  uint8_t grammaticalInflection_;
};

// Returns a copy of `base` whose locale is set to the pseudolocale that corresponds to `m`
// (en-XA for kAccent, ar-XB for kBidi; unchanged for any other method) and whose grammatical
// inflection is set to `grammaticalInflection`.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m,
                                              uint8_t grammaticalInflection) {
  ConfigDescription modified = base;
  switch (m) {
    case Pseudolocalizer::Method::kAccent:
      modified.language[0] = 'e';
      modified.language[1] = 'n';
      modified.country[0] = 'X';
      modified.country[1] = 'A';
      break;

    case Pseudolocalizer::Method::kBidi:
      modified.language[0] = 'a';
      modified.language[1] = 'r';
      modified.country[0] = 'X';
      modified.country[1] = 'B';
      break;
    default:
      break;
  }
  modified.grammaticalInflection = grammaticalInflection;
  return modified;
}

// Derives a gender-prefixed value from `localized_config_value` and stores it in `entry` under
// the pseudolocale config for `method` and `grammaticalInflection`, unless a value is already
// defined for that config.
void GrammaticalGender(ResourceConfigValue* original_value,
                       ResourceConfigValue* localized_config_value, android::StringPool* pool,
                       ResourceEntry* entry, const Pseudolocalizer::Method method,
                       uint8_t grammaticalInflection) {
  GrammaticalGenderVisitor visitor(pool, grammaticalInflection);
  localized_config_value->value->Accept(&visitor);

  std::unique_ptr<Value> grammatical_gendered_value;
  if (visitor.value) {
    grammatical_gendered_value = std::move(visitor.value);
  } else if (visitor.item) {
    grammatical_gendered_value = std::move(visitor.item);
  }

  // The visitor did not handle this value type; nothing to add.
  if (!grammatical_gendered_value) {
    return;
  }

  ConfigDescription config =
      ModifyConfigForPseudoLocale(original_value->config, method, grammaticalInflection);

  ResourceConfigValue* grammatical_gendered_config_value =
      entry->FindOrCreateValue(config, original_value->product);
  if (!grammatical_gendered_config_value->value) {
    // Only use auto-generated pseudo-localization if none is defined.
    grammatical_gendered_config_value->value = std::move(grammatical_gendered_value);
  }
}

const uint32_t MASK_MASCULINE = 1;  // Bit mask for masculine
const uint32_t MASK_FEMININE = 2;   // Bit mask for feminine
const uint32_t MASK_NEUTER = 4;     // Bit mask for neuter

// Generates one gendered variant of `new_value` for every gender whose bit is set in
// `gender_state`.
void GrammaticalGenderIfNeeded(ResourceConfigValue* original_value, ResourceConfigValue* new_value,
                               android::StringPool* pool, ResourceEntry* entry,
                               const Pseudolocalizer::Method method, uint32_t gender_state) {
  if (gender_state & MASK_FEMININE) {
    GrammaticalGender(original_value, new_value, pool, entry, method,
                      android::ResTable_config::GRAMMATICAL_GENDER_FEMININE);
  }

  if (gender_state & MASK_MASCULINE) {
    GrammaticalGender(original_value, new_value, pool, entry, method,
                      android::ResTable_config::GRAMMATICAL_GENDER_MASCULINE);
  }

  if (gender_state & MASK_NEUTER) {
    GrammaticalGender(original_value, new_value, pool, entry, method,
                      android::ResTable_config::GRAMMATICAL_GENDER_NEUTER);
  }
}

// Pseudolocalizes `original_value` with `method` and stores the result in `entry` under the
// matching pseudolocale config, unless a value is already defined there. When `gender_flag` is
// set, gendered variants are additionally generated according to `gender_state`.
void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
                            ResourceConfigValue* original_value, android::StringPool* pool,
                            ResourceEntry* entry, uint32_t gender_state, bool gender_flag) {
  Visitor visitor(pool, method);
  original_value->value->Accept(&visitor);

  std::unique_ptr<Value> localized_value;
  if (visitor.value) {
    localized_value = std::move(visitor.value);
  } else if (visitor.item) {
    localized_value = std::move(visitor.item);
  }

  // The visitor did not handle this value type; nothing to add.
  if (!localized_value) {
    return;
  }

  ConfigDescription config_with_accent = ModifyConfigForPseudoLocale(
      original_value->config, method, android::ResTable_config::GRAMMATICAL_GENDER_ANY);

  ResourceConfigValue* new_config_value =
      entry->FindOrCreateValue(config_with_accent, original_value->product);
  if (!new_config_value->value) {
    // Only use auto-generated pseudo-localization if none is defined.
    new_config_value->value = std::move(localized_value);
  }
  if (gender_flag) {
    GrammaticalGenderIfNeeded(original_value, new_config_value, pool, entry, method, gender_state);
  }
}

// A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
// translatable.
static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
  const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
  if (diff & ConfigDescription::CONFIG_LOCALE) {
    return false;
  }
  return config_value->value->IsTranslatable();
}

}  // namespace

// Parses a comma-separated list of gender codes ("f", "m", "n"; matched after lowercasing) and
// ORs the corresponding MASK_* bits into `*gender_state`. Empty list elements are ignored.
// Reports an error through `diag` and returns false on the first unrecognized element.
bool ParseGenderValuesAndSaveState(const std::string& grammatical_gender_values,
                                   uint32_t* gender_state, android::IDiagnostics* diag) {
  std::vector<std::string> values = util::SplitAndLowercase(grammatical_gender_values, ',');
  for (const std::string& value : values) {
    if (value.length() != 0) {
      if (value == "f") {
        *gender_state |= MASK_FEMININE;
      } else if (value == "m") {
        *gender_state |= MASK_MASCULINE;
      } else if (value == "n") {
        *gender_state |= MASK_NEUTER;
      } else {
        diag->Error(android::DiagMessage() << "Invalid grammatical gender value: " << value);
        return false;
      }
    }
  }
  return true;
}

// Parses `grammatical_gender_ratio` into `*gender_ratio`. The whole string must be a number in
// the range [0, 1]; otherwise an error is reported through `diag` and false is returned.
bool ParseGenderRatio(const std::string& grammatical_gender_ratio, float* gender_ratio,
                      android::IDiagnostics* diag) {
  const char* input = grammatical_gender_ratio.c_str();
  char* endPtr;
  // strtof reports range errors only through errno, so clear it first.
  errno = 0;
  *gender_ratio = strtof(input, &endPtr);
  if (endPtr == input || *endPtr != '\0' || errno == ERANGE || *gender_ratio < 0 ||
      *gender_ratio > 1) {
    diag->Error(android::DiagMessage() << "Invalid grammatical gender ratio: "
                                       << grammatical_gender_ratio
                                       << ", must be a real number between 0 and 1");
    return false;
  }
  return true;
}

// Adds accent (en-XA) and bidi (ar-XB) pseudolocalized variants for every pseudolocalizable
// value in `table`. For each entry, a random draw against the configured gender ratio decides
// whether gendered variants are generated as well. Returns false if the gender options cannot
// be parsed.
bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
  uint32_t gender_state = 0;
  if (!ParseGenderValuesAndSaveState(grammatical_gender_values_, &gender_state,
                                     context->GetDiagnostics())) {
    return false;
  }

  float gender_ratio = 0;
  if (!ParseGenderRatio(grammatical_gender_ratio_, &gender_ratio, context->GetDiagnostics())) {
    return false;
  }

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<> distrib(0.0, 1.0);

  for (auto& package : table->packages) {
    for (auto& type : package->types) {
      for (auto& entry : type->entries) {
        // One draw per entry, shared by all of its config values and both methods.
        const bool gender_flag = distrib(gen) < gender_ratio;

        // Collect the values up front: PseudolocalizeIfNeeded adds new values to the entry.
        std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
        for (ResourceConfigValue* value : values) {
          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
                                 entry.get(), gender_state, gender_flag);
          PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
                                 entry.get(), gender_state, gender_flag);
        }
      }
    }
  }
  return true;
}

}  // namespace aapt