1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker *
4*993b0882SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker *
8*993b0882SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker *
10*993b0882SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #include "lang_id/lang-id-wrapper.h"
18*993b0882SAndroid Build Coastguard Worker
19*993b0882SAndroid Build Coastguard Worker #include <fcntl.h>
20*993b0882SAndroid Build Coastguard Worker
21*993b0882SAndroid Build Coastguard Worker #include "lang_id/fb_model/lang-id-from-fb.h"
22*993b0882SAndroid Build Coastguard Worker #include "lang_id/lang-id.h"
23*993b0882SAndroid Build Coastguard Worker
24*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
25*993b0882SAndroid Build Coastguard Worker
26*993b0882SAndroid Build Coastguard Worker namespace langid {
27*993b0882SAndroid Build Coastguard Worker
LoadFromPath(const std::string & langid_model_path)28*993b0882SAndroid Build Coastguard Worker std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromPath(
29*993b0882SAndroid Build Coastguard Worker const std::string& langid_model_path) {
30*993b0882SAndroid Build Coastguard Worker std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
31*993b0882SAndroid Build Coastguard Worker libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFile(langid_model_path);
32*993b0882SAndroid Build Coastguard Worker return langid_model;
33*993b0882SAndroid Build Coastguard Worker }
34*993b0882SAndroid Build Coastguard Worker
LoadFromDescriptor(const int langid_fd)35*993b0882SAndroid Build Coastguard Worker std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromDescriptor(
36*993b0882SAndroid Build Coastguard Worker const int langid_fd) {
37*993b0882SAndroid Build Coastguard Worker std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
38*993b0882SAndroid Build Coastguard Worker libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFileDescriptor(
39*993b0882SAndroid Build Coastguard Worker langid_fd);
40*993b0882SAndroid Build Coastguard Worker return langid_model;
41*993b0882SAndroid Build Coastguard Worker }
42*993b0882SAndroid Build Coastguard Worker
LoadFromUnownedBuffer(const char * buffer,int size)43*993b0882SAndroid Build Coastguard Worker std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromUnownedBuffer(
44*993b0882SAndroid Build Coastguard Worker const char* buffer, int size) {
45*993b0882SAndroid Build Coastguard Worker std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
46*993b0882SAndroid Build Coastguard Worker libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferBytes(buffer, size);
47*993b0882SAndroid Build Coastguard Worker return langid_model;
48*993b0882SAndroid Build Coastguard Worker }
49*993b0882SAndroid Build Coastguard Worker
GetPredictions(const libtextclassifier3::mobile::lang_id::LangId * model,const std::string & text)50*993b0882SAndroid Build Coastguard Worker std::vector<std::pair<std::string, float>> GetPredictions(
51*993b0882SAndroid Build Coastguard Worker const libtextclassifier3::mobile::lang_id::LangId* model, const std::string& text) {
52*993b0882SAndroid Build Coastguard Worker return GetPredictions(model, text.data(), text.size());
53*993b0882SAndroid Build Coastguard Worker }
54*993b0882SAndroid Build Coastguard Worker
GetPredictions(const libtextclassifier3::mobile::lang_id::LangId * model,const char * text,int text_size)55*993b0882SAndroid Build Coastguard Worker std::vector<std::pair<std::string, float>> GetPredictions(
56*993b0882SAndroid Build Coastguard Worker const libtextclassifier3::mobile::lang_id::LangId* model, const char* text,
57*993b0882SAndroid Build Coastguard Worker int text_size) {
58*993b0882SAndroid Build Coastguard Worker std::vector<std::pair<std::string, float>> prediction_results;
59*993b0882SAndroid Build Coastguard Worker if (model == nullptr) {
60*993b0882SAndroid Build Coastguard Worker return prediction_results;
61*993b0882SAndroid Build Coastguard Worker }
62*993b0882SAndroid Build Coastguard Worker
63*993b0882SAndroid Build Coastguard Worker const float noise_threshold =
64*993b0882SAndroid Build Coastguard Worker model->GetFloatProperty("text_classifier_langid_noise_threshold", -1.0f);
65*993b0882SAndroid Build Coastguard Worker
66*993b0882SAndroid Build Coastguard Worker // Speed up the things by specifying the max results we want. For example, if
67*993b0882SAndroid Build Coastguard Worker // the noise threshold is 0.1, we don't need more than 10 results.
68*993b0882SAndroid Build Coastguard Worker const int max_results =
69*993b0882SAndroid Build Coastguard Worker noise_threshold < 0.01
70*993b0882SAndroid Build Coastguard Worker ? -1 // -1 means FindLanguages returns all predictions
71*993b0882SAndroid Build Coastguard Worker : static_cast<int>(1 / noise_threshold) + 1;
72*993b0882SAndroid Build Coastguard Worker
73*993b0882SAndroid Build Coastguard Worker libtextclassifier3::mobile::lang_id::LangIdResult langid_result;
74*993b0882SAndroid Build Coastguard Worker model->FindLanguages(text, text_size, &langid_result, max_results);
75*993b0882SAndroid Build Coastguard Worker for (int i = 0; i < langid_result.predictions.size(); i++) {
76*993b0882SAndroid Build Coastguard Worker const auto& prediction = langid_result.predictions[i];
77*993b0882SAndroid Build Coastguard Worker if (prediction.second >= noise_threshold && prediction.first != "und") {
78*993b0882SAndroid Build Coastguard Worker prediction_results.push_back({prediction.first, prediction.second});
79*993b0882SAndroid Build Coastguard Worker }
80*993b0882SAndroid Build Coastguard Worker }
81*993b0882SAndroid Build Coastguard Worker return prediction_results;
82*993b0882SAndroid Build Coastguard Worker }
83*993b0882SAndroid Build Coastguard Worker
GetLanguageTags(const libtextclassifier3::mobile::lang_id::LangId * model,const std::string & text)84*993b0882SAndroid Build Coastguard Worker std::string GetLanguageTags(const libtextclassifier3::mobile::lang_id::LangId* model,
85*993b0882SAndroid Build Coastguard Worker const std::string& text) {
86*993b0882SAndroid Build Coastguard Worker const std::vector<std::pair<std::string, float>>& predictions =
87*993b0882SAndroid Build Coastguard Worker GetPredictions(model, text);
88*993b0882SAndroid Build Coastguard Worker const float threshold =
89*993b0882SAndroid Build Coastguard Worker model->GetFloatProperty("text_classifier_langid_threshold", -1.0f);
90*993b0882SAndroid Build Coastguard Worker std::string detected_language_tags = "";
91*993b0882SAndroid Build Coastguard Worker bool first_accepted_language = true;
92*993b0882SAndroid Build Coastguard Worker for (int i = 0; i < predictions.size(); i++) {
93*993b0882SAndroid Build Coastguard Worker const auto& prediction = predictions[i];
94*993b0882SAndroid Build Coastguard Worker if (threshold >= 0.f && prediction.second < threshold) {
95*993b0882SAndroid Build Coastguard Worker continue;
96*993b0882SAndroid Build Coastguard Worker }
97*993b0882SAndroid Build Coastguard Worker if (first_accepted_language) {
98*993b0882SAndroid Build Coastguard Worker first_accepted_language = false;
99*993b0882SAndroid Build Coastguard Worker } else {
100*993b0882SAndroid Build Coastguard Worker detected_language_tags += ",";
101*993b0882SAndroid Build Coastguard Worker }
102*993b0882SAndroid Build Coastguard Worker detected_language_tags += prediction.first;
103*993b0882SAndroid Build Coastguard Worker }
104*993b0882SAndroid Build Coastguard Worker return detected_language_tags;
105*993b0882SAndroid Build Coastguard Worker }
106*993b0882SAndroid Build Coastguard Worker
107*993b0882SAndroid Build Coastguard Worker } // namespace langid
108*993b0882SAndroid Build Coastguard Worker
109*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
110