xref: /aosp_15_r20/external/libtextclassifier/native/annotator/pod_ner/pod-ner-impl_test.cc (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "annotator/pod_ner/pod-ner-impl.h"
18 
19 #include <iostream>
20 #include <memory>
21 #include <thread>  // NOLINT(build/c++11)
22 
23 #include "annotator/model_generated.h"
24 #include "annotator/types.h"
25 #include "utils/jvm-test-utils.h"
26 #include "utils/test-data-test-utils.h"
27 #include "utils/tokenizer-utils.h"
28 #include "utils/utf8/unicodetext.h"
29 #include "utils/utf8/unilib.h"
30 #include "gmock/gmock.h"
31 #include "gtest/gtest.h"
32 
33 namespace libtextclassifier3 {
34 namespace {
35 
36 using ::testing::IsEmpty;
37 using ::testing::Not;
38 
39 using PodNerModel_::Label_::BoiseType;
40 using PodNerModel_::Label_::BoiseType_BEGIN;
41 using PodNerModel_::Label_::BoiseType_END;
42 using PodNerModel_::Label_::BoiseType_INTERMEDIATE;
43 using PodNerModel_::Label_::BoiseType_O;
44 using PodNerModel_::Label_::BoiseType_SINGLE;
45 using PodNerModel_::Label_::MentionType;
46 using PodNerModel_::Label_::MentionType_NAM;
47 using PodNerModel_::Label_::MentionType_NOM;
48 using PodNerModel_::Label_::MentionType_UNDEFINED;
49 
// Values written into every test model by the PodNerTest fixture.
constexpr int kMinNumberOfTokens = 1;
constexpr int kMinNumberOfWordpieces = 1;
// Priority score the model is configured with; tests compare annotation
// priority scores against this value.
constexpr float kDefaultPriorityScore = 0.5f;
53 
54 class PodNerTest : public testing::Test {
55  protected:
PodNerTest(ModeFlag enabled_modes=ModeFlag_ALL)56   explicit PodNerTest(ModeFlag enabled_modes = ModeFlag_ALL) {
57     PodNerModelT model;
58 
59     model.min_number_of_tokens = kMinNumberOfTokens;
60     model.min_number_of_wordpieces = kMinNumberOfWordpieces;
61     model.priority_score = kDefaultPriorityScore;
62 
63     const std::string tflite_model_buffer =
64         GetTestFileContent("annotator/pod_ner/test_data/tflite_model.tflite");
65     model.tflite_model = std::vector<uint8_t>(tflite_model_buffer.begin(),
66                                               tflite_model_buffer.end());
67     const std::string word_piece_vocab_buffer =
68         GetTestFileContent("annotator/pod_ner/test_data/vocab.txt");
69     model.word_piece_vocab = std::vector<uint8_t>(
70         word_piece_vocab_buffer.begin(), word_piece_vocab_buffer.end());
71     model.enabled_modes = enabled_modes;
72 
73     flatbuffers::FlatBufferBuilder builder;
74     builder.Finish(PodNerModel::Pack(builder, &model));
75 
76     model_buffer_ =
77         std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
78                     builder.GetSize());
79     model_ = static_cast<const PodNerModel*>(
80         flatbuffers::GetRoot<PodNerModel>(model_buffer_.data()));
81 
82     model.append_final_period = true;
83     flatbuffers::FlatBufferBuilder builder_append_final_period;
84     builder_append_final_period.Finish(
85         PodNerModel::Pack(builder_append_final_period, &model));
86 
87     model_buffer_append_final_period_ =
88         std::string(reinterpret_cast<const char*>(
89                         builder_append_final_period.GetBufferPointer()),
90                     builder_append_final_period.GetSize());
91     model_append_final_period_ =
92         static_cast<const PodNerModel*>(flatbuffers::GetRoot<PodNerModel>(
93             model_buffer_append_final_period_.data()));
94 
95     unilib_ = CreateUniLibForTesting();
96   }
97 
98   std::string model_buffer_;
99   const PodNerModel* model_;
100   std::string model_buffer_append_final_period_;
101   const PodNerModel* model_append_final_period_;
102   std::unique_ptr<UniLib> unilib_;
103 };
104 
105 class PodNerForAnnotationAndClassificationTest : public PodNerTest {
106  protected:
PodNerForAnnotationAndClassificationTest()107   PodNerForAnnotationAndClassificationTest()
108       : PodNerTest(ModeFlag_ANNOTATION_AND_CLASSIFICATION) {}
109 };
110 
111 class PodNerForSelectionTest : public PodNerTest {
112  protected:
PodNerForSelectionTest()113   PodNerForSelectionTest() : PodNerTest(ModeFlag_SELECTION) {}
114 };
115 
// Annotating ordinary English sentences succeeds and yields at least one span.
TEST_F(PodNerTest, AnnotateSmokeTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const char* const kSentences[] = {
      "Google New York , in New York",
      "Jamie I'm in the first picture and Cameron and Zach "
      "are in the second "
      "picture.",
  };
  for (const char* sentence : kSentences) {
    // A fresh result vector per sentence keeps the checks independent.
    std::vector<AnnotatedSpan> spans;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(sentence), &spans));
    EXPECT_THAT(spans, Not(IsEmpty()));
  }
}
138 
// Annotating the empty string succeeds and produces no spans.
TEST_F(PodNerTest, AnnotateEmptyInput) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::vector<AnnotatedSpan> spans;
  const bool ok = annotator->Annotate(UTF8ToUnicodeText(""), &spans);
  ASSERT_TRUE(ok);
  EXPECT_THAT(spans, IsEmpty());
}
150 
FillCollections(const std::vector<std::string> & collection_names,const std::vector<float> & single_token_priority_scores,const std::vector<float> & multi_token_priority_scores,std::vector<std::unique_ptr<PodNerModel_::CollectionT>> * collections)151 void FillCollections(
152     const std::vector<std::string>& collection_names,
153     const std::vector<float>& single_token_priority_scores,
154     const std::vector<float>& multi_token_priority_scores,
155     std::vector<std::unique_ptr<PodNerModel_::CollectionT>>* collections) {
156   ASSERT_TRUE(collection_names.size() == single_token_priority_scores.size() &&
157               collection_names.size() == multi_token_priority_scores.size());
158   collections->clear();
159   for (int i = 0; i < collection_names.size(); ++i) {
160     collections->push_back(std::make_unique<PodNerModel_::CollectionT>());
161     collections->back()->name = collection_names[i];
162     collections->back()->single_token_priority_score =
163         single_token_priority_scores[i];
164     collections->back()->multi_token_priority_score =
165         multi_token_priority_scores[i];
166   }
167 }
168 
EmplaceToLabelVector(BoiseType boise_type,MentionType mention_type,int collection_id,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)169 void EmplaceToLabelVector(
170     BoiseType boise_type, MentionType mention_type, int collection_id,
171     std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
172   labels->push_back(std::make_unique<PodNerModel_::LabelT>());
173   labels->back()->boise_type = boise_type;
174   labels->back()->mention_type = mention_type;
175   labels->back()->collection_id = collection_id;
176 }
177 
FillLabels(int num_collections,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)178 void FillLabels(int num_collections,
179                 std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
180   labels->clear();
181   for (auto boise_type :
182        {BoiseType_BEGIN, BoiseType_END, BoiseType_INTERMEDIATE}) {
183     for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
184       for (int i = 0; i < num_collections - 1; ++i) {  // skip undefined
185         EmplaceToLabelVector(boise_type, mention_type, i, labels);
186       }
187     }
188   }
189   EmplaceToLabelVector(BoiseType_O, MentionType_UNDEFINED, num_collections - 1,
190                        labels);
191   for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
192     for (int i = 0; i < num_collections - 1; ++i) {  // skip undefined
193       EmplaceToLabelVector(BoiseType_SINGLE, mention_type, i, labels);
194     }
195   }
196 }
197 
// With the fixture's default model, both a multi-word and a single-word
// location are annotated as "location" with the model-wide priority score.
TEST_F(PodNerTest, AnnotateDefaultCollections) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";
  {
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                    &annotations));
    // Fatal checks: the first annotation and its first classification are
    // indexed below, which would read out of bounds if either were empty.
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "location");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              kDefaultPriorityScore);

    annotations.clear();
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                    &annotations));
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "location");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              kDefaultPriorityScore);
  }
}
223 
// With a model built for ModeFlag_SELECTION only, Annotate still returns true
// but yields no spans.
TEST_F(PodNerForSelectionTest, AnnotateWithDisabledAnnotationReturnsNoResults) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string text = "I live in New York";
  std::vector<AnnotatedSpan> spans;
  const bool ok = annotator->Annotate(UTF8ToUnicodeText(text), &spans);
  ASSERT_TRUE(ok);
  EXPECT_THAT(spans, IsEmpty());
}
235 
// Replaces the model's collections and labels with a custom set in which the
// fourth collection is named "xxx" and carries distinct single-token and
// multi-token priority scores, then checks that annotations report that name
// and the score matching the number of tokens in the entity.
TEST_F(PodNerTest, AnnotateConfigurableCollections) {
  std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
  ASSERT_TRUE(unpacked_model != nullptr);

  const float xxx_single_token_priority = 0.9f;
  const float xxx_multi_token_priority = 1.7f;
  const std::vector<std::string> collection_names = {
      "art",          "consumer_good", "event",  "xxx",
      "organization", "ner_entity",    "person", "undefined"};
  const std::vector<float> single_token_priority_scores = {
      0., 0., 0., xxx_single_token_priority, 0., 0., 0., 0.};
  const std::vector<float> multi_token_priority_scores = {
      0., 0., 0., xxx_multi_token_priority, 0., 0., 0., 0.};
  // FillCollections reports size mismatches with a fatal assertion that only
  // returns from the helper; propagate it so the test stops here as well.
  ASSERT_NO_FATAL_FAILURE(FillCollections(
      collection_names, single_token_priority_scores,
      multi_token_priority_scores, &(unpacked_model->collections)));
  FillLabels(static_cast<int>(collection_names.size()),
             &(unpacked_model->labels));

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
  // `model_buffer` must outlive `annotator`'s use of the model root below.
  std::string model_buffer =
      std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
                  builder.GetSize());
  std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(model_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";
  {
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                    &annotations));
    // Fatal checks: the elements indexed below must exist.
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              xxx_multi_token_priority);

    annotations.clear();
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                    &annotations));
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              xxx_single_token_priority);
  }
}
283 
// "in New York" is annotated by the default model, but yields nothing once
// `min_number_of_tokens` is raised to 4.
TEST_F(PodNerTest, AnnotateMinNumTokens) {
  const std::string text = "in New York";

  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Baseline with the fixture's model.
  {
    std::vector<AnnotatedSpan> spans;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &spans));
    EXPECT_THAT(spans, Not(IsEmpty()));
  }

  // Re-serialize the model with a stricter token threshold.
  std::unique_ptr<PodNerModelT> stricter_model(model_->UnPack());
  ASSERT_TRUE(stricter_model != nullptr);
  stricter_model->min_number_of_tokens = 4;

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, stricter_model.get()));
  const std::string stricter_buffer(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(stricter_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  {
    std::vector<AnnotatedSpan> spans;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &spans));
    EXPECT_THAT(spans, IsEmpty());
  }
}
317 
// "in New York" is annotated by the default model, but yields nothing once
// `min_number_of_wordpieces` is raised to 10.
TEST_F(PodNerTest, AnnotateMinNumWordpieces) {
  const std::string text = "in New York";

  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Baseline with the fixture's model.
  {
    std::vector<AnnotatedSpan> spans;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &spans));
    EXPECT_THAT(spans, Not(IsEmpty()));
  }

  // Re-serialize the model with a stricter wordpiece threshold.
  std::unique_ptr<PodNerModelT> stricter_model(model_->UnPack());
  ASSERT_TRUE(stricter_model != nullptr);
  stricter_model->min_number_of_wordpieces = 10;

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, stricter_model.get()));
  const std::string stricter_buffer(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(stricter_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  {
    std::vector<AnnotatedSpan> spans;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &spans));
    EXPECT_THAT(spans, IsEmpty());
  }
}
351 
// A long base64-looking blob with no whitespace is processed successfully and
// produces no annotations.
TEST_F(PodNerTest, AnnotateNonstandardText) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string blob =
      "abcNxCDU1RWNvbXByLXI4NS8xNzcwLzE3NzA4NDY2L3J1Ymluby1raWRzLXJlY2xpbmVyLXd"
      "pdGgtY3VwLWhvbGRlci5qcGc=/"
      "UnViaW5vIEtpZHMgUmVjbGluZXIgd2l0aCBDdXAgSG9sZGVyIGJ5IEhhcnJpZXQgQmVl."
      "html>";

  std::vector<AnnotatedSpan> spans;
  const bool ok = annotator->Annotate(UTF8ToUnicodeText(blob), &spans);
  ASSERT_TRUE(ok);
  EXPECT_THAT(spans, IsEmpty());
}
367 
// A control character directly after a name must not shift or break the
// annotated span. Note that despite the test name, the character used is
// \x09 (horizontal tab).
TEST_F(PodNerTest, AnnotateTextWithLinefeed) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // The literal is split in two because a hex escape consumes every following
  // hex digit: "\x09and" would be parsed as the escape \x09a followed by "nd".
  std::string nonstandard_text = "My name is Kuba\x09";
  nonstandard_text += "and this is a test.";
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  // Fatal: annotations[0] is read below.
  ASSERT_THAT(annotations, Not(IsEmpty()));
  // "Kuba" occupies codepoints [11, 15).
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));

  // Drop the results of the first call so that the checks below can only be
  // satisfied by what the second call produces.
  annotations.clear();
  nonstandard_text = "My name is Kuba\x09 and this is a test.";
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));
}
387 
// Text containing wordpieces outside the vocabulary: the short input is
// expected to produce no annotations, while the long input with the same
// unknown suffix is expected to still produce some.
TEST_F(PodNerTest, AnnotateWithUnknownWordpieces) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car.  "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free.  In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle.";
  const std::string text_with_unknown_wordpieces = "před chvílí";

  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText("Google New York , in New York. " +
                                            text_with_unknown_wordpieces),
                          &annotations));
  EXPECT_THAT(annotations, IsEmpty());

  // Start from an empty vector so the non-empty expectation below cannot be
  // satisfied by leftovers if the previous expectation failed.
  annotations.clear();
  ASSERT_TRUE(annotator->Annotate(
      UTF8ToUnicodeText(long_text + " " + text_with_unknown_wordpieces),
      &annotations));
  EXPECT_THAT(annotations, Not(IsEmpty()));
}
417 
418 class PodNerTestWithOrWithoutFinalPeriod
419     : public PodNerTest,
420       public testing::WithParamInterface<bool> {};
421 
422 INSTANTIATE_TEST_SUITE_P(TestAnnotateLongText,
423                          PodNerTestWithOrWithoutFinalPeriod,
424                          testing::Values(true, false));
425 
TEST_P(PodNerTestWithOrWithoutFinalPeriod,AnnotateLongText)426 TEST_P(PodNerTestWithOrWithoutFinalPeriod, AnnotateLongText) {
427   std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
428       GetParam() ? model_append_final_period_ : model_, *unilib_);
429   ASSERT_TRUE(annotator != nullptr);
430 
431   const std::string long_text =
432       "It is easy to spend a fun and exciting day in Seattle without a car.  "
433       "There are lots of ways to modify this itinerary. Add a ferry ride "
434       "from the waterfront. Spending the day at the Seattle Center or at the "
435       "aquarium could easily extend this from one to several days. Take the "
436       "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
437       "Museum which is fun and free.  In the summer months you can ride the "
438       "passenger-only Water Taxi from the waterfront to West Seattle and "
439       "Alki Beach. Here's a sample one day itinerary: Start at the Space "
440       "Needle by taking the Seattle Monorail from downtown. Look around the "
441       "Seattle Center or go to the Space Needle. If you're interested in "
442       "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
443       "is located at the foot of the Space Needle.  It has a lot of rock'n "
444       "roll memorabilia that you may find interesting.  The Chihuly Garden "
445       "and Glass musuem is near the Space Needle and you can get a "
446       "combination ticket for both.  It gets really good reviews.  If you're "
447       "interested, then the Bill & Melinda Gates Foundation is across from "
448       "the EMP and has a visitors center that is free.  Come see how Bill "
449       "Gates is giving away his millions. Take the Monorail back downtown.  "
450       "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
451       "Place Market. Look around then head for the Pike Place hill climb "
452       "which is a series of steps that walk down to the waterfront. You will "
453       "end up across the street from the Seattle Aquarium. Plenty of things "
454       "to do on the waterfront, boat cruises, seafood restaurants, the "
455       "Aquarium, or your typical tourist activities. You can walk or take "
456       "the waterfront trolley bus.  Note that waterfront construction has "
457       "relocated the  trolley Metro bus route 99 that will take you from "
458       "Pioneer Square all the way to the end of the waterfront where you can "
459       "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
460       "Pier 70. The route goes thru Chinatown/International District, "
461       "through Pioneer Square, up 1st ave past the Pike Place Market and to "
462       "1st and Cedar which is walking distance to the Space Needle.  It then "
463       "goes down Broad Street toward the Olympic Sculpture Garden.   It runs "
464       "approximately every 30 minutes during the day and early evening.";
465   std::vector<AnnotatedSpan> annotations;
466   ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_text), &annotations));
467   EXPECT_THAT(annotations, Not(IsEmpty()));
468 
469   const std::string location_from_beginning = "Seattle";
470   int start_span_location_from_beginning =
471       long_text.find(location_from_beginning);
472   EXPECT_EQ(annotations[0].span,
473             CodepointSpan(start_span_location_from_beginning,
474                           start_span_location_from_beginning +
475                               location_from_beginning.length()));
476 
477   const std::string location_from_end = "Olympic Sculpture Garden";
478   int start_span_location_from_end = long_text.find(location_from_end);
479   const AnnotatedSpan& last_annotation = *annotations.rbegin();
480   EXPECT_EQ(
481       last_annotation.span,
482       CodepointSpan(start_span_location_from_end,
483                     start_span_location_from_end + location_from_end.length()));
484 }
485 
TEST_F(PodNerTest,SuggestSelectionLongText)486 TEST_F(PodNerTest, SuggestSelectionLongText) {
487   std::unique_ptr<PodNerAnnotator> annotator =
488       PodNerAnnotator::Create(model_, *unilib_);
489   ASSERT_TRUE(annotator != nullptr);
490 
491   const std::string long_text =
492       "It is easy to spend a fun and exciting day in Seattle without a car.  "
493       "There are lots of ways to modify this itinerary. Add a ferry ride "
494       "from the waterfront. Spending the day at the Seattle Center or at the "
495       "aquarium could easily extend this from one to several days. Take the "
496       "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
497       "Museum which is fun and free.  In the summer months you can ride the "
498       "passenger-only Water Taxi from the waterfront to West Seattle and "
499       "Alki Beach. Here's a sample one day itinerary: Start at the Space "
500       "Needle by taking the Seattle Monorail from downtown. Look around the "
501       "Seattle Center or go to the Space Needle. If you're interested in "
502       "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
503       "is located at the foot of the Space Needle.  It has a lot of rock'n "
504       "roll memorabilia that you may find interesting.  The Chihuly Garden "
505       "and Glass musuem is near the Space Needle and you can get a "
506       "combination ticket for both.  It gets really good reviews.  If you're "
507       "interested, then the Bill & Melinda Gates Foundation is across from "
508       "the EMP and has a visitors center that is free.  Come see how Bill "
509       "Gates is giving away his millions. Take the Monorail back downtown.  "
510       "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
511       "Place Market. Look around then head for the Pike Place hill climb "
512       "which is a series of steps that walk down to the waterfront. You will "
513       "end up across the street from the Seattle Aquarium. Plenty of things "
514       "to do on the waterfront, boat cruises, seafood restaurants, the "
515       "Aquarium, or your typical tourist activities. You can walk or take "
516       "the waterfront trolley bus.  Note that waterfront construction has "
517       "relocated the  trolley Metro bus route 99 that will take you from "
518       "Pioneer Square all the way to the end of the waterfront where you can "
519       "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
520       "Pier 70. The route goes thru Chinatown/International District, "
521       "through Pioneer Square, up 1st ave past the Pike Place Market and to "
522       "1st and Cedar which is walking distance to the Space Needle.  It then "
523       "goes down Broad Street toward the Olympic Sculpture Garden.   It runs "
524       "approximately every 30 minutes during the day and early evening.";
525   const std::string klondike = "Klondike Gold Rush Museum";
526   int klondike_start = long_text.find(klondike);
527 
528   AnnotatedSpan suggested_span;
529   EXPECT_TRUE(annotator->SuggestSelection(UTF8ToUnicodeText(long_text),
530                                           {klondike_start, klondike_start + 8},
531                                           &suggested_span));
532   EXPECT_EQ(suggested_span.span,
533             CodepointSpan(klondike_start, klondike_start + klondike.length()));
534 }
535 
// A selection inside an entity is expanded to the whole entity; a selection on
// a non-entity word fails and leaves an invalid span in the output.
TEST_F(PodNerTest, SuggestSelectionTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const char* const kContext = "Google New York, in New York";
  AnnotatedSpan selection;

  // [7, 10) is "New"; the expected result [7, 15) is "New York".
  const bool expanded = annotator->SuggestSelection(
      UTF8ToUnicodeText(kContext), {7, 10}, &selection);
  EXPECT_TRUE(expanded);
  EXPECT_EQ(selection.span, CodepointSpan(7, 15));

  // [17, 19) is "in".
  const bool expanded_non_entity = annotator->SuggestSelection(
      UTF8ToUnicodeText(kContext), {17, 19}, &selection);
  EXPECT_FALSE(expanded_non_entity);
  EXPECT_EQ(selection.span, CodepointSpan(kInvalidIndex, kInvalidIndex));
}
551 
// With a model built for ModeFlag_ANNOTATION_AND_CLASSIFICATION only,
// SuggestSelection returns false even for a span inside an entity.
TEST_F(PodNerForAnnotationAndClassificationTest,
       SuggestSelectionWithDisabledSelectionReturnsNoResults) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  AnnotatedSpan selection;
  const bool expanded = annotator->SuggestSelection(
      UTF8ToUnicodeText("Google New York, in New York"), {7, 10}, &selection);
  EXPECT_FALSE(expanded);
}
563 
// Classifying the span covering "New York" yields the "location" collection.
TEST_F(PodNerTest, ClassifyTextTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  ClassificationResult classification;
  // [10, 18) is "New York".
  const bool classified = annotator->ClassifyText(
      UTF8ToUnicodeText("We met in New York"), {10, 18}, &classification);
  ASSERT_TRUE(classified);
  EXPECT_EQ(classification.collection, "location");
}
574 
// With a model built for ModeFlag_SELECTION only, ClassifyText returns false.
TEST_F(PodNerForSelectionTest,
       ClassifyTextWithDisabledClassificationReturnsFalse) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  ClassificationResult classification;
  const bool classified = annotator->ClassifyText(
      UTF8ToUnicodeText("We met in New York"), {10, 18}, &classification);
  ASSERT_FALSE(classified);
}
585 
// Runs SuggestSelection on one shared annotator from many threads at once.
TEST_F(PodNerTest, ThreadSafety) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Do inference in 20 threads. When run with --config=tsan, this should fire
  // if there's a problem.
  constexpr int kNumThreads = 20;
  std::vector<std::thread> workers;
  workers.reserve(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    // Capturing `annotator` by reference is safe here: every worker is joined
    // below, before `annotator` goes out of scope.
    workers.emplace_back([&annotator]() {
      AnnotatedSpan selection;
      EXPECT_TRUE(annotator->SuggestSelection(
          UTF8ToUnicodeText("Google New York, in New York"), {7, 10},
          &selection));
      EXPECT_EQ(selection.span, CodepointSpan(7, 15));
    });
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
}
607 
608 }  // namespace
609 }  // namespace libtextclassifier3
610