1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "annotator/pod_ner/pod-ner-impl.h"
18
19 #include <iostream>
20 #include <memory>
21 #include <thread> // NOLINT(build/c++11)
22
23 #include "annotator/model_generated.h"
24 #include "annotator/types.h"
25 #include "utils/jvm-test-utils.h"
26 #include "utils/test-data-test-utils.h"
27 #include "utils/tokenizer-utils.h"
28 #include "utils/utf8/unicodetext.h"
29 #include "utils/utf8/unilib.h"
30 #include "gmock/gmock.h"
31 #include "gtest/gtest.h"
32
33 namespace libtextclassifier3 {
34 namespace {
35
36 using ::testing::IsEmpty;
37 using ::testing::Not;
38
39 using PodNerModel_::Label_::BoiseType;
40 using PodNerModel_::Label_::BoiseType_BEGIN;
41 using PodNerModel_::Label_::BoiseType_END;
42 using PodNerModel_::Label_::BoiseType_INTERMEDIATE;
43 using PodNerModel_::Label_::BoiseType_O;
44 using PodNerModel_::Label_::BoiseType_SINGLE;
45 using PodNerModel_::Label_::MentionType;
46 using PodNerModel_::Label_::MentionType_NAM;
47 using PodNerModel_::Label_::MentionType_NOM;
48 using PodNerModel_::Label_::MentionType_UNDEFINED;
49
// Configuration values that the PodNerTest fixture writes into the model it
// serializes for the tests.
constexpr int kMinNumberOfTokens{1};
constexpr int kMinNumberOfWordpieces{1};
// Stored as the model's priority_score; the default-collection test expects
// this value on the returned classifications.
constexpr float kDefaultPriorityScore{0.5f};
53
54 class PodNerTest : public testing::Test {
55 protected:
PodNerTest(ModeFlag enabled_modes=ModeFlag_ALL)56 explicit PodNerTest(ModeFlag enabled_modes = ModeFlag_ALL) {
57 PodNerModelT model;
58
59 model.min_number_of_tokens = kMinNumberOfTokens;
60 model.min_number_of_wordpieces = kMinNumberOfWordpieces;
61 model.priority_score = kDefaultPriorityScore;
62
63 const std::string tflite_model_buffer =
64 GetTestFileContent("annotator/pod_ner/test_data/tflite_model.tflite");
65 model.tflite_model = std::vector<uint8_t>(tflite_model_buffer.begin(),
66 tflite_model_buffer.end());
67 const std::string word_piece_vocab_buffer =
68 GetTestFileContent("annotator/pod_ner/test_data/vocab.txt");
69 model.word_piece_vocab = std::vector<uint8_t>(
70 word_piece_vocab_buffer.begin(), word_piece_vocab_buffer.end());
71 model.enabled_modes = enabled_modes;
72
73 flatbuffers::FlatBufferBuilder builder;
74 builder.Finish(PodNerModel::Pack(builder, &model));
75
76 model_buffer_ =
77 std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
78 builder.GetSize());
79 model_ = static_cast<const PodNerModel*>(
80 flatbuffers::GetRoot<PodNerModel>(model_buffer_.data()));
81
82 model.append_final_period = true;
83 flatbuffers::FlatBufferBuilder builder_append_final_period;
84 builder_append_final_period.Finish(
85 PodNerModel::Pack(builder_append_final_period, &model));
86
87 model_buffer_append_final_period_ =
88 std::string(reinterpret_cast<const char*>(
89 builder_append_final_period.GetBufferPointer()),
90 builder_append_final_period.GetSize());
91 model_append_final_period_ =
92 static_cast<const PodNerModel*>(flatbuffers::GetRoot<PodNerModel>(
93 model_buffer_append_final_period_.data()));
94
95 unilib_ = CreateUniLibForTesting();
96 }
97
98 std::string model_buffer_;
99 const PodNerModel* model_;
100 std::string model_buffer_append_final_period_;
101 const PodNerModel* model_append_final_period_;
102 std::unique_ptr<UniLib> unilib_;
103 };
104
105 class PodNerForAnnotationAndClassificationTest : public PodNerTest {
106 protected:
PodNerForAnnotationAndClassificationTest()107 PodNerForAnnotationAndClassificationTest()
108 : PodNerTest(ModeFlag_ANNOTATION_AND_CLASSIFICATION) {}
109 };
110
111 class PodNerForSelectionTest : public PodNerTest {
112 protected:
PodNerForSelectionTest()113 PodNerForSelectionTest() : PodNerTest(ModeFlag_SELECTION) {}
114 };
115
// Smoke test: annotating two ordinary English inputs succeeds and yields at
// least one annotation each.
TEST_F(PodNerTest, AnnotateSmokeTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string inputs[] = {
      "Google New York , in New York",
      "Jamie I'm in the first picture and Cameron and Zach "
      "are in the second "
      "picture.",
  };
  for (const std::string& input : inputs) {
    // Fresh result vector per input, so the inputs are checked independently.
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(input), &annotations));
    EXPECT_THAT(annotations, Not(IsEmpty()));
  }
}
138
// Annotating the empty string must succeed and produce no annotations.
TEST_F(PodNerTest, AnnotateEmptyInput) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::vector<AnnotatedSpan> annotations;
  const bool annotated =
      annotator->Annotate(UTF8ToUnicodeText(""), &annotations);
  ASSERT_TRUE(annotated);
  EXPECT_THAT(annotations, IsEmpty());
}
150
FillCollections(const std::vector<std::string> & collection_names,const std::vector<float> & single_token_priority_scores,const std::vector<float> & multi_token_priority_scores,std::vector<std::unique_ptr<PodNerModel_::CollectionT>> * collections)151 void FillCollections(
152 const std::vector<std::string>& collection_names,
153 const std::vector<float>& single_token_priority_scores,
154 const std::vector<float>& multi_token_priority_scores,
155 std::vector<std::unique_ptr<PodNerModel_::CollectionT>>* collections) {
156 ASSERT_TRUE(collection_names.size() == single_token_priority_scores.size() &&
157 collection_names.size() == multi_token_priority_scores.size());
158 collections->clear();
159 for (int i = 0; i < collection_names.size(); ++i) {
160 collections->push_back(std::make_unique<PodNerModel_::CollectionT>());
161 collections->back()->name = collection_names[i];
162 collections->back()->single_token_priority_score =
163 single_token_priority_scores[i];
164 collections->back()->multi_token_priority_score =
165 multi_token_priority_scores[i];
166 }
167 }
168
EmplaceToLabelVector(BoiseType boise_type,MentionType mention_type,int collection_id,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)169 void EmplaceToLabelVector(
170 BoiseType boise_type, MentionType mention_type, int collection_id,
171 std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
172 labels->push_back(std::make_unique<PodNerModel_::LabelT>());
173 labels->back()->boise_type = boise_type;
174 labels->back()->mention_type = mention_type;
175 labels->back()->collection_id = collection_id;
176 }
177
FillLabels(int num_collections,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)178 void FillLabels(int num_collections,
179 std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
180 labels->clear();
181 for (auto boise_type :
182 {BoiseType_BEGIN, BoiseType_END, BoiseType_INTERMEDIATE}) {
183 for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
184 for (int i = 0; i < num_collections - 1; ++i) { // skip undefined
185 EmplaceToLabelVector(boise_type, mention_type, i, labels);
186 }
187 }
188 }
189 EmplaceToLabelVector(BoiseType_O, MentionType_UNDEFINED, num_collections - 1,
190 labels);
191 for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
192 for (int i = 0; i < num_collections - 1; ++i) { // skip undefined
193 EmplaceToLabelVector(BoiseType_SINGLE, mention_type, i, labels);
194 }
195 }
196 }
197
// With the fixture's default model, both a multi-token and a single-token
// location are expected to be classified as "location" with the default
// priority score.
TEST_F(PodNerTest, AnnotateDefaultCollections) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";
  for (const std::string& text : {multi_word_location, single_word_location}) {
    SCOPED_TRACE(text);  // Identifies the failing input in the test log.
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &annotations));
    // Fatal assertions: the element accesses below would be undefined
    // behaviour on an empty vector.
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "location");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              kDefaultPriorityScore);
  }
}
223
// With a model built for ModeFlag_SELECTION only, Annotate is expected to
// succeed but return no annotations.
TEST_F(PodNerForSelectionTest, AnnotateWithDisabledAnnotationReturnsNoResults) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  std::vector<AnnotatedSpan> annotations;
  const bool annotated = annotator->Annotate(
      UTF8ToUnicodeText(multi_word_location), &annotations);
  ASSERT_TRUE(annotated);
  EXPECT_THAT(annotations, IsEmpty());
}
235
// Rebuilds the model with an explicit collection list in which the fourth
// collection is named "xxx" and carries distinct single-/multi-token priority
// scores, then checks that the two location inputs are reported as "xxx" with
// the matching priority score.
TEST_F(PodNerTest, AnnotateConfigurableCollections) {
  std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
  ASSERT_TRUE(unpacked_model != nullptr);

  float xxx_single_token_priority = 0.9;
  float xxx_multi_token_priority = 1.7;
  const std::vector<std::string> collection_names = {
      "art",          "consumer_good", "event",  "xxx",
      "organization", "ner_entity",    "person", "undefined"};
  const std::vector<float> single_token_priority_scores = {
      0., 0., 0., xxx_single_token_priority, 0., 0., 0., 0.};
  const std::vector<float> multi_token_priority_scores = {
      0., 0., 0., xxx_multi_token_priority, 0., 0., 0., 0.};
  // FillCollections uses fatal assertions internally; propagate them so the
  // test does not continue with a half-configured model.
  ASSERT_NO_FATAL_FAILURE(FillCollections(
      collection_names, single_token_priority_scores,
      multi_token_priority_scores, &(unpacked_model->collections)));
  FillLabels(static_cast<int>(collection_names.size()),
             &(unpacked_model->labels));

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
  // The annotator is created from a pointer into this buffer, so the buffer is
  // declared first and therefore outlives the annotator.
  std::string model_buffer =
      std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
                  builder.GetSize());
  std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
      static_cast<const PodNerModel*>(
          flatbuffers::GetRoot<PodNerModel>(model_buffer.data())),
      *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::string multi_word_location = "I live in New York";
  std::string single_word_location = "I live in Zurich";
  {
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                    &annotations));
    // Fatal assertions: the element accesses below would be undefined
    // behaviour on an empty vector.
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              xxx_multi_token_priority);

    annotations.clear();
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                    &annotations));
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
    EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              xxx_single_token_priority);
  }
}
283
// The same short text is annotated with the fixture model, but yields no
// annotations once the model's min_number_of_tokens is raised to 4.
TEST_F(PodNerTest, AnnotateMinNumTokens) {
  const std::string text = "in New York";

  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> baseline_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &baseline_annotations));
    EXPECT_THAT(baseline_annotations, Not(IsEmpty()));
  }

  // Re-serialize the fixture model with the stricter token threshold.
  std::unique_ptr<PodNerModelT> stricter_model(model_->UnPack());
  ASSERT_TRUE(stricter_model != nullptr);
  stricter_model->min_number_of_tokens = 4;

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, stricter_model.get()));
  const std::string stricter_model_buffer(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(stricter_model_buffer.data()),
      *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> filtered_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &filtered_annotations));
    EXPECT_THAT(filtered_annotations, IsEmpty());
  }
}
317
// The same short text is annotated with the fixture model, but yields no
// annotations once the model's min_number_of_wordpieces is raised to 10.
TEST_F(PodNerTest, AnnotateMinNumWordpieces) {
  const std::string text = "in New York";

  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> baseline_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &baseline_annotations));
    EXPECT_THAT(baseline_annotations, Not(IsEmpty()));
  }

  // Re-serialize the fixture model with the stricter wordpiece threshold.
  std::unique_ptr<PodNerModelT> stricter_model(model_->UnPack());
  ASSERT_TRUE(stricter_model != nullptr);
  stricter_model->min_number_of_wordpieces = 10;

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, stricter_model.get()));
  const std::string stricter_model_buffer(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(stricter_model_buffer.data()),
      *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> filtered_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &filtered_annotations));
    EXPECT_THAT(filtered_annotations, IsEmpty());
  }
}
351
// A long token-like string without natural-language words must be processed
// successfully and produce no annotations.
TEST_F(PodNerTest, AnnotateNonstandardText) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string nonstandard_text =
      "abcNxCDU1RWNvbXByLXI4NS8xNzcwLzE3NzA4NDY2L3J1Ymluby1raWRzLXJlY2xpbmVyLXd"
      "pdGgtY3VwLWhvbGRlci5qcGc=/"
      "UnViaW5vIEtpZHMgUmVjbGluZXIgd2l0aCBDdXAgSG9sZGVyIGJ5IEhhcnJpZXQgQmVl."
      "html>";

  std::vector<AnnotatedSpan> annotations;
  const bool annotated =
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations);
  ASSERT_TRUE(annotated);
  EXPECT_THAT(annotations, IsEmpty());
}
367
// Checks that a control character directly after a name does not change the
// reported span: "Kuba" must be annotated at codepoints [11, 15) whether or
// not the control character is followed by a space.
// NOTE(review): despite the test name, "\x09" is a horizontal tab, not a line
// feed ("\x0a") - confirm which character was intended.
TEST_F(PodNerTest, AnnotateTextWithLinefeed) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Built in two steps: writing "\x09and" in one literal would make the hex
  // escape swallow the following 'a'.
  std::string nonstandard_text = "My name is Kuba\x09";
  nonstandard_text += "and this is a test.";
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  // Fatal: annotations[0] below is undefined behaviour on an empty vector.
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));

  // Start from an empty vector so that annotations[0] below is guaranteed to
  // come from the second call rather than from the first one.
  annotations.clear();
  nonstandard_text = "My name is Kuba\x09 and this is a test.";
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));
}
387
// Appends text with non-ASCII (Czech) words to a short and to a long input.
// The short input is expected to yield no annotations, while the long input is
// still expected to yield some.
TEST_F(PodNerTest, AnnotateWithUnknownWordpieces) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car.  "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free. In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle.";
  const std::string text_with_unknown_wordpieces = "před chvílí";

  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText("Google New York , in New York. " +
                                            text_with_unknown_wordpieces),
                          &annotations));
  EXPECT_THAT(annotations, IsEmpty());

  // Reset the vector so that the second expectation reflects only the second
  // call, even when the first expectation has failed.
  annotations.clear();
  ASSERT_TRUE(annotator->Annotate(
      UTF8ToUnicodeText(long_text + " " + text_with_unknown_wordpieces),
      &annotations));
  EXPECT_THAT(annotations, Not(IsEmpty()));
}
417
418 class PodNerTestWithOrWithoutFinalPeriod
419 : public PodNerTest,
420 public testing::WithParamInterface<bool> {};
421
422 INSTANTIATE_TEST_SUITE_P(TestAnnotateLongText,
423 PodNerTestWithOrWithoutFinalPeriod,
424 testing::Values(true, false));
425
// Annotates a long, multi-sentence text (with or without the
// append_final_period model, depending on the test parameter) and checks that
// the first annotation covers the first "Seattle" and the last annotation
// covers "Olympic Sculpture Garden".
TEST_P(PodNerTestWithOrWithoutFinalPeriod, AnnotateLongText) {
  std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
      GetParam() ? model_append_final_period_ : model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car.  "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free. In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle. If you're interested in "
      "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
      "is located at the foot of the Space Needle. It has a lot of rock'n "
      "roll memorabilia that you may find interesting. The Chihuly Garden "
      "and Glass musuem is near the Space Needle and you can get a "
      "combination ticket for both. It gets really good reviews. If you're "
      "interested, then the Bill & Melinda Gates Foundation is across from "
      "the EMP and has a visitors center that is free. Come see how Bill "
      "Gates is giving away his millions. Take the Monorail back downtown. "
      "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
      "Place Market. Look around then head for the Pike Place hill climb "
      "which is a series of steps that walk down to the waterfront. You will "
      "end up across the street from the Seattle Aquarium. Plenty of things "
      "to do on the waterfront, boat cruises, seafood restaurants, the "
      "Aquarium, or your typical tourist activities. You can walk or take "
      "the waterfront trolley bus. Note that waterfront construction has "
      "relocated the trolley Metro bus route 99 that will take you from "
      "Pioneer Square all the way to the end of the waterfront where you can "
      "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
      "Pier 70. The route goes thru Chinatown/International District, "
      "through Pioneer Square, up 1st ave past the Pike Place Market and to "
      "1st and Cedar which is walking distance to the Space Needle. It then "
      "goes down Broad Street toward the Olympic Sculpture Garden. It runs "
      "approximately every 30 minutes during the day and early evening.";
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_text), &annotations));
  // Fatal: front()/back() below are undefined behaviour on an empty vector.
  ASSERT_THAT(annotations, Not(IsEmpty()));

  // long_text contains only ASCII characters, so the byte offsets returned by
  // std::string::find are also codepoint offsets.
  const std::string location_from_beginning = "Seattle";
  const std::string::size_type first_location_pos =
      long_text.find(location_from_beginning);
  ASSERT_NE(first_location_pos, std::string::npos);
  const int start_span_location_from_beginning =
      static_cast<int>(first_location_pos);
  EXPECT_EQ(annotations.front().span,
            CodepointSpan(
                start_span_location_from_beginning,
                start_span_location_from_beginning +
                    static_cast<int>(location_from_beginning.length())));

  const std::string location_from_end = "Olympic Sculpture Garden";
  const std::string::size_type last_location_pos =
      long_text.find(location_from_end);
  ASSERT_NE(last_location_pos, std::string::npos);
  const int start_span_location_from_end = static_cast<int>(last_location_pos);
  EXPECT_EQ(annotations.back().span,
            CodepointSpan(start_span_location_from_end,
                          start_span_location_from_end +
                              static_cast<int>(location_from_end.length())));
}
485
// Clicking on the word "Klondike" inside a long text is expected to expand the
// selection to the full entity "Klondike Gold Rush Museum".
TEST_F(PodNerTest, SuggestSelectionLongText) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car.  "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free. In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle. If you're interested in "
      "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
      "is located at the foot of the Space Needle. It has a lot of rock'n "
      "roll memorabilia that you may find interesting. The Chihuly Garden "
      "and Glass musuem is near the Space Needle and you can get a "
      "combination ticket for both. It gets really good reviews. If you're "
      "interested, then the Bill & Melinda Gates Foundation is across from "
      "the EMP and has a visitors center that is free. Come see how Bill "
      "Gates is giving away his millions. Take the Monorail back downtown. "
      "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
      "Place Market. Look around then head for the Pike Place hill climb "
      "which is a series of steps that walk down to the waterfront. You will "
      "end up across the street from the Seattle Aquarium. Plenty of things "
      "to do on the waterfront, boat cruises, seafood restaurants, the "
      "Aquarium, or your typical tourist activities. You can walk or take "
      "the waterfront trolley bus. Note that waterfront construction has "
      "relocated the trolley Metro bus route 99 that will take you from "
      "Pioneer Square all the way to the end of the waterfront where you can "
      "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
      "Pier 70. The route goes thru Chinatown/International District, "
      "through Pioneer Square, up 1st ave past the Pike Place Market and to "
      "1st and Cedar which is walking distance to the Space Needle. It then "
      "goes down Broad Street toward the Olympic Sculpture Garden. It runs "
      "approximately every 30 minutes during the day and early evening.";
  const std::string klondike = "Klondike Gold Rush Museum";
  // The click covers only the first word of the entity.
  const std::string clicked_word = "Klondike";

  // long_text contains only ASCII characters, so the byte offset returned by
  // std::string::find is also a codepoint offset.
  const std::string::size_type klondike_pos = long_text.find(klondike);
  ASSERT_NE(klondike_pos, std::string::npos);
  const int klondike_start = static_cast<int>(klondike_pos);
  const int click_end =
      klondike_start + static_cast<int>(clicked_word.length());
  const int entity_end = klondike_start + static_cast<int>(klondike.length());

  AnnotatedSpan suggested_span;
  EXPECT_TRUE(annotator->SuggestSelection(UTF8ToUnicodeText(long_text),
                                          {klondike_start, click_end},
                                          &suggested_span));
  EXPECT_EQ(suggested_span.span, CodepointSpan(klondike_start, entity_end));
}
535
// A click inside "New York" ([7, 10)) is expected to expand to [7, 15); a
// click on "in" ([17, 19)) is expected to fail and leave an invalid span.
TEST_F(PodNerTest, SuggestSelectionTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string text = "Google New York, in New York";
  AnnotatedSpan suggestion;

  const bool entity_click_succeeded = annotator->SuggestSelection(
      UTF8ToUnicodeText(text), {7, 10}, &suggestion);
  EXPECT_TRUE(entity_click_succeeded);
  EXPECT_EQ(suggestion.span, CodepointSpan(7, 15));

  const bool non_entity_click_succeeded = annotator->SuggestSelection(
      UTF8ToUnicodeText(text), {17, 19}, &suggestion);
  EXPECT_FALSE(non_entity_click_succeeded);
  EXPECT_EQ(suggestion.span, CodepointSpan(kInvalidIndex, kInvalidIndex));
}
551
// With a model built for annotation and classification only, SuggestSelection
// is expected to return false.
TEST_F(PodNerForAnnotationAndClassificationTest,
       SuggestSelectionWithDisabledSelectionReturnsNoResults) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string text = "Google New York, in New York";
  AnnotatedSpan suggestion;
  const bool suggested = annotator->SuggestSelection(UTF8ToUnicodeText(text),
                                                     {7, 10}, &suggestion);
  EXPECT_FALSE(suggested);
}
563
// Classifying the span [10, 18) ("New York") is expected to succeed and report
// the "location" collection.
TEST_F(PodNerTest, ClassifyTextTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string text = "We met in New York";
  ClassificationResult classification;
  const bool classified = annotator->ClassifyText(UTF8ToUnicodeText(text),
                                                  {10, 18}, &classification);
  ASSERT_TRUE(classified);
  EXPECT_EQ(classification.collection, "location");
}
574
// With a model built for ModeFlag_SELECTION only, ClassifyText is expected to
// return false.
TEST_F(PodNerForSelectionTest,
       ClassifyTextWithDisabledClassificationReturnsFalse) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string text = "We met in New York";
  ClassificationResult classification;
  const bool classified = annotator->ClassifyText(UTF8ToUnicodeText(text),
                                                  {10, 18}, &classification);
  ASSERT_FALSE(classified);
}
585
// Calls SuggestSelection on one shared annotator from 20 threads at once.
TEST_F(PodNerTest, ThreadSafety) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Do inference in 20 threads. When run with --config=tsan, this should fire
  // if there's a problem.
  constexpr int kNumThreads = 20;
  std::vector<std::thread> workers;
  workers.reserve(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    // Capturing by reference is safe: every worker is joined before
    // `annotator` goes out of scope.
    workers.emplace_back([&annotator]() {
      AnnotatedSpan suggestion;
      EXPECT_TRUE(annotator->SuggestSelection(
          UTF8ToUnicodeText("Google New York, in New York"), {7, 10},
          &suggestion));
      EXPECT_EQ(suggestion.span, CodepointSpan(7, 15));
    });
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
}
607
608 } // namespace
609 } // namespace libtextclassifier3
610