Lines Matching full:tokenizer

17 #include "utils/tokenizer.h"
29 class TestingTokenizer : public Tokenizer {
39 : Tokenizer(type, unilib, codepoint_ranges, in TestingTokenizer()
43 using Tokenizer::FindTokenizationRange;
126 TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs, in TEST() local
132 EXPECT_EQ(tokenizer.TestFindTokenizationRole(0), in TEST()
134 EXPECT_EQ(tokenizer.TestFindTokenizationRole(5), in TEST()
136 EXPECT_EQ(tokenizer.TestFindTokenizationRole(10), in TEST()
140 EXPECT_EQ(tokenizer.TestFindTokenizationRole(31), in TEST()
142 EXPECT_EQ(tokenizer.TestFindTokenizationRole(32), in TEST()
144 EXPECT_EQ(tokenizer.TestFindTokenizationRole(33), in TEST()
148 EXPECT_EQ(tokenizer.TestFindTokenizationRole(1233), in TEST()
150 EXPECT_EQ(tokenizer.TestFindTokenizationRole(1234), in TEST()
152 EXPECT_EQ(tokenizer.TestFindTokenizationRole(12344), in TEST()
154 EXPECT_EQ(tokenizer.TestFindTokenizationRole(12345), in TEST()
158 EXPECT_EQ(tokenizer.TestFindTokenizationRole(99), in TEST()
173 TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs, in TEST() local
178 std::vector<Token> tokens = tokenizer.Tokenize("Hello world!"); in TEST()
208 TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs, in TEST() local
213 EXPECT_THAT(tokenizer.Tokenize("앨라배마 주 전화(123) 456-789웹사이트"), in TEST()
344 TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs, in TEST() local
351 tokens = tokenizer.Tokenize( in TEST()
355 tokens = tokenizer.Tokenize("問少目 hello 木輸ยามきゃ"); in TEST()
375 TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {}, in TEST() local
379 std::vector<Token> tokens = tokenizer.Tokenize("พระบาท สมเด็จ พระ ปร มิ"); in TEST()
395 TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {}, in TEST() local
400 tokenizer.Tokenize("The interval is: -(12, 138*)"); in TEST()
423 TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {}, in TEST() local
427 std::vector<Token> tokens = tokenizer.Tokenize("3.1 3﹒2 3.3"); in TEST()
441 TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {}, in TEST() local
445 std::vector<Token> tokens = tokenizer.Tokenize("พระบาทสมเด็จพระปรมิ"); in TEST()
489 TestingTokenizerProxy tokenizer(TokenizationType_MIXED, configs, in TEST() local
495 std::vector<Token> tokens = tokenizer.Tokenize( in TEST()
520 TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, in TEST() local
526 EXPECT_EQ(tokenizer.Tokenize("앨라배마123웹사이트"), in TEST()
531 TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, in TEST() local
536 EXPECT_EQ(tokenizer.Tokenize("앨라배마123웹사이트"), in TEST()
544 TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {}, in TEST() local
548 std::vector<Token> tokens = tokenizer.Tokenize("7% -3.14 68.9#? 7% $99 .18."); in TEST()
561 TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {}, in TEST() local
565 std::vector<Token> tokens = tokenizer.Tokenize("2 pércént 3パーセント"); in TEST()
573 TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {}, in TEST() local
577 std::vector<Token> tokens = tokenizer.Tokenize("3 3﹒2 3.3%"); in TEST()
585 TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {}, in TEST() local
589 std::vector<Token> tokens = tokenizer.Tokenize("15.12.2019 january's 3.2"); in TEST()
600 TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {}, in TEST() local
604 std::vector<Token> tokens = tokenizer.Tokenize("The+2345++the +íí+"); in TEST()
614 TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {}, in TEST() local
618 std::vector<Token> tokens = tokenizer.Tokenize("2 3 4 5"); in TEST()