/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker */ 16*993b0882SAndroid Build Coastguard Worker 17*993b0882SAndroid Build Coastguard Worker #ifndef LIBTEXTCLASSIFIER_UTILS_GRAMMAR_TESTING_UTILS_H_ 18*993b0882SAndroid Build Coastguard Worker #define LIBTEXTCLASSIFIER_UTILS_GRAMMAR_TESTING_UTILS_H_ 19*993b0882SAndroid Build Coastguard Worker 20*993b0882SAndroid Build Coastguard Worker #include <memory> 21*993b0882SAndroid Build Coastguard Worker #include <vector> 22*993b0882SAndroid Build Coastguard Worker 23*993b0882SAndroid Build Coastguard Worker #include "utils/base/arena.h" 24*993b0882SAndroid Build Coastguard Worker #include "utils/flatbuffers/reflection.h" 25*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/derivation.h" 26*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/parse-tree.h" 27*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/semantics/value.h" 28*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/testing/value_generated.h" 29*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/text-context.h" 30*993b0882SAndroid Build Coastguard Worker #include "utils/i18n/locale.h" 31*993b0882SAndroid Build Coastguard Worker #include "utils/jvm-test-utils.h" 32*993b0882SAndroid Build Coastguard Worker #include "utils/test-data-test-utils.h" 33*993b0882SAndroid Build Coastguard Worker #include "utils/tokenizer.h" 34*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unilib.h" 35*993b0882SAndroid Build Coastguard Worker #include "gmock/gmock.h" 36*993b0882SAndroid Build Coastguard Worker #include "flatbuffers/base.h" 37*993b0882SAndroid Build Coastguard Worker #include "flatbuffers/flatbuffers.h" 38*993b0882SAndroid Build Coastguard Worker 39*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3::grammar { 40*993b0882SAndroid Build Coastguard Worker 41*993b0882SAndroid Build Coastguard Worker inline std::ostream& operator<<(std::ostream& os, const 
ParseTree* parse_tree) { 42*993b0882SAndroid Build Coastguard Worker return os << "ParseTree(lhs=" << parse_tree->lhs 43*993b0882SAndroid Build Coastguard Worker << ", begin=" << parse_tree->codepoint_span.first 44*993b0882SAndroid Build Coastguard Worker << ", end=" << parse_tree->codepoint_span.second << ")"; 45*993b0882SAndroid Build Coastguard Worker } 46*993b0882SAndroid Build Coastguard Worker 47*993b0882SAndroid Build Coastguard Worker inline std::ostream& operator<<(std::ostream& os, 48*993b0882SAndroid Build Coastguard Worker const Derivation& derivation) { 49*993b0882SAndroid Build Coastguard Worker return os << "Derivation(rule_id=" << derivation.rule_id << ", " 50*993b0882SAndroid Build Coastguard Worker << "parse_tree=" << derivation.parse_tree << ")"; 51*993b0882SAndroid Build Coastguard Worker } 52*993b0882SAndroid Build Coastguard Worker 53*993b0882SAndroid Build Coastguard Worker MATCHER_P3(IsDerivation, rule_id, begin, end, 54*993b0882SAndroid Build Coastguard Worker "is derivation of rule that " + 55*993b0882SAndroid Build Coastguard Worker ::testing::DescribeMatcher<int>(rule_id, negation) + 56*993b0882SAndroid Build Coastguard Worker ", begin that " + 57*993b0882SAndroid Build Coastguard Worker ::testing::DescribeMatcher<int>(begin, negation) + 58*993b0882SAndroid Build Coastguard Worker ", end that " + ::testing::DescribeMatcher<int>(end, negation)) { 59*993b0882SAndroid Build Coastguard Worker return ::testing::ExplainMatchResult(CodepointSpan(begin, end), 60*993b0882SAndroid Build Coastguard Worker arg.parse_tree->codepoint_span, 61*993b0882SAndroid Build Coastguard Worker result_listener) && 62*993b0882SAndroid Build Coastguard Worker ::testing::ExplainMatchResult(rule_id, arg.rule_id, result_listener); 63*993b0882SAndroid Build Coastguard Worker } 64*993b0882SAndroid Build Coastguard Worker 65*993b0882SAndroid Build Coastguard Worker // A test fixture with common auxiliary test methods. 
66*993b0882SAndroid Build Coastguard Worker class GrammarTest : public testing::Test { 67*993b0882SAndroid Build Coastguard Worker protected: GrammarTest()68*993b0882SAndroid Build Coastguard Worker explicit GrammarTest() 69*993b0882SAndroid Build Coastguard Worker : unilib_(CreateUniLibForTesting()), 70*993b0882SAndroid Build Coastguard Worker arena_(/*block_size=*/16 << 10), 71*993b0882SAndroid Build Coastguard Worker semantic_values_schema_( 72*993b0882SAndroid Build Coastguard Worker GetTestFileContent("utils/grammar/testing/value.bfbs")), 73*993b0882SAndroid Build Coastguard Worker tokenizer_(libtextclassifier3::TokenizationType_ICU, unilib_.get(), 74*993b0882SAndroid Build Coastguard Worker /*codepoint_ranges=*/{}, 75*993b0882SAndroid Build Coastguard Worker /*internal_tokenizer_codepoint_ranges=*/{}, 76*993b0882SAndroid Build Coastguard Worker /*split_on_script_change=*/false, 77*993b0882SAndroid Build Coastguard Worker /*icu_preserve_whitespace_tokens=*/false) {} 78*993b0882SAndroid Build Coastguard Worker TextContextForText(const std::string & text)79*993b0882SAndroid Build Coastguard Worker TextContext TextContextForText(const std::string& text) { 80*993b0882SAndroid Build Coastguard Worker TextContext context; 81*993b0882SAndroid Build Coastguard Worker context.text = UTF8ToUnicodeText(text); 82*993b0882SAndroid Build Coastguard Worker context.tokens = tokenizer_.Tokenize(context.text); 83*993b0882SAndroid Build Coastguard Worker context.codepoints = context.text.Codepoints(); 84*993b0882SAndroid Build Coastguard Worker context.codepoints.push_back(context.text.end()); 85*993b0882SAndroid Build Coastguard Worker context.locales = {Locale::FromBCP47("en")}; 86*993b0882SAndroid Build Coastguard Worker context.context_span.first = 0; 87*993b0882SAndroid Build Coastguard Worker context.context_span.second = context.tokens.size(); 88*993b0882SAndroid Build Coastguard Worker return context; 89*993b0882SAndroid Build Coastguard Worker } 90*993b0882SAndroid 
Build Coastguard Worker 91*993b0882SAndroid Build Coastguard Worker // Creates a semantic expression union. 92*993b0882SAndroid Build Coastguard Worker template <typename T> AsSemanticExpressionUnion(T && expression)93*993b0882SAndroid Build Coastguard Worker SemanticExpressionT AsSemanticExpressionUnion(T&& expression) { 94*993b0882SAndroid Build Coastguard Worker SemanticExpressionT semantic_expression; 95*993b0882SAndroid Build Coastguard Worker semantic_expression.expression.Set(std::forward<T>(expression)); 96*993b0882SAndroid Build Coastguard Worker return semantic_expression; 97*993b0882SAndroid Build Coastguard Worker } 98*993b0882SAndroid Build Coastguard Worker 99*993b0882SAndroid Build Coastguard Worker template <typename T> CreateExpression(T && expression)100*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<SemanticExpression> CreateExpression(T&& expression) { 101*993b0882SAndroid Build Coastguard Worker return Pack<SemanticExpression>( 102*993b0882SAndroid Build Coastguard Worker AsSemanticExpressionUnion(std::forward<T>(expression))); 103*993b0882SAndroid Build Coastguard Worker } 104*993b0882SAndroid Build Coastguard Worker CreateEmptyExpression()105*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<SemanticExpression> CreateEmptyExpression() { 106*993b0882SAndroid Build Coastguard Worker return Pack<SemanticExpression>(SemanticExpressionT()); 107*993b0882SAndroid Build Coastguard Worker } 108*993b0882SAndroid Build Coastguard Worker 109*993b0882SAndroid Build Coastguard Worker // Packs a flatbuffer. 
110*993b0882SAndroid Build Coastguard Worker template <typename T> Pack(const typename T::NativeTableType && value)111*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<T> Pack(const typename T::NativeTableType&& value) { 112*993b0882SAndroid Build Coastguard Worker flatbuffers::FlatBufferBuilder builder; 113*993b0882SAndroid Build Coastguard Worker builder.Finish(T::Pack(builder, &value)); 114*993b0882SAndroid Build Coastguard Worker return OwnedFlatbuffer<T>(builder.Release()); 115*993b0882SAndroid Build Coastguard Worker } 116*993b0882SAndroid Build Coastguard Worker 117*993b0882SAndroid Build Coastguard Worker // Creates a test semantic value. CreateSemanticValue(const TestValueT & value)118*993b0882SAndroid Build Coastguard Worker const SemanticValue* CreateSemanticValue(const TestValueT& value) { 119*993b0882SAndroid Build Coastguard Worker const std::string value_buffer = PackFlatbuffer<TestValue>(&value); 120*993b0882SAndroid Build Coastguard Worker return arena_.AllocAndInit<SemanticValue>( 121*993b0882SAndroid Build Coastguard Worker semantic_values_schema_->objects()->Get( 122*993b0882SAndroid Build Coastguard Worker TypeIdForName(semantic_values_schema_.get(), 123*993b0882SAndroid Build Coastguard Worker "libtextclassifier3.grammar.TestValue") 124*993b0882SAndroid Build Coastguard Worker .value()), 125*993b0882SAndroid Build Coastguard Worker StringPiece(arena_.Memdup(value_buffer.data(), value_buffer.size()), 126*993b0882SAndroid Build Coastguard Worker value_buffer.size())); 127*993b0882SAndroid Build Coastguard Worker } 128*993b0882SAndroid Build Coastguard Worker 129*993b0882SAndroid Build Coastguard Worker // Creates a primitive semantic value. 
130*993b0882SAndroid Build Coastguard Worker template <typename T> CreatePrimitiveSemanticValue(const T value)131*993b0882SAndroid Build Coastguard Worker const SemanticValue* CreatePrimitiveSemanticValue(const T value) { 132*993b0882SAndroid Build Coastguard Worker return arena_.AllocAndInit<SemanticValue>(value); 133*993b0882SAndroid Build Coastguard Worker } 134*993b0882SAndroid Build Coastguard Worker CreateConstExpression(const TestValueT & value)135*993b0882SAndroid Build Coastguard Worker std::unique_ptr<SemanticExpressionT> CreateConstExpression( 136*993b0882SAndroid Build Coastguard Worker const TestValueT& value) { 137*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 138*993b0882SAndroid Build Coastguard Worker const_value.base_type = reflection::BaseType::Obj; 139*993b0882SAndroid Build Coastguard Worker const_value.type = TypeIdForName(semantic_values_schema_.get(), 140*993b0882SAndroid Build Coastguard Worker "libtextclassifier3.grammar.TestValue") 141*993b0882SAndroid Build Coastguard Worker .value(); 142*993b0882SAndroid Build Coastguard Worker const std::string value_buffer = PackFlatbuffer<TestValue>(&value); 143*993b0882SAndroid Build Coastguard Worker const_value.value.assign(value_buffer.begin(), value_buffer.end()); 144*993b0882SAndroid Build Coastguard Worker auto semantic_expression = std::make_unique<SemanticExpressionT>(); 145*993b0882SAndroid Build Coastguard Worker semantic_expression->expression.Set(const_value); 146*993b0882SAndroid Build Coastguard Worker return semantic_expression; 147*993b0882SAndroid Build Coastguard Worker } 148*993b0882SAndroid Build Coastguard Worker CreateAndPackConstExpression(const TestValueT & value)149*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<SemanticExpression> CreateAndPackConstExpression( 150*993b0882SAndroid Build Coastguard Worker const TestValueT& value) { 151*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 152*993b0882SAndroid 
Build Coastguard Worker const_value.base_type = reflection::BaseType::Obj; 153*993b0882SAndroid Build Coastguard Worker const_value.type = TypeIdForName(semantic_values_schema_.get(), 154*993b0882SAndroid Build Coastguard Worker "libtextclassifier3.grammar.TestValue") 155*993b0882SAndroid Build Coastguard Worker .value(); 156*993b0882SAndroid Build Coastguard Worker const std::string value_buffer = PackFlatbuffer<TestValue>(&value); 157*993b0882SAndroid Build Coastguard Worker const_value.value.assign(value_buffer.begin(), value_buffer.end()); 158*993b0882SAndroid Build Coastguard Worker return CreateExpression(const_value); 159*993b0882SAndroid Build Coastguard Worker } 160*993b0882SAndroid Build Coastguard Worker CreateConstDateExpression(const TestDateT & value)161*993b0882SAndroid Build Coastguard Worker std::unique_ptr<SemanticExpressionT> CreateConstDateExpression( 162*993b0882SAndroid Build Coastguard Worker const TestDateT& value) { 163*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 164*993b0882SAndroid Build Coastguard Worker const_value.base_type = reflection::BaseType::Obj; 165*993b0882SAndroid Build Coastguard Worker const_value.type = TypeIdForName(semantic_values_schema_.get(), 166*993b0882SAndroid Build Coastguard Worker "libtextclassifier3.grammar.TestDate") 167*993b0882SAndroid Build Coastguard Worker .value(); 168*993b0882SAndroid Build Coastguard Worker const std::string value_buffer = PackFlatbuffer<TestDate>(&value); 169*993b0882SAndroid Build Coastguard Worker const_value.value.assign(value_buffer.begin(), value_buffer.end()); 170*993b0882SAndroid Build Coastguard Worker auto semantic_expression = std::make_unique<SemanticExpressionT>(); 171*993b0882SAndroid Build Coastguard Worker semantic_expression->expression.Set(const_value); 172*993b0882SAndroid Build Coastguard Worker return semantic_expression; 173*993b0882SAndroid Build Coastguard Worker } 174*993b0882SAndroid Build Coastguard Worker 
CreateAndPackMergeValuesExpression(const std::vector<TestDateT> & values)175*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<SemanticExpression> CreateAndPackMergeValuesExpression( 176*993b0882SAndroid Build Coastguard Worker const std::vector<TestDateT>& values) { 177*993b0882SAndroid Build Coastguard Worker MergeValueExpressionT merge_expression; 178*993b0882SAndroid Build Coastguard Worker merge_expression.type = TypeIdForName(semantic_values_schema_.get(), 179*993b0882SAndroid Build Coastguard Worker "libtextclassifier3.grammar.TestDate") 180*993b0882SAndroid Build Coastguard Worker .value(); 181*993b0882SAndroid Build Coastguard Worker for (const TestDateT& test_date : values) { 182*993b0882SAndroid Build Coastguard Worker merge_expression.values.emplace_back(new SemanticExpressionT); 183*993b0882SAndroid Build Coastguard Worker merge_expression.values.back() = CreateConstDateExpression(test_date); 184*993b0882SAndroid Build Coastguard Worker } 185*993b0882SAndroid Build Coastguard Worker return CreateExpression(std::move(merge_expression)); 186*993b0882SAndroid Build Coastguard Worker } 187*993b0882SAndroid Build Coastguard Worker 188*993b0882SAndroid Build Coastguard Worker template <typename T> CreatePrimitiveConstExpression(const T value)189*993b0882SAndroid Build Coastguard Worker std::unique_ptr<SemanticExpressionT> CreatePrimitiveConstExpression( 190*993b0882SAndroid Build Coastguard Worker const T value) { 191*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 192*993b0882SAndroid Build Coastguard Worker const_value.base_type = flatbuffers_base_type<T>::value; 193*993b0882SAndroid Build Coastguard Worker const_value.value.resize(sizeof(T)); 194*993b0882SAndroid Build Coastguard Worker flatbuffers::WriteScalar(const_value.value.data(), value); 195*993b0882SAndroid Build Coastguard Worker auto semantic_expression = std::make_unique<SemanticExpressionT>(); 196*993b0882SAndroid Build Coastguard Worker 
semantic_expression->expression.Set(const_value); 197*993b0882SAndroid Build Coastguard Worker return semantic_expression; 198*993b0882SAndroid Build Coastguard Worker } 199*993b0882SAndroid Build Coastguard Worker 200*993b0882SAndroid Build Coastguard Worker template <typename T> CreateAndPackPrimitiveConstExpression(const T value)201*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<SemanticExpression> CreateAndPackPrimitiveConstExpression( 202*993b0882SAndroid Build Coastguard Worker const T value) { 203*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 204*993b0882SAndroid Build Coastguard Worker const_value.base_type = flatbuffers_base_type<T>::value; 205*993b0882SAndroid Build Coastguard Worker const_value.value.resize(sizeof(T)); 206*993b0882SAndroid Build Coastguard Worker flatbuffers::WriteScalar(const_value.value.data(), value); 207*993b0882SAndroid Build Coastguard Worker return CreateExpression(const_value); 208*993b0882SAndroid Build Coastguard Worker } 209*993b0882SAndroid Build Coastguard Worker 210*993b0882SAndroid Build Coastguard Worker template <> CreateAndPackPrimitiveConstExpression(const StringPiece value)211*993b0882SAndroid Build Coastguard Worker OwnedFlatbuffer<SemanticExpression> CreateAndPackPrimitiveConstExpression( 212*993b0882SAndroid Build Coastguard Worker const StringPiece value) { 213*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 214*993b0882SAndroid Build Coastguard Worker const_value.base_type = reflection::BaseType::String; 215*993b0882SAndroid Build Coastguard Worker const_value.value.assign(value.data(), value.data() + value.size()); 216*993b0882SAndroid Build Coastguard Worker return CreateExpression(const_value); 217*993b0882SAndroid Build Coastguard Worker } 218*993b0882SAndroid Build Coastguard Worker 219*993b0882SAndroid Build Coastguard Worker template <> CreatePrimitiveConstExpression(const StringPiece value)220*993b0882SAndroid Build Coastguard Worker 
std::unique_ptr<SemanticExpressionT> CreatePrimitiveConstExpression( 221*993b0882SAndroid Build Coastguard Worker const StringPiece value) { 222*993b0882SAndroid Build Coastguard Worker ConstValueExpressionT const_value; 223*993b0882SAndroid Build Coastguard Worker const_value.base_type = reflection::BaseType::String; 224*993b0882SAndroid Build Coastguard Worker const_value.value.assign(value.data(), value.data() + value.size()); 225*993b0882SAndroid Build Coastguard Worker auto semantic_expression = std::make_unique<SemanticExpressionT>(); 226*993b0882SAndroid Build Coastguard Worker semantic_expression->expression.Set(const_value); 227*993b0882SAndroid Build Coastguard Worker return semantic_expression; 228*993b0882SAndroid Build Coastguard Worker } 229*993b0882SAndroid Build Coastguard Worker 230*993b0882SAndroid Build Coastguard Worker const std::unique_ptr<UniLib> unilib_; 231*993b0882SAndroid Build Coastguard Worker UnsafeArena arena_; 232*993b0882SAndroid Build Coastguard Worker const OwnedFlatbuffer<reflection::Schema, std::string> 233*993b0882SAndroid Build Coastguard Worker semantic_values_schema_; 234*993b0882SAndroid Build Coastguard Worker const Tokenizer tokenizer_; 235*993b0882SAndroid Build Coastguard Worker }; 236*993b0882SAndroid Build Coastguard Worker 237*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3::grammar 238*993b0882SAndroid Build Coastguard Worker 239*993b0882SAndroid Build Coastguard Worker #endif // LIBTEXTCLASSIFIER_UTILS_GRAMMAR_TESTING_UTILS_H_ 240