xref: /aosp_15_r20/external/libtextclassifier/native/utils/grammar/testing/utils.h (revision 993b0882672172b81d12fad7a7ac0c3e5c824a12)
1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker  * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker  *
4*993b0882SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker  * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker  *
8*993b0882SAndroid Build Coastguard Worker  *      http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker  *
10*993b0882SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker  * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker  */
16*993b0882SAndroid Build Coastguard Worker 
17*993b0882SAndroid Build Coastguard Worker #ifndef LIBTEXTCLASSIFIER_UTILS_GRAMMAR_TESTING_UTILS_H_
18*993b0882SAndroid Build Coastguard Worker #define LIBTEXTCLASSIFIER_UTILS_GRAMMAR_TESTING_UTILS_H_
19*993b0882SAndroid Build Coastguard Worker 
20*993b0882SAndroid Build Coastguard Worker #include <memory>
21*993b0882SAndroid Build Coastguard Worker #include <vector>
22*993b0882SAndroid Build Coastguard Worker 
23*993b0882SAndroid Build Coastguard Worker #include "utils/base/arena.h"
24*993b0882SAndroid Build Coastguard Worker #include "utils/flatbuffers/reflection.h"
25*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/derivation.h"
26*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/parsing/parse-tree.h"
27*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/semantics/value.h"
28*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/testing/value_generated.h"
29*993b0882SAndroid Build Coastguard Worker #include "utils/grammar/text-context.h"
30*993b0882SAndroid Build Coastguard Worker #include "utils/i18n/locale.h"
31*993b0882SAndroid Build Coastguard Worker #include "utils/jvm-test-utils.h"
32*993b0882SAndroid Build Coastguard Worker #include "utils/test-data-test-utils.h"
33*993b0882SAndroid Build Coastguard Worker #include "utils/tokenizer.h"
34*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unilib.h"
35*993b0882SAndroid Build Coastguard Worker #include "gmock/gmock.h"
36*993b0882SAndroid Build Coastguard Worker #include "flatbuffers/base.h"
37*993b0882SAndroid Build Coastguard Worker #include "flatbuffers/flatbuffers.h"
38*993b0882SAndroid Build Coastguard Worker 
39*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3::grammar {
40*993b0882SAndroid Build Coastguard Worker 
41*993b0882SAndroid Build Coastguard Worker inline std::ostream& operator<<(std::ostream& os, const ParseTree* parse_tree) {
42*993b0882SAndroid Build Coastguard Worker   return os << "ParseTree(lhs=" << parse_tree->lhs
43*993b0882SAndroid Build Coastguard Worker             << ", begin=" << parse_tree->codepoint_span.first
44*993b0882SAndroid Build Coastguard Worker             << ", end=" << parse_tree->codepoint_span.second << ")";
45*993b0882SAndroid Build Coastguard Worker }
46*993b0882SAndroid Build Coastguard Worker 
47*993b0882SAndroid Build Coastguard Worker inline std::ostream& operator<<(std::ostream& os,
48*993b0882SAndroid Build Coastguard Worker                                 const Derivation& derivation) {
49*993b0882SAndroid Build Coastguard Worker   return os << "Derivation(rule_id=" << derivation.rule_id << ", "
50*993b0882SAndroid Build Coastguard Worker             << "parse_tree=" << derivation.parse_tree << ")";
51*993b0882SAndroid Build Coastguard Worker }
52*993b0882SAndroid Build Coastguard Worker 
53*993b0882SAndroid Build Coastguard Worker MATCHER_P3(IsDerivation, rule_id, begin, end,
54*993b0882SAndroid Build Coastguard Worker            "is derivation of rule that " +
55*993b0882SAndroid Build Coastguard Worker                ::testing::DescribeMatcher<int>(rule_id, negation) +
56*993b0882SAndroid Build Coastguard Worker                ", begin that " +
57*993b0882SAndroid Build Coastguard Worker                ::testing::DescribeMatcher<int>(begin, negation) +
58*993b0882SAndroid Build Coastguard Worker                ", end that " + ::testing::DescribeMatcher<int>(end, negation)) {
59*993b0882SAndroid Build Coastguard Worker   return ::testing::ExplainMatchResult(CodepointSpan(begin, end),
60*993b0882SAndroid Build Coastguard Worker                                        arg.parse_tree->codepoint_span,
61*993b0882SAndroid Build Coastguard Worker                                        result_listener) &&
62*993b0882SAndroid Build Coastguard Worker          ::testing::ExplainMatchResult(rule_id, arg.rule_id, result_listener);
63*993b0882SAndroid Build Coastguard Worker }
64*993b0882SAndroid Build Coastguard Worker 
65*993b0882SAndroid Build Coastguard Worker // A test fixture with common auxiliary test methods.
66*993b0882SAndroid Build Coastguard Worker class GrammarTest : public testing::Test {
67*993b0882SAndroid Build Coastguard Worker  protected:
GrammarTest()68*993b0882SAndroid Build Coastguard Worker   explicit GrammarTest()
69*993b0882SAndroid Build Coastguard Worker       : unilib_(CreateUniLibForTesting()),
70*993b0882SAndroid Build Coastguard Worker         arena_(/*block_size=*/16 << 10),
71*993b0882SAndroid Build Coastguard Worker         semantic_values_schema_(
72*993b0882SAndroid Build Coastguard Worker             GetTestFileContent("utils/grammar/testing/value.bfbs")),
73*993b0882SAndroid Build Coastguard Worker         tokenizer_(libtextclassifier3::TokenizationType_ICU, unilib_.get(),
74*993b0882SAndroid Build Coastguard Worker                    /*codepoint_ranges=*/{},
75*993b0882SAndroid Build Coastguard Worker                    /*internal_tokenizer_codepoint_ranges=*/{},
76*993b0882SAndroid Build Coastguard Worker                    /*split_on_script_change=*/false,
77*993b0882SAndroid Build Coastguard Worker                    /*icu_preserve_whitespace_tokens=*/false) {}
78*993b0882SAndroid Build Coastguard Worker 
TextContextForText(const std::string & text)79*993b0882SAndroid Build Coastguard Worker   TextContext TextContextForText(const std::string& text) {
80*993b0882SAndroid Build Coastguard Worker     TextContext context;
81*993b0882SAndroid Build Coastguard Worker     context.text = UTF8ToUnicodeText(text);
82*993b0882SAndroid Build Coastguard Worker     context.tokens = tokenizer_.Tokenize(context.text);
83*993b0882SAndroid Build Coastguard Worker     context.codepoints = context.text.Codepoints();
84*993b0882SAndroid Build Coastguard Worker     context.codepoints.push_back(context.text.end());
85*993b0882SAndroid Build Coastguard Worker     context.locales = {Locale::FromBCP47("en")};
86*993b0882SAndroid Build Coastguard Worker     context.context_span.first = 0;
87*993b0882SAndroid Build Coastguard Worker     context.context_span.second = context.tokens.size();
88*993b0882SAndroid Build Coastguard Worker     return context;
89*993b0882SAndroid Build Coastguard Worker   }
90*993b0882SAndroid Build Coastguard Worker 
91*993b0882SAndroid Build Coastguard Worker   // Creates a semantic expression union.
92*993b0882SAndroid Build Coastguard Worker   template <typename T>
AsSemanticExpressionUnion(T && expression)93*993b0882SAndroid Build Coastguard Worker   SemanticExpressionT AsSemanticExpressionUnion(T&& expression) {
94*993b0882SAndroid Build Coastguard Worker     SemanticExpressionT semantic_expression;
95*993b0882SAndroid Build Coastguard Worker     semantic_expression.expression.Set(std::forward<T>(expression));
96*993b0882SAndroid Build Coastguard Worker     return semantic_expression;
97*993b0882SAndroid Build Coastguard Worker   }
98*993b0882SAndroid Build Coastguard Worker 
99*993b0882SAndroid Build Coastguard Worker   template <typename T>
CreateExpression(T && expression)100*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<SemanticExpression> CreateExpression(T&& expression) {
101*993b0882SAndroid Build Coastguard Worker     return Pack<SemanticExpression>(
102*993b0882SAndroid Build Coastguard Worker         AsSemanticExpressionUnion(std::forward<T>(expression)));
103*993b0882SAndroid Build Coastguard Worker   }
104*993b0882SAndroid Build Coastguard Worker 
CreateEmptyExpression()105*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<SemanticExpression> CreateEmptyExpression() {
106*993b0882SAndroid Build Coastguard Worker     return Pack<SemanticExpression>(SemanticExpressionT());
107*993b0882SAndroid Build Coastguard Worker   }
108*993b0882SAndroid Build Coastguard Worker 
109*993b0882SAndroid Build Coastguard Worker   // Packs a flatbuffer.
110*993b0882SAndroid Build Coastguard Worker   template <typename T>
Pack(const typename T::NativeTableType && value)111*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<T> Pack(const typename T::NativeTableType&& value) {
112*993b0882SAndroid Build Coastguard Worker     flatbuffers::FlatBufferBuilder builder;
113*993b0882SAndroid Build Coastguard Worker     builder.Finish(T::Pack(builder, &value));
114*993b0882SAndroid Build Coastguard Worker     return OwnedFlatbuffer<T>(builder.Release());
115*993b0882SAndroid Build Coastguard Worker   }
116*993b0882SAndroid Build Coastguard Worker 
117*993b0882SAndroid Build Coastguard Worker   // Creates a test semantic value.
CreateSemanticValue(const TestValueT & value)118*993b0882SAndroid Build Coastguard Worker   const SemanticValue* CreateSemanticValue(const TestValueT& value) {
119*993b0882SAndroid Build Coastguard Worker     const std::string value_buffer = PackFlatbuffer<TestValue>(&value);
120*993b0882SAndroid Build Coastguard Worker     return arena_.AllocAndInit<SemanticValue>(
121*993b0882SAndroid Build Coastguard Worker         semantic_values_schema_->objects()->Get(
122*993b0882SAndroid Build Coastguard Worker             TypeIdForName(semantic_values_schema_.get(),
123*993b0882SAndroid Build Coastguard Worker                           "libtextclassifier3.grammar.TestValue")
124*993b0882SAndroid Build Coastguard Worker                 .value()),
125*993b0882SAndroid Build Coastguard Worker         StringPiece(arena_.Memdup(value_buffer.data(), value_buffer.size()),
126*993b0882SAndroid Build Coastguard Worker                     value_buffer.size()));
127*993b0882SAndroid Build Coastguard Worker   }
128*993b0882SAndroid Build Coastguard Worker 
129*993b0882SAndroid Build Coastguard Worker   // Creates a primitive semantic value.
130*993b0882SAndroid Build Coastguard Worker   template <typename T>
CreatePrimitiveSemanticValue(const T value)131*993b0882SAndroid Build Coastguard Worker   const SemanticValue* CreatePrimitiveSemanticValue(const T value) {
132*993b0882SAndroid Build Coastguard Worker     return arena_.AllocAndInit<SemanticValue>(value);
133*993b0882SAndroid Build Coastguard Worker   }
134*993b0882SAndroid Build Coastguard Worker 
CreateConstExpression(const TestValueT & value)135*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<SemanticExpressionT> CreateConstExpression(
136*993b0882SAndroid Build Coastguard Worker       const TestValueT& value) {
137*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
138*993b0882SAndroid Build Coastguard Worker     const_value.base_type = reflection::BaseType::Obj;
139*993b0882SAndroid Build Coastguard Worker     const_value.type = TypeIdForName(semantic_values_schema_.get(),
140*993b0882SAndroid Build Coastguard Worker                                      "libtextclassifier3.grammar.TestValue")
141*993b0882SAndroid Build Coastguard Worker                            .value();
142*993b0882SAndroid Build Coastguard Worker     const std::string value_buffer = PackFlatbuffer<TestValue>(&value);
143*993b0882SAndroid Build Coastguard Worker     const_value.value.assign(value_buffer.begin(), value_buffer.end());
144*993b0882SAndroid Build Coastguard Worker     auto semantic_expression = std::make_unique<SemanticExpressionT>();
145*993b0882SAndroid Build Coastguard Worker     semantic_expression->expression.Set(const_value);
146*993b0882SAndroid Build Coastguard Worker     return semantic_expression;
147*993b0882SAndroid Build Coastguard Worker   }
148*993b0882SAndroid Build Coastguard Worker 
CreateAndPackConstExpression(const TestValueT & value)149*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<SemanticExpression> CreateAndPackConstExpression(
150*993b0882SAndroid Build Coastguard Worker       const TestValueT& value) {
151*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
152*993b0882SAndroid Build Coastguard Worker     const_value.base_type = reflection::BaseType::Obj;
153*993b0882SAndroid Build Coastguard Worker     const_value.type = TypeIdForName(semantic_values_schema_.get(),
154*993b0882SAndroid Build Coastguard Worker                                      "libtextclassifier3.grammar.TestValue")
155*993b0882SAndroid Build Coastguard Worker                            .value();
156*993b0882SAndroid Build Coastguard Worker     const std::string value_buffer = PackFlatbuffer<TestValue>(&value);
157*993b0882SAndroid Build Coastguard Worker     const_value.value.assign(value_buffer.begin(), value_buffer.end());
158*993b0882SAndroid Build Coastguard Worker     return CreateExpression(const_value);
159*993b0882SAndroid Build Coastguard Worker   }
160*993b0882SAndroid Build Coastguard Worker 
CreateConstDateExpression(const TestDateT & value)161*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<SemanticExpressionT> CreateConstDateExpression(
162*993b0882SAndroid Build Coastguard Worker       const TestDateT& value) {
163*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
164*993b0882SAndroid Build Coastguard Worker     const_value.base_type = reflection::BaseType::Obj;
165*993b0882SAndroid Build Coastguard Worker     const_value.type = TypeIdForName(semantic_values_schema_.get(),
166*993b0882SAndroid Build Coastguard Worker                                      "libtextclassifier3.grammar.TestDate")
167*993b0882SAndroid Build Coastguard Worker                            .value();
168*993b0882SAndroid Build Coastguard Worker     const std::string value_buffer = PackFlatbuffer<TestDate>(&value);
169*993b0882SAndroid Build Coastguard Worker     const_value.value.assign(value_buffer.begin(), value_buffer.end());
170*993b0882SAndroid Build Coastguard Worker     auto semantic_expression = std::make_unique<SemanticExpressionT>();
171*993b0882SAndroid Build Coastguard Worker     semantic_expression->expression.Set(const_value);
172*993b0882SAndroid Build Coastguard Worker     return semantic_expression;
173*993b0882SAndroid Build Coastguard Worker   }
174*993b0882SAndroid Build Coastguard Worker 
CreateAndPackMergeValuesExpression(const std::vector<TestDateT> & values)175*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<SemanticExpression> CreateAndPackMergeValuesExpression(
176*993b0882SAndroid Build Coastguard Worker       const std::vector<TestDateT>& values) {
177*993b0882SAndroid Build Coastguard Worker     MergeValueExpressionT merge_expression;
178*993b0882SAndroid Build Coastguard Worker     merge_expression.type = TypeIdForName(semantic_values_schema_.get(),
179*993b0882SAndroid Build Coastguard Worker                                           "libtextclassifier3.grammar.TestDate")
180*993b0882SAndroid Build Coastguard Worker                                 .value();
181*993b0882SAndroid Build Coastguard Worker     for (const TestDateT& test_date : values) {
182*993b0882SAndroid Build Coastguard Worker       merge_expression.values.emplace_back(new SemanticExpressionT);
183*993b0882SAndroid Build Coastguard Worker       merge_expression.values.back() = CreateConstDateExpression(test_date);
184*993b0882SAndroid Build Coastguard Worker     }
185*993b0882SAndroid Build Coastguard Worker     return CreateExpression(std::move(merge_expression));
186*993b0882SAndroid Build Coastguard Worker   }
187*993b0882SAndroid Build Coastguard Worker 
188*993b0882SAndroid Build Coastguard Worker   template <typename T>
CreatePrimitiveConstExpression(const T value)189*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<SemanticExpressionT> CreatePrimitiveConstExpression(
190*993b0882SAndroid Build Coastguard Worker       const T value) {
191*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
192*993b0882SAndroid Build Coastguard Worker     const_value.base_type = flatbuffers_base_type<T>::value;
193*993b0882SAndroid Build Coastguard Worker     const_value.value.resize(sizeof(T));
194*993b0882SAndroid Build Coastguard Worker     flatbuffers::WriteScalar(const_value.value.data(), value);
195*993b0882SAndroid Build Coastguard Worker     auto semantic_expression = std::make_unique<SemanticExpressionT>();
196*993b0882SAndroid Build Coastguard Worker     semantic_expression->expression.Set(const_value);
197*993b0882SAndroid Build Coastguard Worker     return semantic_expression;
198*993b0882SAndroid Build Coastguard Worker   }
199*993b0882SAndroid Build Coastguard Worker 
200*993b0882SAndroid Build Coastguard Worker   template <typename T>
CreateAndPackPrimitiveConstExpression(const T value)201*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<SemanticExpression> CreateAndPackPrimitiveConstExpression(
202*993b0882SAndroid Build Coastguard Worker       const T value) {
203*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
204*993b0882SAndroid Build Coastguard Worker     const_value.base_type = flatbuffers_base_type<T>::value;
205*993b0882SAndroid Build Coastguard Worker     const_value.value.resize(sizeof(T));
206*993b0882SAndroid Build Coastguard Worker     flatbuffers::WriteScalar(const_value.value.data(), value);
207*993b0882SAndroid Build Coastguard Worker     return CreateExpression(const_value);
208*993b0882SAndroid Build Coastguard Worker   }
209*993b0882SAndroid Build Coastguard Worker 
210*993b0882SAndroid Build Coastguard Worker   template <>
CreateAndPackPrimitiveConstExpression(const StringPiece value)211*993b0882SAndroid Build Coastguard Worker   OwnedFlatbuffer<SemanticExpression> CreateAndPackPrimitiveConstExpression(
212*993b0882SAndroid Build Coastguard Worker       const StringPiece value) {
213*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
214*993b0882SAndroid Build Coastguard Worker     const_value.base_type = reflection::BaseType::String;
215*993b0882SAndroid Build Coastguard Worker     const_value.value.assign(value.data(), value.data() + value.size());
216*993b0882SAndroid Build Coastguard Worker     return CreateExpression(const_value);
217*993b0882SAndroid Build Coastguard Worker   }
218*993b0882SAndroid Build Coastguard Worker 
219*993b0882SAndroid Build Coastguard Worker   template <>
CreatePrimitiveConstExpression(const StringPiece value)220*993b0882SAndroid Build Coastguard Worker   std::unique_ptr<SemanticExpressionT> CreatePrimitiveConstExpression(
221*993b0882SAndroid Build Coastguard Worker       const StringPiece value) {
222*993b0882SAndroid Build Coastguard Worker     ConstValueExpressionT const_value;
223*993b0882SAndroid Build Coastguard Worker     const_value.base_type = reflection::BaseType::String;
224*993b0882SAndroid Build Coastguard Worker     const_value.value.assign(value.data(), value.data() + value.size());
225*993b0882SAndroid Build Coastguard Worker     auto semantic_expression = std::make_unique<SemanticExpressionT>();
226*993b0882SAndroid Build Coastguard Worker     semantic_expression->expression.Set(const_value);
227*993b0882SAndroid Build Coastguard Worker     return semantic_expression;
228*993b0882SAndroid Build Coastguard Worker   }
229*993b0882SAndroid Build Coastguard Worker 
230*993b0882SAndroid Build Coastguard Worker   const std::unique_ptr<UniLib> unilib_;
231*993b0882SAndroid Build Coastguard Worker   UnsafeArena arena_;
232*993b0882SAndroid Build Coastguard Worker   const OwnedFlatbuffer<reflection::Schema, std::string>
233*993b0882SAndroid Build Coastguard Worker       semantic_values_schema_;
234*993b0882SAndroid Build Coastguard Worker   const Tokenizer tokenizer_;
235*993b0882SAndroid Build Coastguard Worker };
236*993b0882SAndroid Build Coastguard Worker 
237*993b0882SAndroid Build Coastguard Worker }  // namespace libtextclassifier3::grammar
238*993b0882SAndroid Build Coastguard Worker 
239*993b0882SAndroid Build Coastguard Worker #endif  // LIBTEXTCLASSIFIER_UTILS_GRAMMAR_TESTING_UTILS_H_
240