xref: /aosp_15_r20/external/icu/icu4c/source/test/intltest/rbbitst.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*************************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  *************************************************************************
7  *   Date        Name        Description
8  *   12/15/99    Madhu        Creation.
9  *   01/12/2000  Madhu        Updated for changed API and added new tests
10  ************************************************************************/
11 
12 
13 #ifndef RBBITEST_H
14 #define RBBITEST_H
15 
16 #include "unicode/utypes.h"
17 
18 #if !UCONFIG_NO_BREAK_ITERATION
19 
20 #include <stdio.h>
21 
22 #include <memory>
23 
24 #include "intltest.h"
25 #include "unicode/brkiter.h"
26 #include "unicode/rbbi.h"
27 #include "unicode/uscript.h"
28 
29 class  Enumeration;
30 class  BITestData;
31 struct TestParams;
32 class  RBBIMonkeyKind;
33 
34 U_NAMESPACE_BEGIN
35 class  UVector32;
36 U_NAMESPACE_END
37 
38 /**
39  * Test the RuleBasedBreakIterator class giving different rules
40  */
41 class RBBITest: public IntlTest {
42 public:
43 
44     RBBITest();
45     virtual ~RBBITest();
46 
47     void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = nullptr ) override;
48 
49     void TestGetAvailableLocales();
50     void TestGetDisplayName();
51     void TestEndBehaviour();
52     void TestBug4153072();
53     void TestJapaneseLineBreak();
54     void TestThaiLineBreak();
55     void TestMixedThaiLineBreak();
56     void TestMaiyamok();
57     void TestMonkey();
58 
59     void TestExtended();
60     void executeTest(TestParams *, UErrorCode &status);
61 
62     void TestWordBreaks();
63     void TestWordBoundary();
64     void TestLineBreaks();
65     void TestSentBreaks();
66     void TestBug3818();
67     void TestJapaneseWordBreak();
68     void TestTrieDict();
69     void TestUnicodeFiles();
70     void TestBug5775();
71     void TestTailoredBreaks();
72     void TestDictRules();
73     void TestBug5532();
74     void TestBug9983();
75     void TestBug7547();
76     void TestBug12797();
77     void TestBug12918();
78     void TestBug12932();
79     void TestEmoji();
80     void TestBug12519();
81     void TestBug12677();
82     void TestTableRedundancies();
83     void TestBug13447();
84     void TestReverse();
85     void TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi);
86     void TestBug13692();
87     void TestDebugRules();
88     void TestUnpairedSurrogate();
89 
90     void TestDebug();
91     void TestProperties();
92     void Test8BitsTrieWith8BitStateTable();
93     void Test8BitsTrieWith16BitStateTable();
94     void Test16BitsTrieWith8BitStateTable();
95     void Test16BitsTrieWith16BitStateTable();
96     void TestTable_8_16_Bits();
97     void TestBug13590();
98     void TestLSTMThai();
99     void TestLSTMBurmese();
100     void TestRandomAccess();
101     void TestExternalBreakEngineWithFakeTaiLe();
102     void TestExternalBreakEngineWithFakeYue();
103     void TestBug22579();
104     void TestBug22581();
105     void TestBug22584();
106     void TestBug22585();
107     void TestBug22602();
108     void TestBug22636();
109 
110 #if U_ENABLE_TRACING
111     void TestTraceCreateCharacter();
112     void TestTraceCreateWord();
113     void TestTraceCreateSentence();
114     void TestTraceCreateTitle();
115     void TestTraceCreateLine();
116     void TestTraceCreateLineNormal();
117     void TestTraceCreateLineStrict();
118     void TestTraceCreateLineLoose();
119     void TestTraceCreateLineNormalPhrase();
120     void TestTraceCreateLineLoosePhrase();
121     void TestTraceCreateLineStrictPhrase();
122     void TestTraceCreateLinePhrase();
123     void TestTraceCreateBreakEngine();
124 #endif
125 
126 /***********************/
127 private:
128     /**
129      * internal methods to prepare test data
130      **/
131 
132     void RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t  seed,
133         int32_t loopCount, UBool useUText, FILE *exportFile, UBool scalarsOnly);
134 
135     // Run one of the Unicode Consortium boundary test data files.
136     void runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi);
137 
138     // Run tests from one of the LSTM test files.
139     void runLSTMTestFromFile(const char* filename, UScriptCode script);
140 
141     // Run a single test case from one of the Unicode Consortium test files.
142     void checkUnicodeTestCase(const char *testFileName, int lineNumber,
143                          const UnicodeString &testString,
144                          UVector32 *breakPositions,
145                          RuleBasedBreakIterator *bi);
146 
147     // Run the actual tests for TestTailoredBreaks()
148     void TBTest(BreakIterator* brkitr, int type, const char *locale, const char* escapedText,
149                 const int32_t *expectOffsets, int32_t expectOffsetsCount);
150 
151     /** Filter for test cases from the Unicode test data files.
152      *  Some need to be skipped because ICU is unable to fully implement the
153      *  Unicode boundary specifications.
154      *  @param testCase the test data string.
155      *  @param fileName the Unicode test data file name.
156      *  @return false if the test case should be run, true if it should be skipped.
157      */
158     UBool testCaseIsKnownIssue(const UnicodeString &testCase, const char *fileName);
159 
160     // Test parameters, from the test framework and test invocation.
161     const char* fTestParams;
162 
163     // Helper functions to test different trie bit sizes and state table bit sizes.
164     void testTrieStateTable(int32_t numChar, bool expectedTrieWidthIn8Bits, bool expectedStateRowIn8Bits);
165 
166 #if U_ENABLE_TRACING
167     void assertTestTraceResult(int32_t fnNumber, const char* expectedData);
168 #endif
169 
170 };
171 
172 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
173 
174 #endif
175