xref: /aosp_15_r20/external/icu/libicu/cts_headers/rbbitblb.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker //
4*0e209d39SAndroid Build Coastguard Worker //  rbbitblb.h
5*0e209d39SAndroid Build Coastguard Worker //
6*0e209d39SAndroid Build Coastguard Worker 
7*0e209d39SAndroid Build Coastguard Worker /*
8*0e209d39SAndroid Build Coastguard Worker **********************************************************************
9*0e209d39SAndroid Build Coastguard Worker *   Copyright (c) 2002-2016, International Business Machines
10*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
11*0e209d39SAndroid Build Coastguard Worker **********************************************************************
12*0e209d39SAndroid Build Coastguard Worker */
13*0e209d39SAndroid Build Coastguard Worker 
14*0e209d39SAndroid Build Coastguard Worker #ifndef RBBITBLB_H
15*0e209d39SAndroid Build Coastguard Worker #define RBBITBLB_H
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
22*0e209d39SAndroid Build Coastguard Worker #include "unicode/rbbi.h"
23*0e209d39SAndroid Build Coastguard Worker #include "rbbidata.h"
24*0e209d39SAndroid Build Coastguard Worker #include "rbbirb.h"
25*0e209d39SAndroid Build Coastguard Worker #include "rbbinode.h"
26*0e209d39SAndroid Build Coastguard Worker 
27*0e209d39SAndroid Build Coastguard Worker 
28*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker class RBBIRuleScanner;
31*0e209d39SAndroid Build Coastguard Worker class RBBIRuleBuilder;
32*0e209d39SAndroid Build Coastguard Worker class UVector32;
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker //
35*0e209d39SAndroid Build Coastguard Worker //  class RBBITableBuilder is part of the RBBI rule compiler.
36*0e209d39SAndroid Build Coastguard Worker //                         It builds the state transition table used by the RBBI runtime
37*0e209d39SAndroid Build Coastguard Worker //                         from the expression syntax tree generated by the rule scanner.
38*0e209d39SAndroid Build Coastguard Worker //
39*0e209d39SAndroid Build Coastguard Worker //                         This class is part of the RBBI implementation only.
40*0e209d39SAndroid Build Coastguard Worker //                         There is no user-visible public API here.
41*0e209d39SAndroid Build Coastguard Worker //
42*0e209d39SAndroid Build Coastguard Worker 
43*0e209d39SAndroid Build Coastguard Worker class RBBITableBuilder : public UMemory {
44*0e209d39SAndroid Build Coastguard Worker public:
45*0e209d39SAndroid Build Coastguard Worker     RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
46*0e209d39SAndroid Build Coastguard Worker     ~RBBITableBuilder();
47*0e209d39SAndroid Build Coastguard Worker 
48*0e209d39SAndroid Build Coastguard Worker     void     buildForwardTable();
49*0e209d39SAndroid Build Coastguard Worker 
50*0e209d39SAndroid Build Coastguard Worker     /** Return the runtime size in bytes of the built state table.  */
51*0e209d39SAndroid Build Coastguard Worker     int32_t  getTableSize() const;
52*0e209d39SAndroid Build Coastguard Worker 
53*0e209d39SAndroid Build Coastguard Worker     /** Fill in the runtime state table. Sufficient memory must exist at the specified location.
54*0e209d39SAndroid Build Coastguard Worker      */
55*0e209d39SAndroid Build Coastguard Worker     void     exportTable(void *where);
56*0e209d39SAndroid Build Coastguard Worker 
57*0e209d39SAndroid Build Coastguard Worker     /** Use 8 bits to encode the forward table */
58*0e209d39SAndroid Build Coastguard Worker     bool     use8BitsForTable() const;
59*0e209d39SAndroid Build Coastguard Worker 
60*0e209d39SAndroid Build Coastguard Worker     /**
61*0e209d39SAndroid Build Coastguard Worker      *  Find duplicate (redundant) character classes. Begin looking with categories.first.
62*0e209d39SAndroid Build Coastguard Worker      *  Duplicate, if found are returned in the categories parameter.
63*0e209d39SAndroid Build Coastguard Worker      *  This is an iterator-like function, used to identify character classes
64*0e209d39SAndroid Build Coastguard Worker      *  (state table columns) that can be eliminated.
65*0e209d39SAndroid Build Coastguard Worker      *  @param categories in/out parameter, specifies where to start looking for duplicates,
66*0e209d39SAndroid Build Coastguard Worker      *                and returns the first pair of duplicates found, if any.
67*0e209d39SAndroid Build Coastguard Worker      *  @return true if duplicate char classes were found, false otherwise.
68*0e209d39SAndroid Build Coastguard Worker      */
69*0e209d39SAndroid Build Coastguard Worker     bool     findDuplCharClassFrom(IntPair *categories);
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker     /** Remove a column from the state table. Used when two character categories
72*0e209d39SAndroid Build Coastguard Worker      *  have been found equivalent, and merged together, to eliminate the unneeded table column.
73*0e209d39SAndroid Build Coastguard Worker      */
74*0e209d39SAndroid Build Coastguard Worker     void     removeColumn(int32_t column);
75*0e209d39SAndroid Build Coastguard Worker 
76*0e209d39SAndroid Build Coastguard Worker     /**
77*0e209d39SAndroid Build Coastguard Worker      * Check for, and remove duplicate states (table rows).
78*0e209d39SAndroid Build Coastguard Worker      * @return the number of states removed.
79*0e209d39SAndroid Build Coastguard Worker      */
80*0e209d39SAndroid Build Coastguard Worker     int32_t  removeDuplicateStates();
81*0e209d39SAndroid Build Coastguard Worker 
82*0e209d39SAndroid Build Coastguard Worker     /** Build the safe reverse table from the already-constructed forward table. */
83*0e209d39SAndroid Build Coastguard Worker     void     buildSafeReverseTable(UErrorCode &status);
84*0e209d39SAndroid Build Coastguard Worker 
85*0e209d39SAndroid Build Coastguard Worker     /** Return the runtime size in bytes of the built safe reverse state table. */
86*0e209d39SAndroid Build Coastguard Worker     int32_t  getSafeTableSize() const;
87*0e209d39SAndroid Build Coastguard Worker 
88*0e209d39SAndroid Build Coastguard Worker     /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
89*0e209d39SAndroid Build Coastguard Worker      */
90*0e209d39SAndroid Build Coastguard Worker     void     exportSafeTable(void *where);
91*0e209d39SAndroid Build Coastguard Worker 
92*0e209d39SAndroid Build Coastguard Worker     /** Use 8 bits to encode the safe reverse table */
93*0e209d39SAndroid Build Coastguard Worker     bool     use8BitsForSafeTable() const;
94*0e209d39SAndroid Build Coastguard Worker 
95*0e209d39SAndroid Build Coastguard Worker private:
96*0e209d39SAndroid Build Coastguard Worker     void     calcNullable(RBBINode *n);
97*0e209d39SAndroid Build Coastguard Worker     void     calcFirstPos(RBBINode *n);
98*0e209d39SAndroid Build Coastguard Worker     void     calcLastPos(RBBINode  *n);
99*0e209d39SAndroid Build Coastguard Worker     void     calcFollowPos(RBBINode *n);
100*0e209d39SAndroid Build Coastguard Worker     void     calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode);
101*0e209d39SAndroid Build Coastguard Worker     void     bofFixup();
102*0e209d39SAndroid Build Coastguard Worker     void     buildStateTable();
103*0e209d39SAndroid Build Coastguard Worker     void     mapLookAheadRules();
104*0e209d39SAndroid Build Coastguard Worker     void     flagAcceptingStates();
105*0e209d39SAndroid Build Coastguard Worker     void     flagLookAheadStates();
106*0e209d39SAndroid Build Coastguard Worker     void     flagTaggedStates();
107*0e209d39SAndroid Build Coastguard Worker     void     mergeRuleStatusVals();
108*0e209d39SAndroid Build Coastguard Worker 
109*0e209d39SAndroid Build Coastguard Worker     /**
110*0e209d39SAndroid Build Coastguard Worker      * Merge redundant state table columns, eliminating character classes with identical behavior.
111*0e209d39SAndroid Build Coastguard Worker      * Done after the state tables are generated, just before converting to their run-time format.
112*0e209d39SAndroid Build Coastguard Worker      */
113*0e209d39SAndroid Build Coastguard Worker     int32_t  mergeColumns();
114*0e209d39SAndroid Build Coastguard Worker 
115*0e209d39SAndroid Build Coastguard Worker     void     addRuleRootNodes(UVector *dest, RBBINode *node);
116*0e209d39SAndroid Build Coastguard Worker 
117*0e209d39SAndroid Build Coastguard Worker     /**
118*0e209d39SAndroid Build Coastguard Worker      *  Find duplicate (redundant) states, beginning at the specified pair,
119*0e209d39SAndroid Build Coastguard Worker      *  within this state table. This is an iterator-like function, used to
120*0e209d39SAndroid Build Coastguard Worker      *  identify states (state table rows) that can be eliminated.
121*0e209d39SAndroid Build Coastguard Worker      *  @param states in/out parameter, specifies where to start looking for duplicates,
122*0e209d39SAndroid Build Coastguard Worker      *                and returns the first pair of duplicates found, if any.
123*0e209d39SAndroid Build Coastguard Worker      *  @return true if duplicate states were found, false otherwise.
124*0e209d39SAndroid Build Coastguard Worker      */
125*0e209d39SAndroid Build Coastguard Worker     bool findDuplicateState(IntPair *states);
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker     /** Remove a duplicate state.
128*0e209d39SAndroid Build Coastguard Worker      * @param duplStates The duplicate states. The first is kept, the second is removed.
129*0e209d39SAndroid Build Coastguard Worker      *                   All references to the second in the state table are retargeted
130*0e209d39SAndroid Build Coastguard Worker      *                   to the first.
131*0e209d39SAndroid Build Coastguard Worker      */
132*0e209d39SAndroid Build Coastguard Worker     void removeState(IntPair duplStates);
133*0e209d39SAndroid Build Coastguard Worker 
134*0e209d39SAndroid Build Coastguard Worker     /** Find the next duplicate state in the safe reverse table. An iterator function.
135*0e209d39SAndroid Build Coastguard Worker      *  @param states in/out parameter, specifies where to start looking for duplicates,
136*0e209d39SAndroid Build Coastguard Worker      *                and returns the first pair of duplicates found, if any.
137*0e209d39SAndroid Build Coastguard Worker      *  @return true if a duplicate pair of states was found.
138*0e209d39SAndroid Build Coastguard Worker      */
139*0e209d39SAndroid Build Coastguard Worker     bool findDuplicateSafeState(IntPair *states);
140*0e209d39SAndroid Build Coastguard Worker 
141*0e209d39SAndroid Build Coastguard Worker     /** Remove a duplicate state from the safe table.
142*0e209d39SAndroid Build Coastguard Worker      * @param duplStates The duplicate states. The first is kept, the second is removed.
143*0e209d39SAndroid Build Coastguard Worker      *                   All references to the second in the state table are retargeted
144*0e209d39SAndroid Build Coastguard Worker      *                   to the first.
145*0e209d39SAndroid Build Coastguard Worker      */
146*0e209d39SAndroid Build Coastguard Worker     void removeSafeState(IntPair duplStates);
147*0e209d39SAndroid Build Coastguard Worker 
148*0e209d39SAndroid Build Coastguard Worker     // Set functions for UVector.
149*0e209d39SAndroid Build Coastguard Worker     //   TODO:  make a USet subclass of UVector
150*0e209d39SAndroid Build Coastguard Worker 
151*0e209d39SAndroid Build Coastguard Worker     void     setAdd(UVector *dest, UVector *source);
152*0e209d39SAndroid Build Coastguard Worker     UBool    setEquals(UVector *a, UVector *b);
153*0e209d39SAndroid Build Coastguard Worker 
154*0e209d39SAndroid Build Coastguard Worker     void     sortedAdd(UVector **dest, int32_t val);
155*0e209d39SAndroid Build Coastguard Worker 
156*0e209d39SAndroid Build Coastguard Worker public:
157*0e209d39SAndroid Build Coastguard Worker #ifdef RBBI_DEBUG
158*0e209d39SAndroid Build Coastguard Worker     void     printSet(UVector *s);
159*0e209d39SAndroid Build Coastguard Worker     void     printPosSets(RBBINode *n /* = nullptr */);
160*0e209d39SAndroid Build Coastguard Worker     void     printStates();
161*0e209d39SAndroid Build Coastguard Worker     void     printRuleStatusTable();
162*0e209d39SAndroid Build Coastguard Worker     void     printReverseTable();
163*0e209d39SAndroid Build Coastguard Worker #else
164*0e209d39SAndroid Build Coastguard Worker     #define  printSet(s)
165*0e209d39SAndroid Build Coastguard Worker     #define  printPosSets(n)
166*0e209d39SAndroid Build Coastguard Worker     #define  printStates()
167*0e209d39SAndroid Build Coastguard Worker     #define  printRuleStatusTable()
168*0e209d39SAndroid Build Coastguard Worker     #define  printReverseTable()
169*0e209d39SAndroid Build Coastguard Worker #endif
170*0e209d39SAndroid Build Coastguard Worker 
171*0e209d39SAndroid Build Coastguard Worker private:
172*0e209d39SAndroid Build Coastguard Worker     RBBIRuleBuilder  *fRB;
173*0e209d39SAndroid Build Coastguard Worker     RBBINode         *&fTree;              // The root node of the parse tree to build a
174*0e209d39SAndroid Build Coastguard Worker                                            //   table for.
175*0e209d39SAndroid Build Coastguard Worker     UErrorCode       *fStatus;
176*0e209d39SAndroid Build Coastguard Worker 
177*0e209d39SAndroid Build Coastguard Worker     /** State Descriptors, UVector<RBBIStateDescriptor> */
178*0e209d39SAndroid Build Coastguard Worker     UVector          *fDStates;            //  D states (Aho's terminology)
179*0e209d39SAndroid Build Coastguard Worker                                            //  Index is state number
180*0e209d39SAndroid Build Coastguard Worker                                            //  Contents are RBBIStateDescriptor pointers.
181*0e209d39SAndroid Build Coastguard Worker 
182*0e209d39SAndroid Build Coastguard Worker     /** Synthesized safe table, UVector of UnicodeString, one string per table row.   */
183*0e209d39SAndroid Build Coastguard Worker     UVector          *fSafeTable;
184*0e209d39SAndroid Build Coastguard Worker 
185*0e209d39SAndroid Build Coastguard Worker     /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
186*0e209d39SAndroid Build Coastguard Worker     UVector32        *fLookAheadRuleMap = nullptr;
187*0e209d39SAndroid Build Coastguard Worker 
188*0e209d39SAndroid Build Coastguard Worker     /* Counter used when assigning lookahead rule numbers.
189*0e209d39SAndroid Build Coastguard Worker      * Contains the last look-ahead number already in use.
190*0e209d39SAndroid Build Coastguard Worker      * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved
191*0e209d39SAndroid Build Coastguard Worker      * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT.   */
192*0e209d39SAndroid Build Coastguard Worker     int32_t          fLASlotsInUse = ACCEPTING_UNCONDITIONAL;
193*0e209d39SAndroid Build Coastguard Worker 
194*0e209d39SAndroid Build Coastguard Worker 
195*0e209d39SAndroid Build Coastguard Worker     RBBITableBuilder(const RBBITableBuilder &other) = delete; // forbid copying of this class
196*0e209d39SAndroid Build Coastguard Worker     RBBITableBuilder &operator=(const RBBITableBuilder &other) = delete; // forbid copying of this class
197*0e209d39SAndroid Build Coastguard Worker };
198*0e209d39SAndroid Build Coastguard Worker 
199*0e209d39SAndroid Build Coastguard Worker //
200*0e209d39SAndroid Build Coastguard Worker //  RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
201*0e209d39SAndroid Build Coastguard Worker //                        one for each state.
202*0e209d39SAndroid Build Coastguard Worker class RBBIStateDescriptor : public UMemory {
203*0e209d39SAndroid Build Coastguard Worker public:
204*0e209d39SAndroid Build Coastguard Worker     UBool            fMarked;
205*0e209d39SAndroid Build Coastguard Worker     uint32_t         fAccepting;
206*0e209d39SAndroid Build Coastguard Worker     uint32_t         fLookAhead;
207*0e209d39SAndroid Build Coastguard Worker     UVector          *fTagVals;
208*0e209d39SAndroid Build Coastguard Worker     int32_t          fTagsIdx;
209*0e209d39SAndroid Build Coastguard Worker     UVector          *fPositions;          // Set of parse tree positions associated
210*0e209d39SAndroid Build Coastguard Worker                                            //   with this state.  Unordered (it's a set).
211*0e209d39SAndroid Build Coastguard Worker                                            //   UVector contents are RBBINode *
212*0e209d39SAndroid Build Coastguard Worker 
213*0e209d39SAndroid Build Coastguard Worker     UVector32        *fDtran;              // Transitions out of this state.
214*0e209d39SAndroid Build Coastguard Worker                                            //   indexed by input character
215*0e209d39SAndroid Build Coastguard Worker                                            //   contents is int index of dest state
216*0e209d39SAndroid Build Coastguard Worker                                            //   in RBBITableBuilder.fDStates
217*0e209d39SAndroid Build Coastguard Worker 
218*0e209d39SAndroid Build Coastguard Worker     RBBIStateDescriptor(int maxInputSymbol,  UErrorCode *fStatus);
219*0e209d39SAndroid Build Coastguard Worker     ~RBBIStateDescriptor();
220*0e209d39SAndroid Build Coastguard Worker 
221*0e209d39SAndroid Build Coastguard Worker private:
222*0e209d39SAndroid Build Coastguard Worker     RBBIStateDescriptor(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
223*0e209d39SAndroid Build Coastguard Worker     RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
224*0e209d39SAndroid Build Coastguard Worker };
225*0e209d39SAndroid Build Coastguard Worker 
226*0e209d39SAndroid Build Coastguard Worker 
227*0e209d39SAndroid Build Coastguard Worker 
228*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
229*0e209d39SAndroid Build Coastguard Worker 
230*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
231*0e209d39SAndroid Build Coastguard Worker 
232*0e209d39SAndroid Build Coastguard Worker #endif
233