xref: /aosp_15_r20/external/icu/libicu/cts_headers/rbbidata.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 1999-2014 International Business Machines
7*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   file name:  rbbidata.h
11*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
12*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
13*0e209d39SAndroid Build Coastguard Worker *   indentation:4
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   RBBI data formats  Includes
16*0e209d39SAndroid Build Coastguard Worker *
17*0e209d39SAndroid Build Coastguard Worker *                          Structs that describe the format of the Binary RBBI data,
18*0e209d39SAndroid Build Coastguard Worker *                          as it is stored in ICU's data file.
19*0e209d39SAndroid Build Coastguard Worker *
20*0e209d39SAndroid Build Coastguard Worker *      RBBIDataWrapper  -  Instances of this class sit between the
21*0e209d39SAndroid Build Coastguard Worker *                          raw data structs and the RulesBasedBreakIterator objects
22*0e209d39SAndroid Build Coastguard Worker *                          that are created by applications.  The wrapper class
23*0e209d39SAndroid Build Coastguard Worker *                          provides reference counting for the underlying data,
24*0e209d39SAndroid Build Coastguard Worker *                          and direct pointers to data that would not otherwise
25*0e209d39SAndroid Build Coastguard Worker *                          be accessible without ugly pointer arithmetic.  The
26*0e209d39SAndroid Build Coastguard Worker *                          wrapper does not attempt to provide any higher level
27*0e209d39SAndroid Build Coastguard Worker *                          abstractions for the data itself.
28*0e209d39SAndroid Build Coastguard Worker *
29*0e209d39SAndroid Build Coastguard Worker *                          There will be only one instance of RBBIDataWrapper for any
30*0e209d39SAndroid Build Coastguard Worker *                          set of RBBI run time data being shared by instances
31*0e209d39SAndroid Build Coastguard Worker *                          (clones) of RulesBasedBreakIterator.
32*0e209d39SAndroid Build Coastguard Worker */
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker #ifndef __RBBIDATA_H__
35*0e209d39SAndroid Build Coastguard Worker #define __RBBIDATA_H__
36*0e209d39SAndroid Build Coastguard Worker 
37*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
38*0e209d39SAndroid Build Coastguard Worker #include "unicode/udata.h"
39*0e209d39SAndroid Build Coastguard Worker #include "udataswp.h"
40*0e209d39SAndroid Build Coastguard Worker 
41*0e209d39SAndroid Build Coastguard Worker /**
42*0e209d39SAndroid Build Coastguard Worker  * Swap RBBI data. See udataswp.h (NOTE(review): the arguments presumably follow the swap-function convention declared there -- confirm).
43*0e209d39SAndroid Build Coastguard Worker  * @internal
44*0e209d39SAndroid Build Coastguard Worker  */
45*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
46*0e209d39SAndroid Build Coastguard Worker ubrk_swap(const UDataSwapper *ds,
47*0e209d39SAndroid Build Coastguard Worker           const void *inData, int32_t length, void *outData,
48*0e209d39SAndroid Build Coastguard Worker           UErrorCode *pErrorCode);
49*0e209d39SAndroid Build Coastguard Worker 
50*0e209d39SAndroid Build Coastguard Worker #ifdef __cplusplus
51*0e209d39SAndroid Build Coastguard Worker 
52*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucptrie.h"
53*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
54*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
55*0e209d39SAndroid Build Coastguard Worker #include "unicode/uversion.h"
56*0e209d39SAndroid Build Coastguard Worker #include "umutex.h"
57*0e209d39SAndroid Build Coastguard Worker 
58*0e209d39SAndroid Build Coastguard Worker 
59*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
60*0e209d39SAndroid Build Coastguard Worker 
61*0e209d39SAndroid Build Coastguard Worker // The current RBBI data format version, as four version bytes (6.0.0.0).
62*0e209d39SAndroid Build Coastguard Worker static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {6, 0, 0, 0};
63*0e209d39SAndroid Build Coastguard Worker 
64*0e209d39SAndroid Build Coastguard Worker /*
65*0e209d39SAndroid Build Coastguard Worker  *   The following structs map exactly onto the raw data from the ICU common data file.
66*0e209d39SAndroid Build Coastguard Worker  */
67*0e209d39SAndroid Build Coastguard Worker struct RBBIDataHeader {
68*0e209d39SAndroid Build Coastguard Worker     uint32_t         fMagic;           /*  == 0xb1a0                                               */
69*0e209d39SAndroid Build Coastguard Worker     UVersionInfo     fFormatVersion;   /* Data Format.  Same as the value in struct UDataInfo      */
70*0e209d39SAndroid Build Coastguard Worker                                        /*   if there is one associated with this data.             */
71*0e209d39SAndroid Build Coastguard Worker                                        /*     (version originates in rbbi, is copied to UDataInfo) */
72*0e209d39SAndroid Build Coastguard Worker     uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,                */
73*0e209d39SAndroid Build Coastguard Worker                                        /*      including all sections, not just the header.        */
74*0e209d39SAndroid Build Coastguard Worker     uint32_t         fCatCount;        /*  Number of character categories.                         */
75*0e209d39SAndroid Build Coastguard Worker 
76*0e209d39SAndroid Build Coastguard Worker     /*                                                                        */
77*0e209d39SAndroid Build Coastguard Worker     /*  Offsets and sizes of each of the subsections within the RBBI data.    */
78*0e209d39SAndroid Build Coastguard Worker     /*  All offsets are bytes from the start of the RBBIDataHeader.           */
79*0e209d39SAndroid Build Coastguard Worker     /*  All sizes are in bytes.                                               */
80*0e209d39SAndroid Build Coastguard Worker     /*                                                                        */
81*0e209d39SAndroid Build Coastguard Worker     uint32_t         fFTable;         /*  Offset to the forward state transition table. */
82*0e209d39SAndroid Build Coastguard Worker     uint32_t         fFTableLen;      /*  Size in bytes of the forward state transition table. */
83*0e209d39SAndroid Build Coastguard Worker     uint32_t         fRTable;         /*  Offset to the reverse state transition table. */
84*0e209d39SAndroid Build Coastguard Worker     uint32_t         fRTableLen;      /*  Size in bytes of the reverse state transition table. */
85*0e209d39SAndroid Build Coastguard Worker     uint32_t         fTrie;           /*  Offset to Trie data for character categories */
86*0e209d39SAndroid Build Coastguard Worker     uint32_t         fTrieLen;        /*  Size in bytes of the Trie data. */
87*0e209d39SAndroid Build Coastguard Worker     uint32_t         fRuleSource;     /*  Offset to the source for the break */
88*0e209d39SAndroid Build Coastguard Worker     uint32_t         fRuleSourceLen;  /*    rules.  Stored char16_t *.  NOTE(review): RBBIDataWrapper::fRuleSource is a const char *, so the stated encoding may be stale -- confirm. */
89*0e209d39SAndroid Build Coastguard Worker     uint32_t         fStatusTable;    /* Offset to the table of rule status values */
90*0e209d39SAndroid Build Coastguard Worker     uint32_t         fStatusTableLen; /* Size in bytes of the rule status table. */
91*0e209d39SAndroid Build Coastguard Worker 
92*0e209d39SAndroid Build Coastguard Worker     uint32_t         fReserved[6];    /*  Reserved for expansion */
93*0e209d39SAndroid Build Coastguard Worker 
94*0e209d39SAndroid Build Coastguard Worker };
95*0e209d39SAndroid Build Coastguard Worker 
96*0e209d39SAndroid Build Coastguard Worker 
97*0e209d39SAndroid Build Coastguard Worker 
98*0e209d39SAndroid Build Coastguard Worker template <typename T>
99*0e209d39SAndroid Build Coastguard Worker struct RBBIStateTableRowT {        //  One row of a state table; T is the integer type used for every cell in the row.
100*0e209d39SAndroid Build Coastguard Worker     T               fAccepting;    //  Non-zero if this row is for an accepting state.
101*0e209d39SAndroid Build Coastguard Worker                                    //  Value 0: not an accepting state.
102*0e209d39SAndroid Build Coastguard Worker                                    //        1: (ACCEPTING_UNCONDITIONAL) Unconditional Accepting state.
103*0e209d39SAndroid Build Coastguard Worker                                    //       >1: Look-ahead match has completed.
104*0e209d39SAndroid Build Coastguard Worker                                    //           Actual boundary position happened earlier.
105*0e209d39SAndroid Build Coastguard Worker                                    //           Value here == fLookAhead in earlier
106*0e209d39SAndroid Build Coastguard Worker                                    //           state, at actual boundary pos.
107*0e209d39SAndroid Build Coastguard Worker     T               fLookAhead;    //  Non-zero if this row is for a state that
108*0e209d39SAndroid Build Coastguard Worker                                    //    corresponds to a '/' in the rule source.
109*0e209d39SAndroid Build Coastguard Worker                                    //    Value is the same as the fAccepting
110*0e209d39SAndroid Build Coastguard Worker                                    //    value for the rule (which will appear
111*0e209d39SAndroid Build Coastguard Worker                                    //    in a different state).
112*0e209d39SAndroid Build Coastguard Worker     T               fTagsIdx;      //  Non-zero if this row covers a {tagged} position
113*0e209d39SAndroid Build Coastguard Worker                                    //    from a rule.  Value is the index in the
114*0e209d39SAndroid Build Coastguard Worker                                    //    StatusTable of the set of matching
115*0e209d39SAndroid Build Coastguard Worker                                    //    tags (rule status values)
116*0e209d39SAndroid Build Coastguard Worker     T               fNextState[1]; //  Next State, indexed by char category.
117*0e209d39SAndroid Build Coastguard Worker                                    //    Variable-length array declared with length 1
118*0e209d39SAndroid Build Coastguard Worker                                    //    to disable bounds checkers.
119*0e209d39SAndroid Build Coastguard Worker                                    //    Array Size is actually fData->fHeader->fCatCount
120*0e209d39SAndroid Build Coastguard Worker                                    //    CAUTION:  see RBBITableBuilder::getTableSize()
121*0e209d39SAndroid Build Coastguard Worker                                    //              before changing anything here.
122*0e209d39SAndroid Build Coastguard Worker };
123*0e209d39SAndroid Build Coastguard Worker 
124*0e209d39SAndroid Build Coastguard Worker typedef RBBIStateTableRowT<uint8_t> RBBIStateTableRow8;     // Row layout with 8-bit cells.
125*0e209d39SAndroid Build Coastguard Worker typedef RBBIStateTableRowT<uint16_t> RBBIStateTableRow16;   // Row layout with 16-bit cells.
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker constexpr uint16_t ACCEPTING_UNCONDITIONAL = 1;   // Value constant for RBBIStateTableRow::fAccepting
128*0e209d39SAndroid Build Coastguard Worker 
129*0e209d39SAndroid Build Coastguard Worker union RBBIStateTableRow {   // Overlays the 16-bit and 8-bit row layouts on the same storage.
130*0e209d39SAndroid Build Coastguard Worker   RBBIStateTableRow16 r16;  // NOTE(review): which member is valid presumably depends on the table's RBBI_8BITS_ROWS flag -- confirm.
131*0e209d39SAndroid Build Coastguard Worker   RBBIStateTableRow8 r8;
132*0e209d39SAndroid Build Coastguard Worker };
133*0e209d39SAndroid Build Coastguard Worker 
134*0e209d39SAndroid Build Coastguard Worker struct RBBIStateTable {                     // Fixed fields of one state table; the rows start at fTableData.
135*0e209d39SAndroid Build Coastguard Worker     uint32_t         fNumStates;            // Number of states.
136*0e209d39SAndroid Build Coastguard Worker     uint32_t         fRowLen;               // Length of a state table row, in bytes.
137*0e209d39SAndroid Build Coastguard Worker     uint32_t         fDictCategoriesStart;  // Char category number of the first dictionary
138*0e209d39SAndroid Build Coastguard Worker                                             //   char class, or the largest category number + 1
139*0e209d39SAndroid Build Coastguard Worker                                             //   if there are no dictionary categories.
140*0e209d39SAndroid Build Coastguard Worker     uint32_t         fLookAheadResultsSize; // Size of run-time array required for holding
141*0e209d39SAndroid Build Coastguard Worker                                             //   look-ahead results. Indexed by row.fLookAhead.
142*0e209d39SAndroid Build Coastguard Worker     uint32_t         fFlags;                // Option Flags for this state table.
143*0e209d39SAndroid Build Coastguard Worker     char             fTableData[1];         // First RBBIStateTableRow begins here.
144*0e209d39SAndroid Build Coastguard Worker                                             //   Variable-length array declared with length 1
145*0e209d39SAndroid Build Coastguard Worker                                             //   to disable bounds checkers.
146*0e209d39SAndroid Build Coastguard Worker                                             //   (making it char[] simplifies ugly address
147*0e209d39SAndroid Build Coastguard Worker                                             //   arithmetic for indexing variable length rows.)
148*0e209d39SAndroid Build Coastguard Worker };
149*0e209d39SAndroid Build Coastguard Worker 
150*0e209d39SAndroid Build Coastguard Worker constexpr uint32_t RBBI_LOOKAHEAD_HARD_BREAK = 1;   // NOTE(review): these three constants look like bit values for RBBIStateTable::fFlags -- confirm.
151*0e209d39SAndroid Build Coastguard Worker constexpr uint32_t RBBI_BOF_REQUIRED = 2;
152*0e209d39SAndroid Build Coastguard Worker constexpr uint32_t RBBI_8BITS_ROWS = 4;             // NOTE(review): presumably marks a table whose rows use the RBBIStateTableRow8 layout -- confirm.
153*0e209d39SAndroid Build Coastguard Worker 
154*0e209d39SAndroid Build Coastguard Worker 
155*0e209d39SAndroid Build Coastguard Worker /*   The reference counting wrapper class.                                   */
156*0e209d39SAndroid Build Coastguard Worker /*   Holds a reference count plus direct pointers to the sections of one     */
157*0e209d39SAndroid Build Coastguard Worker /*   set of RBBI data.  Copy construction and copy assignment are deleted.   */
158*0e209d39SAndroid Build Coastguard Worker class RBBIDataWrapper : public UMemory {
159*0e209d39SAndroid Build Coastguard Worker public:
160*0e209d39SAndroid Build Coastguard Worker     enum EDontAdopt {       // Tag type that selects the three-argument constructor overload.
161*0e209d39SAndroid Build Coastguard Worker         kDontAdopt          // NOTE(review): presumably means the wrapper must not free the data -- confirm.
162*0e209d39SAndroid Build Coastguard Worker     };
163*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
164*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
165*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
166*0e209d39SAndroid Build Coastguard Worker     ~RBBIDataWrapper();
167*0e209d39SAndroid Build Coastguard Worker 
168*0e209d39SAndroid Build Coastguard Worker     static UBool          isDataVersionAcceptable(const UVersionInfo version);  // NOTE(review): presumably checks against RBBI_DATA_FORMAT_VERSION -- confirm.
169*0e209d39SAndroid Build Coastguard Worker 
170*0e209d39SAndroid Build Coastguard Worker     void                  init0();
171*0e209d39SAndroid Build Coastguard Worker     void                  init(const RBBIDataHeader *data, UErrorCode &status);
172*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper      *addReference();     // Reference-counting pair; NOTE(review): presumably adjusts fRefCount -- confirm.
173*0e209d39SAndroid Build Coastguard Worker     void                  removeReference();
174*0e209d39SAndroid Build Coastguard Worker     bool                  operator ==(const RBBIDataWrapper &other) const;
175*0e209d39SAndroid Build Coastguard Worker     int32_t               hashCode();
176*0e209d39SAndroid Build Coastguard Worker     const UnicodeString  &getRuleSourceString() const;
177*0e209d39SAndroid Build Coastguard Worker     void                  printData();
178*0e209d39SAndroid Build Coastguard Worker     void                  printTable(const char *heading, const RBBIStateTable *table);
179*0e209d39SAndroid Build Coastguard Worker 
180*0e209d39SAndroid Build Coastguard Worker     /*                                     */
181*0e209d39SAndroid Build Coastguard Worker     /*   Pointers to items within the data */
182*0e209d39SAndroid Build Coastguard Worker     /*                                     */
183*0e209d39SAndroid Build Coastguard Worker     const RBBIDataHeader     *fHeader;
184*0e209d39SAndroid Build Coastguard Worker     const RBBIStateTable     *fForwardTable;
185*0e209d39SAndroid Build Coastguard Worker     const RBBIStateTable     *fReverseTable;
186*0e209d39SAndroid Build Coastguard Worker     const char               *fRuleSource;
187*0e209d39SAndroid Build Coastguard Worker     const int32_t            *fRuleStatusTable;
188*0e209d39SAndroid Build Coastguard Worker 
189*0e209d39SAndroid Build Coastguard Worker     /* number of int32_t values in the rule status table.   Used to sanity check indexing */
190*0e209d39SAndroid Build Coastguard Worker     int32_t             fStatusMaxIdx;
191*0e209d39SAndroid Build Coastguard Worker 
192*0e209d39SAndroid Build Coastguard Worker     UCPTrie             *fTrie;          // NOTE(review): presumably built from the header's fTrie section (character categories) -- confirm.
193*0e209d39SAndroid Build Coastguard Worker 
194*0e209d39SAndroid Build Coastguard Worker private:
195*0e209d39SAndroid Build Coastguard Worker     u_atomic_int32_t    fRefCount;       // Atomic counter backing the reference counting.
196*0e209d39SAndroid Build Coastguard Worker     UDataMemory        *fUDataMem;
197*0e209d39SAndroid Build Coastguard Worker     UnicodeString       fRuleString;
198*0e209d39SAndroid Build Coastguard Worker     UBool               fDontFreeData;
199*0e209d39SAndroid Build Coastguard Worker 
200*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper(const RBBIDataWrapper &other) = delete; /*  forbid copying of this class */
201*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper &operator=(const RBBIDataWrapper &other) = delete; /*  forbid copying of this class */
202*0e209d39SAndroid Build Coastguard Worker };
203*0e209d39SAndroid Build Coastguard Worker 
204*0e209d39SAndroid Build Coastguard Worker 
205*0e209d39SAndroid Build Coastguard Worker 
206*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
207*0e209d39SAndroid Build Coastguard Worker 
208*0e209d39SAndroid Build Coastguard Worker U_CFUNC UBool rbbi_cleanup();   // NOTE(review): presumably the cleanup hook for RBBI static state -- confirm against the definition.
209*0e209d39SAndroid Build Coastguard Worker 
210*0e209d39SAndroid Build Coastguard Worker #endif /* C++ */
211*0e209d39SAndroid Build Coastguard Worker 
212*0e209d39SAndroid Build Coastguard Worker #endif
213