xref: /aosp_15_r20/external/icu/libicu/cts_headers/collationruleparser.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2013-2014, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker * Corporation and others.  All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker * collationruleparser.h
9*0e209d39SAndroid Build Coastguard Worker *
10*0e209d39SAndroid Build Coastguard Worker * created on: 2013apr10
11*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer
12*0e209d39SAndroid Build Coastguard Worker */
13*0e209d39SAndroid Build Coastguard Worker 
14*0e209d39SAndroid Build Coastguard Worker #ifndef __COLLATIONRULEPARSER_H__
15*0e209d39SAndroid Build Coastguard Worker #define __COLLATIONRULEPARSER_H__
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucol.h"
22*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h"
23*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
24*0e209d39SAndroid Build Coastguard Worker 
25*0e209d39SAndroid Build Coastguard Worker struct UParseError;
26*0e209d39SAndroid Build Coastguard Worker 
27*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
28*0e209d39SAndroid Build Coastguard Worker 
29*0e209d39SAndroid Build Coastguard Worker struct CollationData;
30*0e209d39SAndroid Build Coastguard Worker struct CollationTailoring;
31*0e209d39SAndroid Build Coastguard Worker 
32*0e209d39SAndroid Build Coastguard Worker class Locale;
33*0e209d39SAndroid Build Coastguard Worker class Normalizer2;
34*0e209d39SAndroid Build Coastguard Worker 
35*0e209d39SAndroid Build Coastguard Worker struct CollationSettings;
36*0e209d39SAndroid Build Coastguard Worker 
37*0e209d39SAndroid Build Coastguard Worker class U_I18N_API CollationRuleParser : public UMemory {
38*0e209d39SAndroid Build Coastguard Worker public:
39*0e209d39SAndroid Build Coastguard Worker     /** Special reset positions. */
40*0e209d39SAndroid Build Coastguard Worker     enum Position {
41*0e209d39SAndroid Build Coastguard Worker         FIRST_TERTIARY_IGNORABLE,
42*0e209d39SAndroid Build Coastguard Worker         LAST_TERTIARY_IGNORABLE,
43*0e209d39SAndroid Build Coastguard Worker         FIRST_SECONDARY_IGNORABLE,
44*0e209d39SAndroid Build Coastguard Worker         LAST_SECONDARY_IGNORABLE,
45*0e209d39SAndroid Build Coastguard Worker         FIRST_PRIMARY_IGNORABLE,
46*0e209d39SAndroid Build Coastguard Worker         LAST_PRIMARY_IGNORABLE,
47*0e209d39SAndroid Build Coastguard Worker         FIRST_VARIABLE,
48*0e209d39SAndroid Build Coastguard Worker         LAST_VARIABLE,
49*0e209d39SAndroid Build Coastguard Worker         FIRST_REGULAR,
50*0e209d39SAndroid Build Coastguard Worker         LAST_REGULAR,
51*0e209d39SAndroid Build Coastguard Worker         FIRST_IMPLICIT,
52*0e209d39SAndroid Build Coastguard Worker         LAST_IMPLICIT,
53*0e209d39SAndroid Build Coastguard Worker         FIRST_TRAILING,
54*0e209d39SAndroid Build Coastguard Worker         LAST_TRAILING
55*0e209d39SAndroid Build Coastguard Worker     };
56*0e209d39SAndroid Build Coastguard Worker 
57*0e209d39SAndroid Build Coastguard Worker     /**
58*0e209d39SAndroid Build Coastguard Worker      * First character of contractions that encode special reset positions.
59*0e209d39SAndroid Build Coastguard Worker      * U+FFFE cannot be tailored via rule syntax.
60*0e209d39SAndroid Build Coastguard Worker      *
61*0e209d39SAndroid Build Coastguard Worker      * The second contraction character is POS_BASE + Position.
62*0e209d39SAndroid Build Coastguard Worker      */
63*0e209d39SAndroid Build Coastguard Worker     static const char16_t POS_LEAD = 0xfffe;
64*0e209d39SAndroid Build Coastguard Worker     /**
65*0e209d39SAndroid Build Coastguard Worker      * Base for the second character of contractions that encode special reset positions.
66*0e209d39SAndroid Build Coastguard Worker      * Braille characters U+28xx are printable and normalization-inert.
67*0e209d39SAndroid Build Coastguard Worker      * @see POS_LEAD
68*0e209d39SAndroid Build Coastguard Worker      */
69*0e209d39SAndroid Build Coastguard Worker     static const char16_t POS_BASE = 0x2800;
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker     class U_I18N_API Sink : public UObject {
72*0e209d39SAndroid Build Coastguard Worker     public:
73*0e209d39SAndroid Build Coastguard Worker         virtual ~Sink();
74*0e209d39SAndroid Build Coastguard Worker         /**
75*0e209d39SAndroid Build Coastguard Worker          * Adds a reset.
76*0e209d39SAndroid Build Coastguard Worker          * strength=UCOL_IDENTICAL for &str.
77*0e209d39SAndroid Build Coastguard Worker          * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3.
78*0e209d39SAndroid Build Coastguard Worker          */
79*0e209d39SAndroid Build Coastguard Worker         virtual void addReset(int32_t strength, const UnicodeString &str,
80*0e209d39SAndroid Build Coastguard Worker                               const char *&errorReason, UErrorCode &errorCode) = 0;
81*0e209d39SAndroid Build Coastguard Worker         /**
82*0e209d39SAndroid Build Coastguard Worker          * Adds a relation with strength and prefix | str / extension.
83*0e209d39SAndroid Build Coastguard Worker          */
84*0e209d39SAndroid Build Coastguard Worker         virtual void addRelation(int32_t strength, const UnicodeString &prefix,
85*0e209d39SAndroid Build Coastguard Worker                                  const UnicodeString &str, const UnicodeString &extension,
86*0e209d39SAndroid Build Coastguard Worker                                  const char *&errorReason, UErrorCode &errorCode) = 0;
87*0e209d39SAndroid Build Coastguard Worker 
88*0e209d39SAndroid Build Coastguard Worker         virtual void suppressContractions(const UnicodeSet &set, const char *&errorReason,
89*0e209d39SAndroid Build Coastguard Worker                                           UErrorCode &errorCode);
90*0e209d39SAndroid Build Coastguard Worker 
91*0e209d39SAndroid Build Coastguard Worker         virtual void optimize(const UnicodeSet &set, const char *&errorReason,
92*0e209d39SAndroid Build Coastguard Worker                               UErrorCode &errorCode);
93*0e209d39SAndroid Build Coastguard Worker     };
94*0e209d39SAndroid Build Coastguard Worker 
95*0e209d39SAndroid Build Coastguard Worker     class U_I18N_API Importer : public UObject {
96*0e209d39SAndroid Build Coastguard Worker     public:
97*0e209d39SAndroid Build Coastguard Worker         virtual ~Importer();
98*0e209d39SAndroid Build Coastguard Worker         virtual void getRules(
99*0e209d39SAndroid Build Coastguard Worker                 const char *localeID, const char *collationType,
100*0e209d39SAndroid Build Coastguard Worker                 UnicodeString &rules,
101*0e209d39SAndroid Build Coastguard Worker                 const char *&errorReason, UErrorCode &errorCode) = 0;
102*0e209d39SAndroid Build Coastguard Worker     };
103*0e209d39SAndroid Build Coastguard Worker 
104*0e209d39SAndroid Build Coastguard Worker     /**
105*0e209d39SAndroid Build Coastguard Worker      * Constructor.
106*0e209d39SAndroid Build Coastguard Worker      * The Sink must be set before parsing.
107*0e209d39SAndroid Build Coastguard Worker      * The Importer can be set, otherwise [import locale] syntax is not supported.
108*0e209d39SAndroid Build Coastguard Worker      */
109*0e209d39SAndroid Build Coastguard Worker     CollationRuleParser(const CollationData *base, UErrorCode &errorCode);
110*0e209d39SAndroid Build Coastguard Worker     ~CollationRuleParser();
111*0e209d39SAndroid Build Coastguard Worker 
112*0e209d39SAndroid Build Coastguard Worker     /**
113*0e209d39SAndroid Build Coastguard Worker      * Sets the pointer to a Sink object.
114*0e209d39SAndroid Build Coastguard Worker      * The pointer is aliased: Pointer copy without cloning or taking ownership.
115*0e209d39SAndroid Build Coastguard Worker      */
setSink(Sink * sinkAlias)116*0e209d39SAndroid Build Coastguard Worker     void setSink(Sink *sinkAlias) {
117*0e209d39SAndroid Build Coastguard Worker         sink = sinkAlias;
118*0e209d39SAndroid Build Coastguard Worker     }
119*0e209d39SAndroid Build Coastguard Worker 
120*0e209d39SAndroid Build Coastguard Worker     /**
121*0e209d39SAndroid Build Coastguard Worker      * Sets the pointer to an Importer object.
122*0e209d39SAndroid Build Coastguard Worker      * The pointer is aliased: Pointer copy without cloning or taking ownership.
123*0e209d39SAndroid Build Coastguard Worker      */
setImporter(Importer * importerAlias)124*0e209d39SAndroid Build Coastguard Worker     void setImporter(Importer *importerAlias) {
125*0e209d39SAndroid Build Coastguard Worker         importer = importerAlias;
126*0e209d39SAndroid Build Coastguard Worker     }
127*0e209d39SAndroid Build Coastguard Worker 
128*0e209d39SAndroid Build Coastguard Worker     void parse(const UnicodeString &ruleString,
129*0e209d39SAndroid Build Coastguard Worker                CollationSettings &outSettings,
130*0e209d39SAndroid Build Coastguard Worker                UParseError *outParseError,
131*0e209d39SAndroid Build Coastguard Worker                UErrorCode &errorCode);
132*0e209d39SAndroid Build Coastguard Worker 
getErrorReason()133*0e209d39SAndroid Build Coastguard Worker     const char *getErrorReason() const { return errorReason; }
134*0e209d39SAndroid Build Coastguard Worker 
135*0e209d39SAndroid Build Coastguard Worker     /**
136*0e209d39SAndroid Build Coastguard Worker      * Gets a script or reorder code from its string representation.
137*0e209d39SAndroid Build Coastguard Worker      * @return the script/reorder code, or
138*0e209d39SAndroid Build Coastguard Worker      * -1 if not recognized
139*0e209d39SAndroid Build Coastguard Worker      */
140*0e209d39SAndroid Build Coastguard Worker     static int32_t getReorderCode(const char *word);
141*0e209d39SAndroid Build Coastguard Worker 
142*0e209d39SAndroid Build Coastguard Worker private:
143*0e209d39SAndroid Build Coastguard Worker     /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */
144*0e209d39SAndroid Build Coastguard Worker     static const int32_t STRENGTH_MASK = 0xf;
145*0e209d39SAndroid Build Coastguard Worker     static const int32_t STARRED_FLAG = 0x10;
146*0e209d39SAndroid Build Coastguard Worker     static const int32_t OFFSET_SHIFT = 8;
147*0e209d39SAndroid Build Coastguard Worker 
148*0e209d39SAndroid Build Coastguard Worker     void parse(const UnicodeString &ruleString, UErrorCode &errorCode);
149*0e209d39SAndroid Build Coastguard Worker     void parseRuleChain(UErrorCode &errorCode);
150*0e209d39SAndroid Build Coastguard Worker     int32_t parseResetAndPosition(UErrorCode &errorCode);
151*0e209d39SAndroid Build Coastguard Worker     int32_t parseRelationOperator(UErrorCode &errorCode);
152*0e209d39SAndroid Build Coastguard Worker     void parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode);
153*0e209d39SAndroid Build Coastguard Worker     void parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode);
154*0e209d39SAndroid Build Coastguard Worker     int32_t parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode);
155*0e209d39SAndroid Build Coastguard Worker     int32_t parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode);
156*0e209d39SAndroid Build Coastguard Worker 
157*0e209d39SAndroid Build Coastguard Worker     /**
158*0e209d39SAndroid Build Coastguard Worker      * Sets str to a contraction of U+FFFE and (U+2800 + Position).
159*0e209d39SAndroid Build Coastguard Worker      * @return rule index after the special reset position
160*0e209d39SAndroid Build Coastguard Worker      */
161*0e209d39SAndroid Build Coastguard Worker     int32_t parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode);
162*0e209d39SAndroid Build Coastguard Worker     void parseSetting(UErrorCode &errorCode);
163*0e209d39SAndroid Build Coastguard Worker     void parseReordering(const UnicodeString &raw, UErrorCode &errorCode);
164*0e209d39SAndroid Build Coastguard Worker     static UColAttributeValue getOnOffValue(const UnicodeString &s);
165*0e209d39SAndroid Build Coastguard Worker 
166*0e209d39SAndroid Build Coastguard Worker     int32_t parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode);
167*0e209d39SAndroid Build Coastguard Worker     int32_t readWords(int32_t i, UnicodeString &raw) const;
168*0e209d39SAndroid Build Coastguard Worker     int32_t skipComment(int32_t i) const;
169*0e209d39SAndroid Build Coastguard Worker 
170*0e209d39SAndroid Build Coastguard Worker     void setParseError(const char *reason, UErrorCode &errorCode);
171*0e209d39SAndroid Build Coastguard Worker     void setErrorContext();
172*0e209d39SAndroid Build Coastguard Worker 
173*0e209d39SAndroid Build Coastguard Worker     /**
174*0e209d39SAndroid Build Coastguard Worker      * ASCII [:P:] and [:S:]:
175*0e209d39SAndroid Build Coastguard Worker      * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E]
176*0e209d39SAndroid Build Coastguard Worker      */
177*0e209d39SAndroid Build Coastguard Worker     static UBool isSyntaxChar(UChar32 c);
178*0e209d39SAndroid Build Coastguard Worker     int32_t skipWhiteSpace(int32_t i) const;
179*0e209d39SAndroid Build Coastguard Worker 
180*0e209d39SAndroid Build Coastguard Worker     const Normalizer2 &nfd, &nfc;
181*0e209d39SAndroid Build Coastguard Worker 
182*0e209d39SAndroid Build Coastguard Worker     const UnicodeString *rules;
183*0e209d39SAndroid Build Coastguard Worker     const CollationData *const baseData;
184*0e209d39SAndroid Build Coastguard Worker     CollationSettings *settings;
185*0e209d39SAndroid Build Coastguard Worker     UParseError *parseError;
186*0e209d39SAndroid Build Coastguard Worker     const char *errorReason;
187*0e209d39SAndroid Build Coastguard Worker 
188*0e209d39SAndroid Build Coastguard Worker     Sink *sink;
189*0e209d39SAndroid Build Coastguard Worker     Importer *importer;
190*0e209d39SAndroid Build Coastguard Worker 
191*0e209d39SAndroid Build Coastguard Worker     int32_t ruleIndex;
192*0e209d39SAndroid Build Coastguard Worker };
193*0e209d39SAndroid Build Coastguard Worker 
194*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
195*0e209d39SAndroid Build Coastguard Worker 
196*0e209d39SAndroid Build Coastguard Worker #endif  // !UCONFIG_NO_COLLATION
197*0e209d39SAndroid Build Coastguard Worker #endif  // __COLLATIONRULEPARSER_H__
198