1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2013-2014, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 8*0e209d39SAndroid Build Coastguard Worker * collationruleparser.h 9*0e209d39SAndroid Build Coastguard Worker * 10*0e209d39SAndroid Build Coastguard Worker * created on: 2013apr10 11*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 12*0e209d39SAndroid Build Coastguard Worker */ 13*0e209d39SAndroid Build Coastguard Worker 14*0e209d39SAndroid Build Coastguard Worker #ifndef __COLLATIONRULEPARSER_H__ 15*0e209d39SAndroid Build Coastguard Worker #define __COLLATIONRULEPARSER_H__ 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucol.h" 22*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h" 23*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h" 24*0e209d39SAndroid Build Coastguard Worker 25*0e209d39SAndroid Build Coastguard Worker struct UParseError; 26*0e209d39SAndroid Build Coastguard Worker 27*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 28*0e209d39SAndroid Build Coastguard Worker 29*0e209d39SAndroid Build Coastguard Worker struct CollationData; 30*0e209d39SAndroid Build Coastguard Worker struct CollationTailoring; 31*0e209d39SAndroid Build Coastguard Worker 32*0e209d39SAndroid Build Coastguard Worker class Locale; 33*0e209d39SAndroid Build Coastguard Worker class Normalizer2; 34*0e209d39SAndroid Build Coastguard Worker 35*0e209d39SAndroid Build Coastguard Worker struct CollationSettings; 36*0e209d39SAndroid Build Coastguard Worker 37*0e209d39SAndroid Build Coastguard Worker class U_I18N_API CollationRuleParser : public UMemory { 38*0e209d39SAndroid Build Coastguard Worker public: 39*0e209d39SAndroid Build Coastguard Worker /** Special reset positions. */ 40*0e209d39SAndroid Build Coastguard Worker enum Position { 41*0e209d39SAndroid Build Coastguard Worker FIRST_TERTIARY_IGNORABLE, 42*0e209d39SAndroid Build Coastguard Worker LAST_TERTIARY_IGNORABLE, 43*0e209d39SAndroid Build Coastguard Worker FIRST_SECONDARY_IGNORABLE, 44*0e209d39SAndroid Build Coastguard Worker LAST_SECONDARY_IGNORABLE, 45*0e209d39SAndroid Build Coastguard Worker FIRST_PRIMARY_IGNORABLE, 46*0e209d39SAndroid Build Coastguard Worker LAST_PRIMARY_IGNORABLE, 47*0e209d39SAndroid Build Coastguard Worker FIRST_VARIABLE, 48*0e209d39SAndroid Build Coastguard Worker LAST_VARIABLE, 49*0e209d39SAndroid Build Coastguard Worker FIRST_REGULAR, 50*0e209d39SAndroid Build Coastguard Worker LAST_REGULAR, 51*0e209d39SAndroid Build Coastguard Worker FIRST_IMPLICIT, 52*0e209d39SAndroid Build Coastguard Worker LAST_IMPLICIT, 53*0e209d39SAndroid Build Coastguard Worker FIRST_TRAILING, 54*0e209d39SAndroid Build Coastguard Worker LAST_TRAILING 55*0e209d39SAndroid Build Coastguard Worker }; 56*0e209d39SAndroid Build Coastguard Worker 57*0e209d39SAndroid Build Coastguard Worker /** 58*0e209d39SAndroid Build Coastguard Worker * First character of contractions that encode special reset positions. 59*0e209d39SAndroid Build Coastguard Worker * U+FFFE cannot be tailored via rule syntax. 60*0e209d39SAndroid Build Coastguard Worker * 61*0e209d39SAndroid Build Coastguard Worker * The second contraction character is POS_BASE + Position. 62*0e209d39SAndroid Build Coastguard Worker */ 63*0e209d39SAndroid Build Coastguard Worker static const char16_t POS_LEAD = 0xfffe; 64*0e209d39SAndroid Build Coastguard Worker /** 65*0e209d39SAndroid Build Coastguard Worker * Base for the second character of contractions that encode special reset positions. 66*0e209d39SAndroid Build Coastguard Worker * Braille characters U+28xx are printable and normalization-inert. 67*0e209d39SAndroid Build Coastguard Worker * @see POS_LEAD 68*0e209d39SAndroid Build Coastguard Worker */ 69*0e209d39SAndroid Build Coastguard Worker static const char16_t POS_BASE = 0x2800; 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker class U_I18N_API Sink : public UObject { 72*0e209d39SAndroid Build Coastguard Worker public: 73*0e209d39SAndroid Build Coastguard Worker virtual ~Sink(); 74*0e209d39SAndroid Build Coastguard Worker /** 75*0e209d39SAndroid Build Coastguard Worker * Adds a reset. 76*0e209d39SAndroid Build Coastguard Worker * strength=UCOL_IDENTICAL for &str. 77*0e209d39SAndroid Build Coastguard Worker * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3. 78*0e209d39SAndroid Build Coastguard Worker */ 79*0e209d39SAndroid Build Coastguard Worker virtual void addReset(int32_t strength, const UnicodeString &str, 80*0e209d39SAndroid Build Coastguard Worker const char *&errorReason, UErrorCode &errorCode) = 0; 81*0e209d39SAndroid Build Coastguard Worker /** 82*0e209d39SAndroid Build Coastguard Worker * Adds a relation with strength and prefix | str / extension. 83*0e209d39SAndroid Build Coastguard Worker */ 84*0e209d39SAndroid Build Coastguard Worker virtual void addRelation(int32_t strength, const UnicodeString &prefix, 85*0e209d39SAndroid Build Coastguard Worker const UnicodeString &str, const UnicodeString &extension, 86*0e209d39SAndroid Build Coastguard Worker const char *&errorReason, UErrorCode &errorCode) = 0; 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker virtual void suppressContractions(const UnicodeSet &set, const char *&errorReason, 89*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode); 90*0e209d39SAndroid Build Coastguard Worker 91*0e209d39SAndroid Build Coastguard Worker virtual void optimize(const UnicodeSet &set, const char *&errorReason, 92*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode); 93*0e209d39SAndroid Build Coastguard Worker }; 94*0e209d39SAndroid Build Coastguard Worker 95*0e209d39SAndroid Build Coastguard Worker class U_I18N_API Importer : public UObject { 96*0e209d39SAndroid Build Coastguard Worker public: 97*0e209d39SAndroid Build Coastguard Worker virtual ~Importer(); 98*0e209d39SAndroid Build Coastguard Worker virtual void getRules( 99*0e209d39SAndroid Build Coastguard Worker const char *localeID, const char *collationType, 100*0e209d39SAndroid Build Coastguard Worker UnicodeString &rules, 101*0e209d39SAndroid Build Coastguard Worker const char *&errorReason, UErrorCode &errorCode) = 0; 102*0e209d39SAndroid Build Coastguard Worker }; 103*0e209d39SAndroid Build Coastguard Worker 104*0e209d39SAndroid Build Coastguard Worker /** 105*0e209d39SAndroid Build Coastguard Worker * Constructor. 106*0e209d39SAndroid Build Coastguard Worker * The Sink must be set before parsing. 107*0e209d39SAndroid Build Coastguard Worker * The Importer can be set, otherwise [import locale] syntax is not supported. 108*0e209d39SAndroid Build Coastguard Worker */ 109*0e209d39SAndroid Build Coastguard Worker CollationRuleParser(const CollationData *base, UErrorCode &errorCode); 110*0e209d39SAndroid Build Coastguard Worker ~CollationRuleParser(); 111*0e209d39SAndroid Build Coastguard Worker 112*0e209d39SAndroid Build Coastguard Worker /** 113*0e209d39SAndroid Build Coastguard Worker * Sets the pointer to a Sink object. 114*0e209d39SAndroid Build Coastguard Worker * The pointer is aliased: Pointer copy without cloning or taking ownership. 115*0e209d39SAndroid Build Coastguard Worker */ setSink(Sink * sinkAlias)116*0e209d39SAndroid Build Coastguard Worker void setSink(Sink *sinkAlias) { 117*0e209d39SAndroid Build Coastguard Worker sink = sinkAlias; 118*0e209d39SAndroid Build Coastguard Worker } 119*0e209d39SAndroid Build Coastguard Worker 120*0e209d39SAndroid Build Coastguard Worker /** 121*0e209d39SAndroid Build Coastguard Worker * Sets the pointer to an Importer object. 122*0e209d39SAndroid Build Coastguard Worker * The pointer is aliased: Pointer copy without cloning or taking ownership. 123*0e209d39SAndroid Build Coastguard Worker */ setImporter(Importer * importerAlias)124*0e209d39SAndroid Build Coastguard Worker void setImporter(Importer *importerAlias) { 125*0e209d39SAndroid Build Coastguard Worker importer = importerAlias; 126*0e209d39SAndroid Build Coastguard Worker } 127*0e209d39SAndroid Build Coastguard Worker 128*0e209d39SAndroid Build Coastguard Worker void parse(const UnicodeString &ruleString, 129*0e209d39SAndroid Build Coastguard Worker CollationSettings &outSettings, 130*0e209d39SAndroid Build Coastguard Worker UParseError *outParseError, 131*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode); 132*0e209d39SAndroid Build Coastguard Worker getErrorReason()133*0e209d39SAndroid Build Coastguard Worker const char *getErrorReason() const { return errorReason; } 134*0e209d39SAndroid Build Coastguard Worker 135*0e209d39SAndroid Build Coastguard Worker /** 136*0e209d39SAndroid Build Coastguard Worker * Gets a script or reorder code from its string representation. 137*0e209d39SAndroid Build Coastguard Worker * @return the script/reorder code, or 138*0e209d39SAndroid Build Coastguard Worker * -1 if not recognized 139*0e209d39SAndroid Build Coastguard Worker */ 140*0e209d39SAndroid Build Coastguard Worker static int32_t getReorderCode(const char *word); 141*0e209d39SAndroid Build Coastguard Worker 142*0e209d39SAndroid Build Coastguard Worker private: 143*0e209d39SAndroid Build Coastguard Worker /** UCOL_PRIMARY=0 .. UCOL_IDENTICAL=15 */ 144*0e209d39SAndroid Build Coastguard Worker static const int32_t STRENGTH_MASK = 0xf; 145*0e209d39SAndroid Build Coastguard Worker static const int32_t STARRED_FLAG = 0x10; 146*0e209d39SAndroid Build Coastguard Worker static const int32_t OFFSET_SHIFT = 8; 147*0e209d39SAndroid Build Coastguard Worker 148*0e209d39SAndroid Build Coastguard Worker void parse(const UnicodeString &ruleString, UErrorCode &errorCode); 149*0e209d39SAndroid Build Coastguard Worker void parseRuleChain(UErrorCode &errorCode); 150*0e209d39SAndroid Build Coastguard Worker int32_t parseResetAndPosition(UErrorCode &errorCode); 151*0e209d39SAndroid Build Coastguard Worker int32_t parseRelationOperator(UErrorCode &errorCode); 152*0e209d39SAndroid Build Coastguard Worker void parseRelationStrings(int32_t strength, int32_t i, UErrorCode &errorCode); 153*0e209d39SAndroid Build Coastguard Worker void parseStarredCharacters(int32_t strength, int32_t i, UErrorCode &errorCode); 154*0e209d39SAndroid Build Coastguard Worker int32_t parseTailoringString(int32_t i, UnicodeString &raw, UErrorCode &errorCode); 155*0e209d39SAndroid Build Coastguard Worker int32_t parseString(int32_t i, UnicodeString &raw, UErrorCode &errorCode); 156*0e209d39SAndroid Build Coastguard Worker 157*0e209d39SAndroid Build Coastguard Worker /** 158*0e209d39SAndroid Build Coastguard Worker * Sets str to a contraction of U+FFFE and (U+2800 + Position). 159*0e209d39SAndroid Build Coastguard Worker * @return rule index after the special reset position 160*0e209d39SAndroid Build Coastguard Worker */ 161*0e209d39SAndroid Build Coastguard Worker int32_t parseSpecialPosition(int32_t i, UnicodeString &str, UErrorCode &errorCode); 162*0e209d39SAndroid Build Coastguard Worker void parseSetting(UErrorCode &errorCode); 163*0e209d39SAndroid Build Coastguard Worker void parseReordering(const UnicodeString &raw, UErrorCode &errorCode); 164*0e209d39SAndroid Build Coastguard Worker static UColAttributeValue getOnOffValue(const UnicodeString &s); 165*0e209d39SAndroid Build Coastguard Worker 166*0e209d39SAndroid Build Coastguard Worker int32_t parseUnicodeSet(int32_t i, UnicodeSet &set, UErrorCode &errorCode); 167*0e209d39SAndroid Build Coastguard Worker int32_t readWords(int32_t i, UnicodeString &raw) const; 168*0e209d39SAndroid Build Coastguard Worker int32_t skipComment(int32_t i) const; 169*0e209d39SAndroid Build Coastguard Worker 170*0e209d39SAndroid Build Coastguard Worker void setParseError(const char *reason, UErrorCode &errorCode); 171*0e209d39SAndroid Build Coastguard Worker void setErrorContext(); 172*0e209d39SAndroid Build Coastguard Worker 173*0e209d39SAndroid Build Coastguard Worker /** 174*0e209d39SAndroid Build Coastguard Worker * ASCII [:P:] and [:S:]: 175*0e209d39SAndroid Build Coastguard Worker * [\u0021-\u002F \u003A-\u0040 \u005B-\u0060 \u007B-\u007E] 176*0e209d39SAndroid Build Coastguard Worker */ 177*0e209d39SAndroid Build Coastguard Worker static UBool isSyntaxChar(UChar32 c); 178*0e209d39SAndroid Build Coastguard Worker int32_t skipWhiteSpace(int32_t i) const; 179*0e209d39SAndroid Build Coastguard Worker 180*0e209d39SAndroid Build Coastguard Worker const Normalizer2 &nfd, &nfc; 181*0e209d39SAndroid Build Coastguard Worker 182*0e209d39SAndroid Build Coastguard Worker const UnicodeString *rules; 183*0e209d39SAndroid Build Coastguard Worker const CollationData *const baseData; 184*0e209d39SAndroid Build Coastguard Worker CollationSettings *settings; 185*0e209d39SAndroid Build Coastguard Worker UParseError *parseError; 186*0e209d39SAndroid Build Coastguard Worker const char *errorReason; 187*0e209d39SAndroid Build Coastguard Worker 188*0e209d39SAndroid Build Coastguard Worker Sink *sink; 189*0e209d39SAndroid Build Coastguard Worker Importer *importer; 190*0e209d39SAndroid Build Coastguard Worker 191*0e209d39SAndroid Build Coastguard Worker int32_t ruleIndex; 192*0e209d39SAndroid Build Coastguard Worker }; 193*0e209d39SAndroid Build Coastguard Worker 194*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 195*0e209d39SAndroid Build Coastguard Worker 196*0e209d39SAndroid Build Coastguard Worker #endif // !UCONFIG_NO_COLLATION 197*0e209d39SAndroid Build Coastguard Worker #endif // __COLLATIONRULEPARSER_H__ 198