1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2013-2014, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 8*0e209d39SAndroid Build Coastguard Worker * collationsets.h 9*0e209d39SAndroid Build Coastguard Worker * 10*0e209d39SAndroid Build Coastguard Worker * created on: 2013feb09 11*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 12*0e209d39SAndroid Build Coastguard Worker */ 13*0e209d39SAndroid Build Coastguard Worker 14*0e209d39SAndroid Build Coastguard Worker #ifndef __COLLATIONSETS_H__ 15*0e209d39SAndroid Build Coastguard Worker #define __COLLATIONSETS_H__ 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h" 22*0e209d39SAndroid Build Coastguard Worker #include "collation.h" 23*0e209d39SAndroid Build Coastguard Worker 24*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 25*0e209d39SAndroid Build Coastguard Worker 26*0e209d39SAndroid Build Coastguard Worker struct CollationData; 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker /** 29*0e209d39SAndroid Build Coastguard Worker * Finds the set of characters and strings that sort differently in the tailoring 30*0e209d39SAndroid Build Coastguard Worker * from the base data. 31*0e209d39SAndroid Build Coastguard Worker * 32*0e209d39SAndroid Build Coastguard Worker * Every mapping in the tailoring needs to be compared to the base, 33*0e209d39SAndroid Build Coastguard Worker * because some mappings are copied for optimization, and 34*0e209d39SAndroid Build Coastguard Worker * all contractions for a character are copied if any contractions for that character 35*0e209d39SAndroid Build Coastguard Worker * are added, modified or removed. 36*0e209d39SAndroid Build Coastguard Worker * 37*0e209d39SAndroid Build Coastguard Worker * It might be simpler to re-parse the rule string, but: 38*0e209d39SAndroid Build Coastguard Worker * - That would require duplicating some of the from-rules builder code. 39*0e209d39SAndroid Build Coastguard Worker * - That would make the runtime code depend on the builder. 40*0e209d39SAndroid Build Coastguard Worker * - That would only work if we have the rule string, and we allow users to 41*0e209d39SAndroid Build Coastguard Worker * omit the rule string from data files. 42*0e209d39SAndroid Build Coastguard Worker */ 43*0e209d39SAndroid Build Coastguard Worker class TailoredSet : public UMemory { 44*0e209d39SAndroid Build Coastguard Worker public: TailoredSet(UnicodeSet * t)45*0e209d39SAndroid Build Coastguard Worker TailoredSet(UnicodeSet *t) 46*0e209d39SAndroid Build Coastguard Worker : data(nullptr), baseData(nullptr), 47*0e209d39SAndroid Build Coastguard Worker tailored(t), 48*0e209d39SAndroid Build Coastguard Worker suffix(nullptr), 49*0e209d39SAndroid Build Coastguard Worker errorCode(U_ZERO_ERROR) {} 50*0e209d39SAndroid Build Coastguard Worker 51*0e209d39SAndroid Build Coastguard Worker void forData(const CollationData *d, UErrorCode &errorCode); 52*0e209d39SAndroid Build Coastguard Worker 53*0e209d39SAndroid Build Coastguard Worker /** 54*0e209d39SAndroid Build Coastguard Worker * @return U_SUCCESS(errorCode) in C++, void in Java 55*0e209d39SAndroid Build Coastguard Worker * @internal only public for access by callback 56*0e209d39SAndroid Build Coastguard Worker */ 57*0e209d39SAndroid Build Coastguard Worker UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 58*0e209d39SAndroid Build Coastguard Worker 59*0e209d39SAndroid Build Coastguard Worker private: 60*0e209d39SAndroid Build Coastguard Worker void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); 61*0e209d39SAndroid Build Coastguard Worker void comparePrefixes(UChar32 c, const char16_t *p, const char16_t *q); 62*0e209d39SAndroid Build Coastguard Worker void compareContractions(UChar32 c, const char16_t *p, const char16_t *q); 63*0e209d39SAndroid Build Coastguard Worker 64*0e209d39SAndroid Build Coastguard Worker void addPrefixes(const CollationData *d, UChar32 c, const char16_t *p); 65*0e209d39SAndroid Build Coastguard Worker void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); 66*0e209d39SAndroid Build Coastguard Worker void addContractions(UChar32 c, const char16_t *p); 67*0e209d39SAndroid Build Coastguard Worker void addSuffix(UChar32 c, const UnicodeString &sfx); 68*0e209d39SAndroid Build Coastguard Worker void add(UChar32 c); 69*0e209d39SAndroid Build Coastguard Worker 70*0e209d39SAndroid Build Coastguard Worker /** Prefixes are reversed in the data structure. */ setPrefix(const UnicodeString & pfx)71*0e209d39SAndroid Build Coastguard Worker void setPrefix(const UnicodeString &pfx) { 72*0e209d39SAndroid Build Coastguard Worker unreversedPrefix = pfx; 73*0e209d39SAndroid Build Coastguard Worker unreversedPrefix.reverse(); 74*0e209d39SAndroid Build Coastguard Worker } resetPrefix()75*0e209d39SAndroid Build Coastguard Worker void resetPrefix() { 76*0e209d39SAndroid Build Coastguard Worker unreversedPrefix.remove(); 77*0e209d39SAndroid Build Coastguard Worker } 78*0e209d39SAndroid Build Coastguard Worker 79*0e209d39SAndroid Build Coastguard Worker const CollationData *data; 80*0e209d39SAndroid Build Coastguard Worker const CollationData *baseData; 81*0e209d39SAndroid Build Coastguard Worker UnicodeSet *tailored; 82*0e209d39SAndroid Build Coastguard Worker UnicodeString unreversedPrefix; 83*0e209d39SAndroid Build Coastguard Worker const UnicodeString *suffix; 84*0e209d39SAndroid Build Coastguard Worker UErrorCode errorCode; 85*0e209d39SAndroid Build Coastguard Worker }; 86*0e209d39SAndroid Build Coastguard Worker 87*0e209d39SAndroid Build Coastguard Worker class ContractionsAndExpansions : public UMemory { 88*0e209d39SAndroid Build Coastguard Worker public: 89*0e209d39SAndroid Build Coastguard Worker class CESink : public UMemory { 90*0e209d39SAndroid Build Coastguard Worker public: 91*0e209d39SAndroid Build Coastguard Worker virtual ~CESink(); 92*0e209d39SAndroid Build Coastguard Worker virtual void handleCE(int64_t ce) = 0; 93*0e209d39SAndroid Build Coastguard Worker virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; 94*0e209d39SAndroid Build Coastguard Worker }; 95*0e209d39SAndroid Build Coastguard Worker ContractionsAndExpansions(UnicodeSet * con,UnicodeSet * exp,CESink * s,UBool prefixes)96*0e209d39SAndroid Build Coastguard Worker ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) 97*0e209d39SAndroid Build Coastguard Worker : data(nullptr), 98*0e209d39SAndroid Build Coastguard Worker contractions(con), expansions(exp), 99*0e209d39SAndroid Build Coastguard Worker sink(s), 100*0e209d39SAndroid Build Coastguard Worker addPrefixes(prefixes), 101*0e209d39SAndroid Build Coastguard Worker checkTailored(0), 102*0e209d39SAndroid Build Coastguard Worker suffix(nullptr), 103*0e209d39SAndroid Build Coastguard Worker errorCode(U_ZERO_ERROR) {} 104*0e209d39SAndroid Build Coastguard Worker 105*0e209d39SAndroid Build Coastguard Worker void forData(const CollationData *d, UErrorCode &errorCode); 106*0e209d39SAndroid Build Coastguard Worker void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); 107*0e209d39SAndroid Build Coastguard Worker 108*0e209d39SAndroid Build Coastguard Worker // all following: @internal, only public for access by callback 109*0e209d39SAndroid Build Coastguard Worker 110*0e209d39SAndroid Build Coastguard Worker void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 111*0e209d39SAndroid Build Coastguard Worker 112*0e209d39SAndroid Build Coastguard Worker void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); 113*0e209d39SAndroid Build Coastguard Worker void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); 114*0e209d39SAndroid Build Coastguard Worker 115*0e209d39SAndroid Build Coastguard Worker void addExpansions(UChar32 start, UChar32 end); 116*0e209d39SAndroid Build Coastguard Worker void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); 117*0e209d39SAndroid Build Coastguard Worker 118*0e209d39SAndroid Build Coastguard Worker /** Prefixes are reversed in the data structure. */ setPrefix(const UnicodeString & pfx)119*0e209d39SAndroid Build Coastguard Worker void setPrefix(const UnicodeString &pfx) { 120*0e209d39SAndroid Build Coastguard Worker unreversedPrefix = pfx; 121*0e209d39SAndroid Build Coastguard Worker unreversedPrefix.reverse(); 122*0e209d39SAndroid Build Coastguard Worker } resetPrefix()123*0e209d39SAndroid Build Coastguard Worker void resetPrefix() { 124*0e209d39SAndroid Build Coastguard Worker unreversedPrefix.remove(); 125*0e209d39SAndroid Build Coastguard Worker } 126*0e209d39SAndroid Build Coastguard Worker 127*0e209d39SAndroid Build Coastguard Worker const CollationData *data; 128*0e209d39SAndroid Build Coastguard Worker UnicodeSet *contractions; 129*0e209d39SAndroid Build Coastguard Worker UnicodeSet *expansions; 130*0e209d39SAndroid Build Coastguard Worker CESink *sink; 131*0e209d39SAndroid Build Coastguard Worker UBool addPrefixes; 132*0e209d39SAndroid Build Coastguard Worker int8_t checkTailored; // -1: collected tailored +1: exclude tailored 133*0e209d39SAndroid Build Coastguard Worker UnicodeSet tailored; 134*0e209d39SAndroid Build Coastguard Worker UnicodeSet ranges; 135*0e209d39SAndroid Build Coastguard Worker UnicodeString unreversedPrefix; 136*0e209d39SAndroid Build Coastguard Worker const UnicodeString *suffix; 137*0e209d39SAndroid Build Coastguard Worker int64_t ces[Collation::MAX_EXPANSION_LENGTH]; 138*0e209d39SAndroid Build Coastguard Worker UErrorCode errorCode; 139*0e209d39SAndroid Build Coastguard Worker }; 140*0e209d39SAndroid Build Coastguard Worker 141*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 142*0e209d39SAndroid Build Coastguard Worker 143*0e209d39SAndroid Build Coastguard Worker #endif // !UCONFIG_NO_COLLATION 144*0e209d39SAndroid Build Coastguard Worker #endif // __COLLATIONSETS_H__ 145