1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2013-2014, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 8*0e209d39SAndroid Build Coastguard Worker * collationrootelements.h 9*0e209d39SAndroid Build Coastguard Worker * 10*0e209d39SAndroid Build Coastguard Worker * created on: 2013mar01 11*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 12*0e209d39SAndroid Build Coastguard Worker */ 13*0e209d39SAndroid Build Coastguard Worker 14*0e209d39SAndroid Build Coastguard Worker #ifndef __COLLATIONROOTELEMENTS_H__ 15*0e209d39SAndroid Build Coastguard Worker #define __COLLATIONROOTELEMENTS_H__ 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 22*0e209d39SAndroid Build Coastguard Worker #include "collation.h" 23*0e209d39SAndroid Build Coastguard Worker 24*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 25*0e209d39SAndroid Build Coastguard Worker 26*0e209d39SAndroid Build Coastguard Worker /** 27*0e209d39SAndroid Build Coastguard Worker * Container and access methods for collation elements and weights 28*0e209d39SAndroid Build Coastguard Worker * that occur in the root collator. 29*0e209d39SAndroid Build Coastguard Worker * Needed for finding boundaries for building a tailoring. 30*0e209d39SAndroid Build Coastguard Worker * 31*0e209d39SAndroid Build Coastguard Worker * This class takes and returns 16-bit secondary and tertiary weights. 32*0e209d39SAndroid Build Coastguard Worker */ 33*0e209d39SAndroid Build Coastguard Worker class U_I18N_API CollationRootElements : public UMemory { 34*0e209d39SAndroid Build Coastguard Worker public: CollationRootElements(const uint32_t * rootElements,int32_t rootElementsLength)35*0e209d39SAndroid Build Coastguard Worker CollationRootElements(const uint32_t *rootElements, int32_t rootElementsLength) 36*0e209d39SAndroid Build Coastguard Worker : elements(rootElements), length(rootElementsLength) {} 37*0e209d39SAndroid Build Coastguard Worker 38*0e209d39SAndroid Build Coastguard Worker /** 39*0e209d39SAndroid Build Coastguard Worker * Higher than any root primary. 40*0e209d39SAndroid Build Coastguard Worker */ 41*0e209d39SAndroid Build Coastguard Worker static const uint32_t PRIMARY_SENTINEL = 0xffffff00; 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker /** 44*0e209d39SAndroid Build Coastguard Worker * Flag in a root element, set if the element contains secondary & tertiary weights, 45*0e209d39SAndroid Build Coastguard Worker * rather than a primary. 46*0e209d39SAndroid Build Coastguard Worker */ 47*0e209d39SAndroid Build Coastguard Worker static const uint32_t SEC_TER_DELTA_FLAG = 0x80; 48*0e209d39SAndroid Build Coastguard Worker /** 49*0e209d39SAndroid Build Coastguard Worker * Mask for getting the primary range step value from a primary-range-end element. 50*0e209d39SAndroid Build Coastguard Worker */ 51*0e209d39SAndroid Build Coastguard Worker static const uint8_t PRIMARY_STEP_MASK = 0x7f; 52*0e209d39SAndroid Build Coastguard Worker 53*0e209d39SAndroid Build Coastguard Worker enum { 54*0e209d39SAndroid Build Coastguard Worker /** 55*0e209d39SAndroid Build Coastguard Worker * Index of the first CE with a non-zero tertiary weight. 56*0e209d39SAndroid Build Coastguard Worker * Same as the start of the compact root elements table. 57*0e209d39SAndroid Build Coastguard Worker */ 58*0e209d39SAndroid Build Coastguard Worker IX_FIRST_TERTIARY_INDEX, 59*0e209d39SAndroid Build Coastguard Worker /** 60*0e209d39SAndroid Build Coastguard Worker * Index of the first CE with a non-zero secondary weight. 61*0e209d39SAndroid Build Coastguard Worker */ 62*0e209d39SAndroid Build Coastguard Worker IX_FIRST_SECONDARY_INDEX, 63*0e209d39SAndroid Build Coastguard Worker /** 64*0e209d39SAndroid Build Coastguard Worker * Index of the first CE with a non-zero primary weight. 65*0e209d39SAndroid Build Coastguard Worker */ 66*0e209d39SAndroid Build Coastguard Worker IX_FIRST_PRIMARY_INDEX, 67*0e209d39SAndroid Build Coastguard Worker /** 68*0e209d39SAndroid Build Coastguard Worker * Must match Collation::COMMON_SEC_AND_TER_CE. 69*0e209d39SAndroid Build Coastguard Worker */ 70*0e209d39SAndroid Build Coastguard Worker IX_COMMON_SEC_AND_TER_CE, 71*0e209d39SAndroid Build Coastguard Worker /** 72*0e209d39SAndroid Build Coastguard Worker * Secondary & tertiary boundaries. 73*0e209d39SAndroid Build Coastguard Worker * Bits 31..24: [fixed last secondary common byte 45] 74*0e209d39SAndroid Build Coastguard Worker * Bits 23..16: [fixed first ignorable secondary byte 80] 75*0e209d39SAndroid Build Coastguard Worker * Bits 15.. 8: reserved, 0 76*0e209d39SAndroid Build Coastguard Worker * Bits 7.. 0: [fixed first ignorable tertiary byte 3C] 77*0e209d39SAndroid Build Coastguard Worker */ 78*0e209d39SAndroid Build Coastguard Worker IX_SEC_TER_BOUNDARIES, 79*0e209d39SAndroid Build Coastguard Worker /** 80*0e209d39SAndroid Build Coastguard Worker * The current number of indexes. 81*0e209d39SAndroid Build Coastguard Worker * Currently the same as elements[IX_FIRST_TERTIARY_INDEX]. 82*0e209d39SAndroid Build Coastguard Worker */ 83*0e209d39SAndroid Build Coastguard Worker IX_COUNT 84*0e209d39SAndroid Build Coastguard Worker }; 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker /** 87*0e209d39SAndroid Build Coastguard Worker * Returns the boundary between tertiary weights of primary/secondary CEs 88*0e209d39SAndroid Build Coastguard Worker * and those of tertiary CEs. 89*0e209d39SAndroid Build Coastguard Worker * This is the upper limit for tertiaries of primary/secondary CEs. 90*0e209d39SAndroid Build Coastguard Worker * This minus one is the lower limit for tertiaries of tertiary CEs. 91*0e209d39SAndroid Build Coastguard Worker */ getTertiaryBoundary()92*0e209d39SAndroid Build Coastguard Worker uint32_t getTertiaryBoundary() const { 93*0e209d39SAndroid Build Coastguard Worker return (elements[IX_SEC_TER_BOUNDARIES] << 8) & 0xff00; 94*0e209d39SAndroid Build Coastguard Worker } 95*0e209d39SAndroid Build Coastguard Worker 96*0e209d39SAndroid Build Coastguard Worker /** 97*0e209d39SAndroid Build Coastguard Worker * Returns the first assigned tertiary CE. 98*0e209d39SAndroid Build Coastguard Worker */ getFirstTertiaryCE()99*0e209d39SAndroid Build Coastguard Worker uint32_t getFirstTertiaryCE() const { 100*0e209d39SAndroid Build Coastguard Worker return elements[elements[IX_FIRST_TERTIARY_INDEX]] & ~SEC_TER_DELTA_FLAG; 101*0e209d39SAndroid Build Coastguard Worker } 102*0e209d39SAndroid Build Coastguard Worker 103*0e209d39SAndroid Build Coastguard Worker /** 104*0e209d39SAndroid Build Coastguard Worker * Returns the last assigned tertiary CE. 105*0e209d39SAndroid Build Coastguard Worker */ getLastTertiaryCE()106*0e209d39SAndroid Build Coastguard Worker uint32_t getLastTertiaryCE() const { 107*0e209d39SAndroid Build Coastguard Worker return elements[elements[IX_FIRST_SECONDARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG; 108*0e209d39SAndroid Build Coastguard Worker } 109*0e209d39SAndroid Build Coastguard Worker 110*0e209d39SAndroid Build Coastguard Worker /** 111*0e209d39SAndroid Build Coastguard Worker * Returns the last common secondary weight. 112*0e209d39SAndroid Build Coastguard Worker * This is the lower limit for secondaries of primary CEs. 113*0e209d39SAndroid Build Coastguard Worker */ getLastCommonSecondary()114*0e209d39SAndroid Build Coastguard Worker uint32_t getLastCommonSecondary() const { 115*0e209d39SAndroid Build Coastguard Worker return (elements[IX_SEC_TER_BOUNDARIES] >> 16) & 0xff00; 116*0e209d39SAndroid Build Coastguard Worker } 117*0e209d39SAndroid Build Coastguard Worker 118*0e209d39SAndroid Build Coastguard Worker /** 119*0e209d39SAndroid Build Coastguard Worker * Returns the boundary between secondary weights of primary CEs 120*0e209d39SAndroid Build Coastguard Worker * and those of secondary CEs. 121*0e209d39SAndroid Build Coastguard Worker * This is the upper limit for secondaries of primary CEs. 122*0e209d39SAndroid Build Coastguard Worker * This minus one is the lower limit for secondaries of secondary CEs. 123*0e209d39SAndroid Build Coastguard Worker */ getSecondaryBoundary()124*0e209d39SAndroid Build Coastguard Worker uint32_t getSecondaryBoundary() const { 125*0e209d39SAndroid Build Coastguard Worker return (elements[IX_SEC_TER_BOUNDARIES] >> 8) & 0xff00; 126*0e209d39SAndroid Build Coastguard Worker } 127*0e209d39SAndroid Build Coastguard Worker 128*0e209d39SAndroid Build Coastguard Worker /** 129*0e209d39SAndroid Build Coastguard Worker * Returns the first assigned secondary CE. 130*0e209d39SAndroid Build Coastguard Worker */ getFirstSecondaryCE()131*0e209d39SAndroid Build Coastguard Worker uint32_t getFirstSecondaryCE() const { 132*0e209d39SAndroid Build Coastguard Worker return elements[elements[IX_FIRST_SECONDARY_INDEX]] & ~SEC_TER_DELTA_FLAG; 133*0e209d39SAndroid Build Coastguard Worker } 134*0e209d39SAndroid Build Coastguard Worker 135*0e209d39SAndroid Build Coastguard Worker /** 136*0e209d39SAndroid Build Coastguard Worker * Returns the last assigned secondary CE. 137*0e209d39SAndroid Build Coastguard Worker */ getLastSecondaryCE()138*0e209d39SAndroid Build Coastguard Worker uint32_t getLastSecondaryCE() const { 139*0e209d39SAndroid Build Coastguard Worker return elements[elements[IX_FIRST_PRIMARY_INDEX] - 1] & ~SEC_TER_DELTA_FLAG; 140*0e209d39SAndroid Build Coastguard Worker } 141*0e209d39SAndroid Build Coastguard Worker 142*0e209d39SAndroid Build Coastguard Worker /** 143*0e209d39SAndroid Build Coastguard Worker * Returns the first assigned primary weight. 144*0e209d39SAndroid Build Coastguard Worker */ getFirstPrimary()145*0e209d39SAndroid Build Coastguard Worker uint32_t getFirstPrimary() const { 146*0e209d39SAndroid Build Coastguard Worker return elements[elements[IX_FIRST_PRIMARY_INDEX]]; // step=0: cannot be a range end 147*0e209d39SAndroid Build Coastguard Worker } 148*0e209d39SAndroid Build Coastguard Worker 149*0e209d39SAndroid Build Coastguard Worker /** 150*0e209d39SAndroid Build Coastguard Worker * Returns the first assigned primary CE. 151*0e209d39SAndroid Build Coastguard Worker */ getFirstPrimaryCE()152*0e209d39SAndroid Build Coastguard Worker int64_t getFirstPrimaryCE() const { 153*0e209d39SAndroid Build Coastguard Worker return Collation::makeCE(getFirstPrimary()); 154*0e209d39SAndroid Build Coastguard Worker } 155*0e209d39SAndroid Build Coastguard Worker 156*0e209d39SAndroid Build Coastguard Worker /** 157*0e209d39SAndroid Build Coastguard Worker * Returns the last root CE with a primary weight before p. 158*0e209d39SAndroid Build Coastguard Worker * Intended only for reordering group boundaries. 159*0e209d39SAndroid Build Coastguard Worker */ 160*0e209d39SAndroid Build Coastguard Worker int64_t lastCEWithPrimaryBefore(uint32_t p) const; 161*0e209d39SAndroid Build Coastguard Worker 162*0e209d39SAndroid Build Coastguard Worker /** 163*0e209d39SAndroid Build Coastguard Worker * Returns the first root CE with a primary weight of at least p. 164*0e209d39SAndroid Build Coastguard Worker * Intended only for reordering group boundaries. 165*0e209d39SAndroid Build Coastguard Worker */ 166*0e209d39SAndroid Build Coastguard Worker int64_t firstCEWithPrimaryAtLeast(uint32_t p) const; 167*0e209d39SAndroid Build Coastguard Worker 168*0e209d39SAndroid Build Coastguard Worker /** 169*0e209d39SAndroid Build Coastguard Worker * Returns the primary weight before p. 170*0e209d39SAndroid Build Coastguard Worker * p must be greater than the first root primary. 171*0e209d39SAndroid Build Coastguard Worker */ 172*0e209d39SAndroid Build Coastguard Worker uint32_t getPrimaryBefore(uint32_t p, UBool isCompressible) const; 173*0e209d39SAndroid Build Coastguard Worker 174*0e209d39SAndroid Build Coastguard Worker /** Returns the secondary weight before [p, s]. */ 175*0e209d39SAndroid Build Coastguard Worker uint32_t getSecondaryBefore(uint32_t p, uint32_t s) const; 176*0e209d39SAndroid Build Coastguard Worker 177*0e209d39SAndroid Build Coastguard Worker /** Returns the tertiary weight before [p, s, t]. */ 178*0e209d39SAndroid Build Coastguard Worker uint32_t getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const; 179*0e209d39SAndroid Build Coastguard Worker 180*0e209d39SAndroid Build Coastguard Worker /** 181*0e209d39SAndroid Build Coastguard Worker * Finds the index of the input primary. 182*0e209d39SAndroid Build Coastguard Worker * p must occur as a root primary, and must not be 0. 183*0e209d39SAndroid Build Coastguard Worker */ 184*0e209d39SAndroid Build Coastguard Worker int32_t findPrimary(uint32_t p) const; 185*0e209d39SAndroid Build Coastguard Worker 186*0e209d39SAndroid Build Coastguard Worker /** 187*0e209d39SAndroid Build Coastguard Worker * Returns the primary weight after p where index=findPrimary(p). 188*0e209d39SAndroid Build Coastguard Worker * p must be at least the first root primary. 189*0e209d39SAndroid Build Coastguard Worker */ 190*0e209d39SAndroid Build Coastguard Worker uint32_t getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const; 191*0e209d39SAndroid Build Coastguard Worker /** 192*0e209d39SAndroid Build Coastguard Worker * Returns the secondary weight after [p, s] where index=findPrimary(p) 193*0e209d39SAndroid Build Coastguard Worker * except use index=0 for p=0. 194*0e209d39SAndroid Build Coastguard Worker * 195*0e209d39SAndroid Build Coastguard Worker * Must return a weight for every root [p, s] as well as for every weight 196*0e209d39SAndroid Build Coastguard Worker * returned by getSecondaryBefore(). If p!=0 then s can be BEFORE_WEIGHT16. 197*0e209d39SAndroid Build Coastguard Worker * 198*0e209d39SAndroid Build Coastguard Worker * Exception: [0, 0] is handled by the CollationBuilder: 199*0e209d39SAndroid Build Coastguard Worker * Both its lower and upper boundaries are special. 200*0e209d39SAndroid Build Coastguard Worker */ 201*0e209d39SAndroid Build Coastguard Worker uint32_t getSecondaryAfter(int32_t index, uint32_t s) const; 202*0e209d39SAndroid Build Coastguard Worker /** 203*0e209d39SAndroid Build Coastguard Worker * Returns the tertiary weight after [p, s, t] where index=findPrimary(p) 204*0e209d39SAndroid Build Coastguard Worker * except use index=0 for p=0. 205*0e209d39SAndroid Build Coastguard Worker * 206*0e209d39SAndroid Build Coastguard Worker * Must return a weight for every root [p, s, t] as well as for every weight 207*0e209d39SAndroid Build Coastguard Worker * returned by getTertiaryBefore(). If s!=0 then t can be BEFORE_WEIGHT16. 208*0e209d39SAndroid Build Coastguard Worker * 209*0e209d39SAndroid Build Coastguard Worker * Exception: [0, 0, 0] is handled by the CollationBuilder: 210*0e209d39SAndroid Build Coastguard Worker * Both its lower and upper boundaries are special. 211*0e209d39SAndroid Build Coastguard Worker */ 212*0e209d39SAndroid Build Coastguard Worker uint32_t getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const; 213*0e209d39SAndroid Build Coastguard Worker 214*0e209d39SAndroid Build Coastguard Worker private: 215*0e209d39SAndroid Build Coastguard Worker /** 216*0e209d39SAndroid Build Coastguard Worker * Returns the first secondary & tertiary weights for p where index=findPrimary(p)+1. 217*0e209d39SAndroid Build Coastguard Worker */ 218*0e209d39SAndroid Build Coastguard Worker uint32_t getFirstSecTerForPrimary(int32_t index) const; 219*0e209d39SAndroid Build Coastguard Worker 220*0e209d39SAndroid Build Coastguard Worker /** 221*0e209d39SAndroid Build Coastguard Worker * Finds the largest index i where elements[i]<=p. 222*0e209d39SAndroid Build Coastguard Worker * Requires first primary<=p<0xffffff00 (PRIMARY_SENTINEL). 223*0e209d39SAndroid Build Coastguard Worker * Does not require that p is a root collator primary. 224*0e209d39SAndroid Build Coastguard Worker */ 225*0e209d39SAndroid Build Coastguard Worker int32_t findP(uint32_t p) const; 226*0e209d39SAndroid Build Coastguard Worker isEndOfPrimaryRange(uint32_t q)227*0e209d39SAndroid Build Coastguard Worker static inline UBool isEndOfPrimaryRange(uint32_t q) { 228*0e209d39SAndroid Build Coastguard Worker return (q & SEC_TER_DELTA_FLAG) == 0 && (q & PRIMARY_STEP_MASK) != 0; 229*0e209d39SAndroid Build Coastguard Worker } 230*0e209d39SAndroid Build Coastguard Worker 231*0e209d39SAndroid Build Coastguard Worker /** 232*0e209d39SAndroid Build Coastguard Worker * Data structure: 233*0e209d39SAndroid Build Coastguard Worker * 234*0e209d39SAndroid Build Coastguard Worker * The first few entries are indexes, up to elements[IX_FIRST_TERTIARY_INDEX]. 235*0e209d39SAndroid Build Coastguard Worker * See the comments on the IX_ constants. 236*0e209d39SAndroid Build Coastguard Worker * 237*0e209d39SAndroid Build Coastguard Worker * All other elements are a compact form of the root collator CEs 238*0e209d39SAndroid Build Coastguard Worker * in mostly collation order. 239*0e209d39SAndroid Build Coastguard Worker * 240*0e209d39SAndroid Build Coastguard Worker * A sequence of one or more root CEs with the same primary weight is stored as 241*0e209d39SAndroid Build Coastguard Worker * one element with the primary weight, with the SEC_TER_DELTA_FLAG flag not set, 242*0e209d39SAndroid Build Coastguard Worker * followed by elements with only the secondary/tertiary weights, 243*0e209d39SAndroid Build Coastguard Worker * each with that flag set. 244*0e209d39SAndroid Build Coastguard Worker * If the lowest secondary/tertiary combination is Collation::COMMON_SEC_AND_TER_CE, 245*0e209d39SAndroid Build Coastguard Worker * then the element for that combination is omitted. 246*0e209d39SAndroid Build Coastguard Worker * 247*0e209d39SAndroid Build Coastguard Worker * Note: If the first actual secondary/tertiary combination is higher than 248*0e209d39SAndroid Build Coastguard Worker * Collation::COMMON_SEC_AND_TER_CE (which is unusual), 249*0e209d39SAndroid Build Coastguard Worker * the runtime code will assume anyway that Collation::COMMON_SEC_AND_TER_CE is present. 250*0e209d39SAndroid Build Coastguard Worker * 251*0e209d39SAndroid Build Coastguard Worker * A range of only-primary CEs with a consistent "step" increment 252*0e209d39SAndroid Build Coastguard Worker * from each primary to the next may be stored as a range. 253*0e209d39SAndroid Build Coastguard Worker * Only the first and last primary are stored, and the last has the step 254*0e209d39SAndroid Build Coastguard Worker * value in the low bits (PRIMARY_STEP_MASK). 255*0e209d39SAndroid Build Coastguard Worker * 256*0e209d39SAndroid Build Coastguard Worker * An range-end element may also either start a new range or be followed by 257*0e209d39SAndroid Build Coastguard Worker * elements with secondary/tertiary deltas. 258*0e209d39SAndroid Build Coastguard Worker * 259*0e209d39SAndroid Build Coastguard Worker * A primary element that is not a range end has zero step bits. 260*0e209d39SAndroid Build Coastguard Worker * 261*0e209d39SAndroid Build Coastguard Worker * There is no element for the completely ignorable CE (all weights 0). 262*0e209d39SAndroid Build Coastguard Worker * 263*0e209d39SAndroid Build Coastguard Worker * Before elements[IX_FIRST_PRIMARY_INDEX], all elements are secondary/tertiary deltas, 264*0e209d39SAndroid Build Coastguard Worker * for all of the ignorable root CEs. 265*0e209d39SAndroid Build Coastguard Worker * 266*0e209d39SAndroid Build Coastguard Worker * There are no elements for unassigned-implicit primary CEs. 267*0e209d39SAndroid Build Coastguard Worker * All primaries stored here are at most 3 bytes long. 268*0e209d39SAndroid Build Coastguard Worker */ 269*0e209d39SAndroid Build Coastguard Worker const uint32_t *elements; 270*0e209d39SAndroid Build Coastguard Worker int32_t length; 271*0e209d39SAndroid Build Coastguard Worker }; 272*0e209d39SAndroid Build Coastguard Worker 273*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 274*0e209d39SAndroid Build Coastguard Worker 275*0e209d39SAndroid Build Coastguard Worker #endif // !UCONFIG_NO_COLLATION 276*0e209d39SAndroid Build Coastguard Worker #endif // __COLLATIONROOTELEMENTS_H__ 277