1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2012-2014, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 8*0e209d39SAndroid Build Coastguard Worker * collationkeys.h 9*0e209d39SAndroid Build Coastguard Worker * 10*0e209d39SAndroid Build Coastguard Worker * created on: 2012sep02 11*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 12*0e209d39SAndroid Build Coastguard Worker */ 13*0e209d39SAndroid Build Coastguard Worker 14*0e209d39SAndroid Build Coastguard Worker #ifndef __COLLATIONKEYS_H__ 15*0e209d39SAndroid Build Coastguard Worker #define __COLLATIONKEYS_H__ 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #include "unicode/bytestream.h" 22*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucol.h" 23*0e209d39SAndroid Build Coastguard Worker #include "charstr.h" 24*0e209d39SAndroid Build Coastguard Worker #include "collation.h" 25*0e209d39SAndroid Build Coastguard Worker 26*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker class CollationIterator; 29*0e209d39SAndroid Build Coastguard Worker struct CollationDataReader; 30*0e209d39SAndroid Build Coastguard Worker struct CollationSettings; 31*0e209d39SAndroid Build Coastguard Worker 32*0e209d39SAndroid Build Coastguard Worker class SortKeyByteSink : public ByteSink { 33*0e209d39SAndroid Build Coastguard Worker public: SortKeyByteSink(char * dest,int32_t destCapacity)34*0e209d39SAndroid Build Coastguard Worker SortKeyByteSink(char *dest, int32_t destCapacity) 35*0e209d39SAndroid Build Coastguard Worker : buffer_(dest), capacity_(destCapacity), 36*0e209d39SAndroid Build Coastguard Worker appended_(0), ignore_(0) {} 37*0e209d39SAndroid Build Coastguard Worker virtual ~SortKeyByteSink(); 38*0e209d39SAndroid Build Coastguard Worker IgnoreBytes(int32_t numIgnore)39*0e209d39SAndroid Build Coastguard Worker void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker virtual void Append(const char *bytes, int32_t n) override; Append(uint32_t b)42*0e209d39SAndroid Build Coastguard Worker void Append(uint32_t b) { 43*0e209d39SAndroid Build Coastguard Worker if (ignore_ > 0) { 44*0e209d39SAndroid Build Coastguard Worker --ignore_; 45*0e209d39SAndroid Build Coastguard Worker } else { 46*0e209d39SAndroid Build Coastguard Worker if (appended_ < capacity_ || Resize(1, appended_)) { 47*0e209d39SAndroid Build Coastguard Worker buffer_[appended_] = (char)b; 48*0e209d39SAndroid Build Coastguard Worker } 49*0e209d39SAndroid Build Coastguard Worker ++appended_; 50*0e209d39SAndroid Build Coastguard Worker } 51*0e209d39SAndroid Build Coastguard Worker } 52*0e209d39SAndroid Build Coastguard Worker virtual char *GetAppendBuffer(int32_t min_capacity, 53*0e209d39SAndroid Build Coastguard Worker int32_t desired_capacity_hint, 54*0e209d39SAndroid Build Coastguard Worker char *scratch, int32_t scratch_capacity, 55*0e209d39SAndroid Build Coastguard Worker int32_t *result_capacity) override; NumberOfBytesAppended()56*0e209d39SAndroid Build Coastguard Worker int32_t NumberOfBytesAppended() const { return appended_; } 57*0e209d39SAndroid Build Coastguard Worker 58*0e209d39SAndroid Build Coastguard Worker /** 59*0e209d39SAndroid Build Coastguard Worker * @return how many bytes can be appended (including ignored ones) 60*0e209d39SAndroid Build Coastguard Worker * without reallocation 61*0e209d39SAndroid Build Coastguard Worker */ GetRemainingCapacity()62*0e209d39SAndroid Build Coastguard Worker int32_t GetRemainingCapacity() const { 63*0e209d39SAndroid Build Coastguard Worker // Either ignore_ or appended_ should be 0. 64*0e209d39SAndroid Build Coastguard Worker return ignore_ + capacity_ - appended_; 65*0e209d39SAndroid Build Coastguard Worker } 66*0e209d39SAndroid Build Coastguard Worker Overflowed()67*0e209d39SAndroid Build Coastguard Worker UBool Overflowed() const { return appended_ > capacity_; } 68*0e209d39SAndroid Build Coastguard Worker /** @return false if memory allocation failed */ IsOk()69*0e209d39SAndroid Build Coastguard Worker UBool IsOk() const { return buffer_ != nullptr; } 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker protected: 72*0e209d39SAndroid Build Coastguard Worker virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; 73*0e209d39SAndroid Build Coastguard Worker virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; 74*0e209d39SAndroid Build Coastguard Worker SetNotOk()75*0e209d39SAndroid Build Coastguard Worker void SetNotOk() { 76*0e209d39SAndroid Build Coastguard Worker buffer_ = nullptr; 77*0e209d39SAndroid Build Coastguard Worker capacity_ = 0; 78*0e209d39SAndroid Build Coastguard Worker } 79*0e209d39SAndroid Build Coastguard Worker 80*0e209d39SAndroid Build Coastguard Worker char *buffer_; 81*0e209d39SAndroid Build Coastguard Worker int32_t capacity_; 82*0e209d39SAndroid Build Coastguard Worker int32_t appended_; 83*0e209d39SAndroid Build Coastguard Worker int32_t ignore_; 84*0e209d39SAndroid Build Coastguard Worker 85*0e209d39SAndroid Build Coastguard Worker private: 86*0e209d39SAndroid Build Coastguard Worker SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented 87*0e209d39SAndroid Build Coastguard Worker SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented 88*0e209d39SAndroid Build Coastguard Worker }; 89*0e209d39SAndroid Build Coastguard Worker 90*0e209d39SAndroid Build Coastguard Worker class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { 91*0e209d39SAndroid Build Coastguard Worker public: 92*0e209d39SAndroid Build Coastguard Worker class LevelCallback : public UMemory { 93*0e209d39SAndroid Build Coastguard Worker public: 94*0e209d39SAndroid Build Coastguard Worker virtual ~LevelCallback(); 95*0e209d39SAndroid Build Coastguard Worker /** 96*0e209d39SAndroid Build Coastguard Worker * @param level The next level about to be written to the ByteSink. 97*0e209d39SAndroid Build Coastguard Worker * @return true if the level is to be written 98*0e209d39SAndroid Build Coastguard Worker * (the base class implementation always returns true) 99*0e209d39SAndroid Build Coastguard Worker */ 100*0e209d39SAndroid Build Coastguard Worker virtual UBool needToWrite(Collation::Level level); 101*0e209d39SAndroid Build Coastguard Worker }; 102*0e209d39SAndroid Build Coastguard Worker 103*0e209d39SAndroid Build Coastguard Worker /** 104*0e209d39SAndroid Build Coastguard Worker * Writes the sort key bytes for minLevel up to the iterator data's strength. 105*0e209d39SAndroid Build Coastguard Worker * Optionally writes the case level. 106*0e209d39SAndroid Build Coastguard Worker * Stops writing levels when callback.needToWrite(level) returns false. 107*0e209d39SAndroid Build Coastguard Worker * Separates levels with the LEVEL_SEPARATOR_BYTE 108*0e209d39SAndroid Build Coastguard Worker * but does not write a TERMINATOR_BYTE. 109*0e209d39SAndroid Build Coastguard Worker */ 110*0e209d39SAndroid Build Coastguard Worker static void writeSortKeyUpToQuaternary(CollationIterator &iter, 111*0e209d39SAndroid Build Coastguard Worker const UBool *compressibleBytes, 112*0e209d39SAndroid Build Coastguard Worker const CollationSettings &settings, 113*0e209d39SAndroid Build Coastguard Worker SortKeyByteSink &sink, 114*0e209d39SAndroid Build Coastguard Worker Collation::Level minLevel, LevelCallback &callback, 115*0e209d39SAndroid Build Coastguard Worker UBool preflight, UErrorCode &errorCode); 116*0e209d39SAndroid Build Coastguard Worker private: 117*0e209d39SAndroid Build Coastguard Worker friend struct CollationDataReader; 118*0e209d39SAndroid Build Coastguard Worker 119*0e209d39SAndroid Build Coastguard Worker CollationKeys() = delete; // no instantiation 120*0e209d39SAndroid Build Coastguard Worker 121*0e209d39SAndroid Build Coastguard Worker // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. 122*0e209d39SAndroid Build Coastguard Worker static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE; 123*0e209d39SAndroid Build Coastguard Worker static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; 124*0e209d39SAndroid Build Coastguard Worker static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; 125*0e209d39SAndroid Build Coastguard Worker static const int32_t SEC_COMMON_MAX_COUNT = 0x21; 126*0e209d39SAndroid Build Coastguard Worker 127*0e209d39SAndroid Build Coastguard Worker // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. 128*0e209d39SAndroid Build Coastguard Worker static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1; 129*0e209d39SAndroid Build Coastguard Worker static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7; 130*0e209d39SAndroid Build Coastguard Worker static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13; 131*0e209d39SAndroid Build Coastguard Worker static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; 132*0e209d39SAndroid Build Coastguard Worker 133*0e209d39SAndroid Build Coastguard Worker // Case level, upperFirst: Compress up to 13 common weights as 3..15. 134*0e209d39SAndroid Build Coastguard Worker static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3; 135*0e209d39SAndroid Build Coastguard Worker static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15; 136*0e209d39SAndroid Build Coastguard Worker static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; 137*0e209d39SAndroid Build Coastguard Worker 138*0e209d39SAndroid Build Coastguard Worker // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. 139*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE; 140*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; 141*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; 142*0e209d39SAndroid Build Coastguard Worker static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61; 143*0e209d39SAndroid Build Coastguard Worker 144*0e209d39SAndroid Build Coastguard Worker // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. 145*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE; 146*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; 147*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; 148*0e209d39SAndroid Build Coastguard Worker static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; 149*0e209d39SAndroid Build Coastguard Worker 150*0e209d39SAndroid Build Coastguard Worker // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. 151*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80; 152*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; 153*0e209d39SAndroid Build Coastguard Worker static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; 154*0e209d39SAndroid Build Coastguard Worker static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; 155*0e209d39SAndroid Build Coastguard Worker 156*0e209d39SAndroid Build Coastguard Worker // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. 157*0e209d39SAndroid Build Coastguard Worker static const uint32_t QUAT_COMMON_LOW = 0x1c; 158*0e209d39SAndroid Build Coastguard Worker static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; 159*0e209d39SAndroid Build Coastguard Worker static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; 160*0e209d39SAndroid Build Coastguard Worker static const int32_t QUAT_COMMON_MAX_COUNT = 0x71; 161*0e209d39SAndroid Build Coastguard Worker // Primary weights shifted to quaternary level must be encoded with 162*0e209d39SAndroid Build Coastguard Worker // a lead byte below the common-weight compression range. 163*0e209d39SAndroid Build Coastguard Worker static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b 164*0e209d39SAndroid Build Coastguard Worker }; 165*0e209d39SAndroid Build Coastguard Worker 166*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 167*0e209d39SAndroid Build Coastguard Worker 168*0e209d39SAndroid Build Coastguard Worker #endif // !UCONFIG_NO_COLLATION 169*0e209d39SAndroid Build Coastguard Worker #endif // __COLLATIONKEYS_H__ 170