1*0e209d39SAndroid Build Coastguard Worker // © 2019 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker 4*0e209d39SAndroid Build Coastguard Worker // locdistance.h 5*0e209d39SAndroid Build Coastguard Worker // created: 2019may08 Markus W. Scherer 6*0e209d39SAndroid Build Coastguard Worker 7*0e209d39SAndroid Build Coastguard Worker #ifndef __LOCDISTANCE_H__ 8*0e209d39SAndroid Build Coastguard Worker #define __LOCDISTANCE_H__ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 11*0e209d39SAndroid Build Coastguard Worker #include "unicode/bytestrie.h" 12*0e209d39SAndroid Build Coastguard Worker #include "unicode/localematcher.h" 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/locid.h" 14*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 15*0e209d39SAndroid Build Coastguard Worker #include "lsr.h" 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker struct LocaleDistanceData; 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker /** 22*0e209d39SAndroid Build Coastguard Worker * Offline-built data for LocaleMatcher. 23*0e209d39SAndroid Build Coastguard Worker * Mostly but not only the data for mapping locales to their maximized forms. 24*0e209d39SAndroid Build Coastguard Worker */ 25*0e209d39SAndroid Build Coastguard Worker class LocaleDistance final : public UMemory { 26*0e209d39SAndroid Build Coastguard Worker public: 27*0e209d39SAndroid Build Coastguard Worker static const LocaleDistance *getSingleton(UErrorCode &errorCode); 28*0e209d39SAndroid Build Coastguard Worker shiftDistance(int32_t distance)29*0e209d39SAndroid Build Coastguard Worker static int32_t shiftDistance(int32_t distance) { 30*0e209d39SAndroid Build Coastguard Worker return distance << DISTANCE_SHIFT; 31*0e209d39SAndroid Build Coastguard Worker } 32*0e209d39SAndroid Build Coastguard Worker getShiftedDistance(int32_t indexAndDistance)33*0e209d39SAndroid Build Coastguard Worker static int32_t getShiftedDistance(int32_t indexAndDistance) { 34*0e209d39SAndroid Build Coastguard Worker return indexAndDistance & DISTANCE_MASK; 35*0e209d39SAndroid Build Coastguard Worker } 36*0e209d39SAndroid Build Coastguard Worker getDistanceDouble(int32_t indexAndDistance)37*0e209d39SAndroid Build Coastguard Worker static double getDistanceDouble(int32_t indexAndDistance) { 38*0e209d39SAndroid Build Coastguard Worker double shiftedDistance = getShiftedDistance(indexAndDistance); 39*0e209d39SAndroid Build Coastguard Worker return shiftedDistance / (1 << DISTANCE_SHIFT); 40*0e209d39SAndroid Build Coastguard Worker } 41*0e209d39SAndroid Build Coastguard Worker getDistanceFloor(int32_t indexAndDistance)42*0e209d39SAndroid Build Coastguard Worker static int32_t getDistanceFloor(int32_t indexAndDistance) { 43*0e209d39SAndroid Build Coastguard Worker return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT; 44*0e209d39SAndroid Build Coastguard Worker } 45*0e209d39SAndroid Build Coastguard Worker getIndex(int32_t indexAndDistance)46*0e209d39SAndroid Build Coastguard Worker static int32_t getIndex(int32_t indexAndDistance) { 47*0e209d39SAndroid Build Coastguard Worker // assert indexAndDistance >= 0; 48*0e209d39SAndroid Build Coastguard Worker return indexAndDistance >> INDEX_SHIFT; 49*0e209d39SAndroid Build Coastguard Worker } 50*0e209d39SAndroid Build Coastguard Worker 51*0e209d39SAndroid Build Coastguard Worker /** 52*0e209d39SAndroid Build Coastguard Worker * Finds the supported LSR with the smallest distance from the desired one. 53*0e209d39SAndroid Build Coastguard Worker * Equivalent LSR subtags must be normalized into a canonical form. 54*0e209d39SAndroid Build Coastguard Worker * 55*0e209d39SAndroid Build Coastguard Worker * <p>Returns the index of the lowest-distance supported LSR in the high bits 56*0e209d39SAndroid Build Coastguard Worker * (negative if none has a distance below the threshold), 57*0e209d39SAndroid Build Coastguard Worker * and its distance (0..ABOVE_THRESHOLD) in the low bits. 58*0e209d39SAndroid Build Coastguard Worker */ 59*0e209d39SAndroid Build Coastguard Worker int32_t getBestIndexAndDistance(const LSR &desired, 60*0e209d39SAndroid Build Coastguard Worker const LSR **supportedLSRs, int32_t supportedLSRsLength, 61*0e209d39SAndroid Build Coastguard Worker int32_t shiftedThreshold, 62*0e209d39SAndroid Build Coastguard Worker ULocMatchFavorSubtag favorSubtag, 63*0e209d39SAndroid Build Coastguard Worker ULocMatchDirection direction) const; 64*0e209d39SAndroid Build Coastguard Worker 65*0e209d39SAndroid Build Coastguard Worker bool isParadigmLSR(const LSR &lsr) const; 66*0e209d39SAndroid Build Coastguard Worker getDefaultScriptDistance()67*0e209d39SAndroid Build Coastguard Worker int32_t getDefaultScriptDistance() const { 68*0e209d39SAndroid Build Coastguard Worker return defaultScriptDistance; 69*0e209d39SAndroid Build Coastguard Worker } 70*0e209d39SAndroid Build Coastguard Worker getDefaultDemotionPerDesiredLocale()71*0e209d39SAndroid Build Coastguard Worker int32_t getDefaultDemotionPerDesiredLocale() const { 72*0e209d39SAndroid Build Coastguard Worker return defaultDemotionPerDesiredLocale; 73*0e209d39SAndroid Build Coastguard Worker } 74*0e209d39SAndroid Build Coastguard Worker 75*0e209d39SAndroid Build Coastguard Worker private: 76*0e209d39SAndroid Build Coastguard Worker // The distance is shifted left to gain some fraction bits. 77*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t DISTANCE_SHIFT = 3; 78*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t DISTANCE_FRACTION_MASK = 7; 79*0e209d39SAndroid Build Coastguard Worker // 7 bits for 0..100 80*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t DISTANCE_INT_SHIFT = 7; 81*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT; 82*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t DISTANCE_MASK = 0x3ff; 83*0e209d39SAndroid Build Coastguard Worker // tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit 84*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t INDEX_NEG_1 = 0xfffffc00; 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely); 87*0e209d39SAndroid Build Coastguard Worker LocaleDistance(const LocaleDistance &other) = delete; 88*0e209d39SAndroid Build Coastguard Worker LocaleDistance &operator=(const LocaleDistance &other) = delete; 89*0e209d39SAndroid Build Coastguard Worker 90*0e209d39SAndroid Build Coastguard Worker static void initLocaleDistance(UErrorCode &errorCode); 91*0e209d39SAndroid Build Coastguard Worker isMatch(const LSR & desired,const LSR & supported,int32_t shiftedThreshold,ULocMatchFavorSubtag favorSubtag)92*0e209d39SAndroid Build Coastguard Worker bool isMatch(const LSR &desired, const LSR &supported, 93*0e209d39SAndroid Build Coastguard Worker int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const { 94*0e209d39SAndroid Build Coastguard Worker const LSR *pSupp = &supported; 95*0e209d39SAndroid Build Coastguard Worker return getBestIndexAndDistance( 96*0e209d39SAndroid Build Coastguard Worker desired, &pSupp, 1, 97*0e209d39SAndroid Build Coastguard Worker shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0; 98*0e209d39SAndroid Build Coastguard Worker } 99*0e209d39SAndroid Build Coastguard Worker 100*0e209d39SAndroid Build Coastguard Worker static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, 101*0e209d39SAndroid Build Coastguard Worker const char *desired, const char *supported); 102*0e209d39SAndroid Build Coastguard Worker 103*0e209d39SAndroid Build Coastguard Worker static int32_t getRegionPartitionsDistance( 104*0e209d39SAndroid Build Coastguard Worker BytesTrie &iter, uint64_t startState, 105*0e209d39SAndroid Build Coastguard Worker const char *desiredPartitions, const char *supportedPartitions, 106*0e209d39SAndroid Build Coastguard Worker int32_t threshold); 107*0e209d39SAndroid Build Coastguard Worker 108*0e209d39SAndroid Build Coastguard Worker static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState); 109*0e209d39SAndroid Build Coastguard Worker 110*0e209d39SAndroid Build Coastguard Worker static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); 111*0e209d39SAndroid Build Coastguard Worker partitionsForRegion(const LSR & lsr)112*0e209d39SAndroid Build Coastguard Worker const char *partitionsForRegion(const LSR &lsr) const { 113*0e209d39SAndroid Build Coastguard Worker // ill-formed region -> one non-matching string 114*0e209d39SAndroid Build Coastguard Worker int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; 115*0e209d39SAndroid Build Coastguard Worker return partitionArrays[pIndex]; 116*0e209d39SAndroid Build Coastguard Worker } 117*0e209d39SAndroid Build Coastguard Worker getDefaultRegionDistance()118*0e209d39SAndroid Build Coastguard Worker int32_t getDefaultRegionDistance() const { 119*0e209d39SAndroid Build Coastguard Worker return defaultRegionDistance; 120*0e209d39SAndroid Build Coastguard Worker } 121*0e209d39SAndroid Build Coastguard Worker 122*0e209d39SAndroid Build Coastguard Worker const LikelySubtags &likelySubtags; 123*0e209d39SAndroid Build Coastguard Worker 124*0e209d39SAndroid Build Coastguard Worker // The trie maps each dlang+slang+dscript+sscript+dregion+sregion 125*0e209d39SAndroid Build Coastguard Worker // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. 126*0e209d39SAndroid Build Coastguard Worker // There is also a trie value for each subsequence of whole subtags. 127*0e209d39SAndroid Build Coastguard Worker // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". 128*0e209d39SAndroid Build Coastguard Worker BytesTrie trie; 129*0e209d39SAndroid Build Coastguard Worker 130*0e209d39SAndroid Build Coastguard Worker /** 131*0e209d39SAndroid Build Coastguard Worker * Maps each region to zero or more single-character partitions. 132*0e209d39SAndroid Build Coastguard Worker */ 133*0e209d39SAndroid Build Coastguard Worker const uint8_t *regionToPartitionsIndex; 134*0e209d39SAndroid Build Coastguard Worker const char **partitionArrays; 135*0e209d39SAndroid Build Coastguard Worker 136*0e209d39SAndroid Build Coastguard Worker /** 137*0e209d39SAndroid Build Coastguard Worker * Used to get the paradigm region for a cluster, if there is one. 138*0e209d39SAndroid Build Coastguard Worker */ 139*0e209d39SAndroid Build Coastguard Worker const LSR *paradigmLSRs; 140*0e209d39SAndroid Build Coastguard Worker int32_t paradigmLSRsLength; 141*0e209d39SAndroid Build Coastguard Worker 142*0e209d39SAndroid Build Coastguard Worker int32_t defaultLanguageDistance; 143*0e209d39SAndroid Build Coastguard Worker int32_t defaultScriptDistance; 144*0e209d39SAndroid Build Coastguard Worker int32_t defaultRegionDistance; 145*0e209d39SAndroid Build Coastguard Worker int32_t minRegionDistance; 146*0e209d39SAndroid Build Coastguard Worker int32_t defaultDemotionPerDesiredLocale; 147*0e209d39SAndroid Build Coastguard Worker }; 148*0e209d39SAndroid Build Coastguard Worker 149*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 150*0e209d39SAndroid Build Coastguard Worker 151*0e209d39SAndroid Build Coastguard Worker #endif // __LOCDISTANCE_H__ 152