xref: /aosp_15_r20/external/icu/libicu/cts_headers/locdistance.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2019 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker 
4*0e209d39SAndroid Build Coastguard Worker // locdistance.h
5*0e209d39SAndroid Build Coastguard Worker // created: 2019may08 Markus W. Scherer
6*0e209d39SAndroid Build Coastguard Worker 
7*0e209d39SAndroid Build Coastguard Worker #ifndef __LOCDISTANCE_H__
8*0e209d39SAndroid Build Coastguard Worker #define __LOCDISTANCE_H__
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
11*0e209d39SAndroid Build Coastguard Worker #include "unicode/bytestrie.h"
12*0e209d39SAndroid Build Coastguard Worker #include "unicode/localematcher.h"
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/locid.h"
14*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h"
15*0e209d39SAndroid Build Coastguard Worker #include "lsr.h"
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker struct LocaleDistanceData;
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker /**
22*0e209d39SAndroid Build Coastguard Worker  * Offline-built data for LocaleMatcher.
23*0e209d39SAndroid Build Coastguard Worker  * Mostly but not only the data for mapping locales to their maximized forms.
24*0e209d39SAndroid Build Coastguard Worker  */
25*0e209d39SAndroid Build Coastguard Worker class LocaleDistance final : public UMemory {
26*0e209d39SAndroid Build Coastguard Worker public:
27*0e209d39SAndroid Build Coastguard Worker     static const LocaleDistance *getSingleton(UErrorCode &errorCode);
28*0e209d39SAndroid Build Coastguard Worker 
shiftDistance(int32_t distance)29*0e209d39SAndroid Build Coastguard Worker     static int32_t shiftDistance(int32_t distance) {
30*0e209d39SAndroid Build Coastguard Worker         return distance << DISTANCE_SHIFT;
31*0e209d39SAndroid Build Coastguard Worker     }
32*0e209d39SAndroid Build Coastguard Worker 
getShiftedDistance(int32_t indexAndDistance)33*0e209d39SAndroid Build Coastguard Worker     static int32_t getShiftedDistance(int32_t indexAndDistance) {
34*0e209d39SAndroid Build Coastguard Worker         return indexAndDistance & DISTANCE_MASK;
35*0e209d39SAndroid Build Coastguard Worker     }
36*0e209d39SAndroid Build Coastguard Worker 
getDistanceDouble(int32_t indexAndDistance)37*0e209d39SAndroid Build Coastguard Worker     static double getDistanceDouble(int32_t indexAndDistance) {
38*0e209d39SAndroid Build Coastguard Worker         double shiftedDistance = getShiftedDistance(indexAndDistance);
39*0e209d39SAndroid Build Coastguard Worker         return shiftedDistance / (1 << DISTANCE_SHIFT);
40*0e209d39SAndroid Build Coastguard Worker     }
41*0e209d39SAndroid Build Coastguard Worker 
getDistanceFloor(int32_t indexAndDistance)42*0e209d39SAndroid Build Coastguard Worker     static int32_t getDistanceFloor(int32_t indexAndDistance) {
43*0e209d39SAndroid Build Coastguard Worker         return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
44*0e209d39SAndroid Build Coastguard Worker     }
45*0e209d39SAndroid Build Coastguard Worker 
getIndex(int32_t indexAndDistance)46*0e209d39SAndroid Build Coastguard Worker     static int32_t getIndex(int32_t indexAndDistance) {
47*0e209d39SAndroid Build Coastguard Worker         // assert indexAndDistance >= 0;
48*0e209d39SAndroid Build Coastguard Worker         return indexAndDistance >> INDEX_SHIFT;
49*0e209d39SAndroid Build Coastguard Worker     }
50*0e209d39SAndroid Build Coastguard Worker 
51*0e209d39SAndroid Build Coastguard Worker     /**
52*0e209d39SAndroid Build Coastguard Worker      * Finds the supported LSR with the smallest distance from the desired one.
53*0e209d39SAndroid Build Coastguard Worker      * Equivalent LSR subtags must be normalized into a canonical form.
54*0e209d39SAndroid Build Coastguard Worker      *
55*0e209d39SAndroid Build Coastguard Worker      * <p>Returns the index of the lowest-distance supported LSR in the high bits
56*0e209d39SAndroid Build Coastguard Worker      * (negative if none has a distance below the threshold),
57*0e209d39SAndroid Build Coastguard Worker      * and its distance (0..ABOVE_THRESHOLD) in the low bits.
58*0e209d39SAndroid Build Coastguard Worker      */
59*0e209d39SAndroid Build Coastguard Worker     int32_t getBestIndexAndDistance(const LSR &desired,
60*0e209d39SAndroid Build Coastguard Worker                                     const LSR **supportedLSRs, int32_t supportedLSRsLength,
61*0e209d39SAndroid Build Coastguard Worker                                     int32_t shiftedThreshold,
62*0e209d39SAndroid Build Coastguard Worker                                     ULocMatchFavorSubtag favorSubtag,
63*0e209d39SAndroid Build Coastguard Worker                                     ULocMatchDirection direction) const;
64*0e209d39SAndroid Build Coastguard Worker 
65*0e209d39SAndroid Build Coastguard Worker     bool isParadigmLSR(const LSR &lsr) const;
66*0e209d39SAndroid Build Coastguard Worker 
getDefaultScriptDistance()67*0e209d39SAndroid Build Coastguard Worker     int32_t getDefaultScriptDistance() const {
68*0e209d39SAndroid Build Coastguard Worker         return defaultScriptDistance;
69*0e209d39SAndroid Build Coastguard Worker     }
70*0e209d39SAndroid Build Coastguard Worker 
getDefaultDemotionPerDesiredLocale()71*0e209d39SAndroid Build Coastguard Worker     int32_t getDefaultDemotionPerDesiredLocale() const {
72*0e209d39SAndroid Build Coastguard Worker         return defaultDemotionPerDesiredLocale;
73*0e209d39SAndroid Build Coastguard Worker     }
74*0e209d39SAndroid Build Coastguard Worker 
75*0e209d39SAndroid Build Coastguard Worker private:
76*0e209d39SAndroid Build Coastguard Worker     // The distance is shifted left to gain some fraction bits.
77*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t DISTANCE_SHIFT = 3;
78*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
79*0e209d39SAndroid Build Coastguard Worker     // 7 bits for 0..100
80*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t DISTANCE_INT_SHIFT = 7;
81*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
82*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t DISTANCE_MASK = 0x3ff;
83*0e209d39SAndroid Build Coastguard Worker     // tic constexpr int32_t MAX_INDEX = 0x1fffff;  // avoids sign bit
84*0e209d39SAndroid Build Coastguard Worker     static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
85*0e209d39SAndroid Build Coastguard Worker 
86*0e209d39SAndroid Build Coastguard Worker     LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely);
87*0e209d39SAndroid Build Coastguard Worker     LocaleDistance(const LocaleDistance &other) = delete;
88*0e209d39SAndroid Build Coastguard Worker     LocaleDistance &operator=(const LocaleDistance &other) = delete;
89*0e209d39SAndroid Build Coastguard Worker 
90*0e209d39SAndroid Build Coastguard Worker     static void initLocaleDistance(UErrorCode &errorCode);
91*0e209d39SAndroid Build Coastguard Worker 
isMatch(const LSR & desired,const LSR & supported,int32_t shiftedThreshold,ULocMatchFavorSubtag favorSubtag)92*0e209d39SAndroid Build Coastguard Worker     bool isMatch(const LSR &desired, const LSR &supported,
93*0e209d39SAndroid Build Coastguard Worker                  int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
94*0e209d39SAndroid Build Coastguard Worker         const LSR *pSupp = &supported;
95*0e209d39SAndroid Build Coastguard Worker         return getBestIndexAndDistance(
96*0e209d39SAndroid Build Coastguard Worker             desired, &pSupp, 1,
97*0e209d39SAndroid Build Coastguard Worker             shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
98*0e209d39SAndroid Build Coastguard Worker     }
99*0e209d39SAndroid Build Coastguard Worker 
100*0e209d39SAndroid Build Coastguard Worker     static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
101*0e209d39SAndroid Build Coastguard Worker                                             const char *desired, const char *supported);
102*0e209d39SAndroid Build Coastguard Worker 
103*0e209d39SAndroid Build Coastguard Worker     static int32_t getRegionPartitionsDistance(
104*0e209d39SAndroid Build Coastguard Worker         BytesTrie &iter, uint64_t startState,
105*0e209d39SAndroid Build Coastguard Worker         const char *desiredPartitions, const char *supportedPartitions,
106*0e209d39SAndroid Build Coastguard Worker         int32_t threshold);
107*0e209d39SAndroid Build Coastguard Worker 
108*0e209d39SAndroid Build Coastguard Worker     static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
109*0e209d39SAndroid Build Coastguard Worker 
110*0e209d39SAndroid Build Coastguard Worker     static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
111*0e209d39SAndroid Build Coastguard Worker 
partitionsForRegion(const LSR & lsr)112*0e209d39SAndroid Build Coastguard Worker     const char *partitionsForRegion(const LSR &lsr) const {
113*0e209d39SAndroid Build Coastguard Worker         // ill-formed region -> one non-matching string
114*0e209d39SAndroid Build Coastguard Worker         int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
115*0e209d39SAndroid Build Coastguard Worker         return partitionArrays[pIndex];
116*0e209d39SAndroid Build Coastguard Worker     }
117*0e209d39SAndroid Build Coastguard Worker 
getDefaultRegionDistance()118*0e209d39SAndroid Build Coastguard Worker     int32_t getDefaultRegionDistance() const {
119*0e209d39SAndroid Build Coastguard Worker         return defaultRegionDistance;
120*0e209d39SAndroid Build Coastguard Worker     }
121*0e209d39SAndroid Build Coastguard Worker 
122*0e209d39SAndroid Build Coastguard Worker     const LikelySubtags &likelySubtags;
123*0e209d39SAndroid Build Coastguard Worker 
124*0e209d39SAndroid Build Coastguard Worker     // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
125*0e209d39SAndroid Build Coastguard Worker     // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
126*0e209d39SAndroid Build Coastguard Worker     // There is also a trie value for each subsequence of whole subtags.
127*0e209d39SAndroid Build Coastguard Worker     // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
128*0e209d39SAndroid Build Coastguard Worker     BytesTrie trie;
129*0e209d39SAndroid Build Coastguard Worker 
130*0e209d39SAndroid Build Coastguard Worker     /**
131*0e209d39SAndroid Build Coastguard Worker      * Maps each region to zero or more single-character partitions.
132*0e209d39SAndroid Build Coastguard Worker      */
133*0e209d39SAndroid Build Coastguard Worker     const uint8_t *regionToPartitionsIndex;
134*0e209d39SAndroid Build Coastguard Worker     const char **partitionArrays;
135*0e209d39SAndroid Build Coastguard Worker 
136*0e209d39SAndroid Build Coastguard Worker     /**
137*0e209d39SAndroid Build Coastguard Worker      * Used to get the paradigm region for a cluster, if there is one.
138*0e209d39SAndroid Build Coastguard Worker      */
139*0e209d39SAndroid Build Coastguard Worker     const LSR *paradigmLSRs;
140*0e209d39SAndroid Build Coastguard Worker     int32_t paradigmLSRsLength;
141*0e209d39SAndroid Build Coastguard Worker 
142*0e209d39SAndroid Build Coastguard Worker     int32_t defaultLanguageDistance;
143*0e209d39SAndroid Build Coastguard Worker     int32_t defaultScriptDistance;
144*0e209d39SAndroid Build Coastguard Worker     int32_t defaultRegionDistance;
145*0e209d39SAndroid Build Coastguard Worker     int32_t minRegionDistance;
146*0e209d39SAndroid Build Coastguard Worker     int32_t defaultDemotionPerDesiredLocale;
147*0e209d39SAndroid Build Coastguard Worker };
148*0e209d39SAndroid Build Coastguard Worker 
149*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
150*0e209d39SAndroid Build Coastguard Worker 
151*0e209d39SAndroid Build Coastguard Worker #endif  // __LOCDISTANCE_H__
152