xref: /aosp_15_r20/external/icu/libicu/cts_headers/measunit_impl.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #ifndef __MEASUNIT_IMPL_H__
5 #define __MEASUNIT_IMPL_H__
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_FORMATTING
10 
11 #include "unicode/measunit.h"
12 #include "cmemory.h"
13 #include "charstr.h"
14 
15 U_NAMESPACE_BEGIN
16 
// Forward declaration: LongNameHandler is only named here so that it can be
// declared a friend of MeasureUnitImpl.
namespace number::impl {
class LongNameHandler;
}

/**
 * Default currency code, "XXX", as a NUL-terminated UTF-16 string.
 *
 * Declared constexpr so that it is usable in constant expressions; linkage
 * stays internal, as it was with `static const`.
 */
static constexpr char16_t kDefaultCurrency[] = u"XXX";

/** Default currency code, "XXX", as a NUL-terminated char string. */
static constexpr char kDefaultCurrency8[] = "XXX";
23 
24 /**
25  * Looks up the "unitQuantity" (aka "type" or "category") of a base unit
26  * identifier. The category is returned by value as a CharString; failures are
27  * reported through `status`.
28  *
29  * This only supports base units: other units must be resolved to base units
30  * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be
31  * returned.
32  *
33  * Categories are found in `unitQuantities` in the `units` resource (see
34  * `units.txt`).
35  */
36 // TODO: make this function accept any `MeasureUnit`, as the Java implementation does, and move it to the `UnitsData` class.
37 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status);
38 
39 /**
40  * A struct representing a single unit (optional SI or binary prefix, and dimensionality).
41  */
42 struct U_I18N_API SingleUnitImpl : public UMemory {
43     /**
44      * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
45      * code and returns the base dimensionless unit. Parses if necessary.
46      */
47     static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
48 
49     /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
50     MeasureUnit build(UErrorCode& status) const;
51 
52     /**
53      * Returns the "simple unit ID", without SI or dimensionality prefix: this
54      * instance may represent a square-kilometer, but only "meter" will be
55      * returned.
56      *
57      * The returned pointer points at memory that exists for the duration of the
58      * program's running.
59      */
60     const char *getSimpleUnitID() const;
61 
62     /**
63      * Generates and append a neutral identifier string for a single unit which means we do not include
64      * the dimension signal.
65      */
66     void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
67 
68     /**
69      * Returns the index of this unit's "quantity" in unitQuantities (in
70      * measunit_extra.cpp). The value of this index determines sort order for
71      * normalization of unit identifiers.
72      */
73     int32_t getUnitCategoryIndex() const;
74 
75     /**
76      * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
77      * sorting and coalescing.
78      *
79      * Sort order of units is specified by UTS #35
80      * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization).
81      *
82      * Takes the sign of dimensionality into account, but not the absolute
83      * value: per-meter is not considered the same as meter, but meter is
84      * considered the same as square-meter.
85      *
86      * The dimensionless unit generally does not get compared, but if it did, it
87      * would sort before other units by virtue of index being < 0 and
88      * dimensionality not being negative.
89      */
compareToSingleUnitImpl90     int32_t compareTo(const SingleUnitImpl& other) const {
91         if (dimensionality < 0 && other.dimensionality > 0) {
92             // Positive dimensions first
93             return 1;
94         }
95         if (dimensionality > 0 && other.dimensionality < 0) {
96             return -1;
97         }
98 
99         // Sort by official quantity order
100         int32_t thisQuantity = this->getUnitCategoryIndex();
101         int32_t otherQuantity = other.getUnitCategoryIndex();
102         if (thisQuantity < otherQuantity) {
103             return -1;
104         }
105         if (thisQuantity > otherQuantity) {
106             return 1;
107         }
108 
109         // If quantity order didn't help, then we go by index.
110         if (index < other.index) {
111             return -1;
112         }
113         if (index > other.index) {
114             return 1;
115         }
116 
117         // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
118         // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
119         // compare the bases.
120         // NOTE: this methodology will fail if the binary prefix more than or equal 98.
121         int32_t unitBase = umeas_getPrefixBase(unitPrefix);
122         int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix);
123 
124         // Values for comparison purposes only.
125         int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3
126                                                                  : umeas_getPrefixPower(unitPrefix);
127         int32_t otherUnitPower =
128             otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3
129                                                       : umeas_getPrefixPower(other.unitPrefix);
130 
131         // NOTE: if the unitPower is less than the other,
132         // we return 1 not -1. Thus because we want th sorting order
133         // for the bigger prefix to be before the smaller.
134         // Example: megabyte should come before kilobyte.
135         if (unitPower < otherUnitPower) {
136             return 1;
137         }
138         if (unitPower > otherUnitPower) {
139             return -1;
140         }
141 
142         if (unitBase < otherUnitBase) {
143             return 1;
144         }
145         if (unitBase > otherUnitBase) {
146             return -1;
147         }
148 
149         return 0;
150     }
151 
152     /**
153      * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing.
154      *
155      * Units with the same base unit and SI or binary prefix should match, except that they must also
156      * have the same dimensionality sign, such that we don't merge numerator and denominator.
157      */
isCompatibleWithSingleUnitImpl158     bool isCompatibleWith(const SingleUnitImpl& other) const {
159         return (compareTo(other) == 0);
160     }
161 
162     /**
163      * Returns true if this unit is the "dimensionless base unit", as produced
164      * by the MeasureUnit() default constructor. (This does not include the
165      * likes of concentrations or angles.)
166      */
isDimensionlessSingleUnitImpl167     bool isDimensionless() const {
168         return index == -1;
169     }
170 
171     /**
172      * Simple unit index, unique for every simple unit, -1 for the dimensionless
173      * unit. This is an index into a string list in measunit_extra.cpp, as
174      * loaded by SimpleUnitIdentifiersSink.
175      *
176      * The default value is -1, meaning the dimensionless unit:
177      * isDimensionless() will return true, until index is changed.
178      */
179     int32_t index = -1;
180 
181     /**
182      * SI or binary prefix.
183      *
184      * This is ignored for the dimensionless unit.
185      */
186     UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE;
187 
188     /**
189      * Dimensionality.
190      *
191      * This is meaningless for the dimensionless unit.
192      */
193     int32_t dimensionality = 1;
194 };
195 
196 // Forward declaration: MeasureUnitImpl::extractIndividualUnitsWithIndices() returns a vector of this type; the struct itself is defined after MeasureUnitImpl.
197 struct MeasureUnitImplWithIndex;
198 
199 // Export explicit template instantiations of MaybeStackArray, MemoryPool and
200 // MaybeStackVector for SingleUnitImpl. This is required when building DLLs for Windows. (See
201 // datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
202 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
203 template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>;
204 template class U_I18N_API MemoryPool<SingleUnitImpl, 8>;
205 template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>;
206 #endif  // U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
207 
208 /**
209  * Internal representation of measurement units. Capable of representing all complexities of units,
210  * including mixed and compound units.
211  */
212 class U_I18N_API MeasureUnitImpl : public UMemory {
213   public:
214     MeasureUnitImpl() = default;
215     MeasureUnitImpl(MeasureUnitImpl &&other) = default;
216     // No copy constructor, use MeasureUnitImpl::copy() to make it explicit.
217     MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete;
218     MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status);
219 
220     MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default;
221 
222     /** Extract the MeasureUnitImpl from a MeasureUnit. */
get(const MeasureUnit & measureUnit)223     static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) {
224         return measureUnit.fImpl;
225     }
226 
227     /**
228      * Parse a unit identifier into a MeasureUnitImpl.
229      *
230      * @param identifier The unit identifier string.
231      * @param status Set if the identifier string is not valid.
232      * @return A newly parsed value object. Behaviour of this unit is
233      * unspecified if an error is returned via status.
234      */
235     static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
236 
237     /**
238      * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
239      *
240      * @param measureUnit The source MeasureUnit.
241      * @param memory A place to write the new MeasureUnitImpl if parsing is required.
242      * @param status Set if an error occurs.
243      * @return A reference to either measureUnit.fImpl or memory.
244      */
245     static const MeasureUnitImpl& forMeasureUnit(
246         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status);
247 
248     /**
249      * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
250      *
251      * @param measureUnit The source MeasureUnit.
252      * @param status Set if an error occurs.
253      * @return A value object, either newly parsed or copied from measureUnit.
254      */
255     static MeasureUnitImpl forMeasureUnitMaybeCopy(
256         const MeasureUnit& measureUnit, UErrorCode& status);
257 
258     /**
259      * Used for currency units.
260      */
forCurrencyCode(StringPiece currencyCode)261     static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) {
262         MeasureUnitImpl result;
263         UErrorCode localStatus = U_ZERO_ERROR;
264         result.identifier.append(currencyCode, localStatus);
265         // localStatus is not expected to fail since currencyCode should be 3 chars long
266         return result;
267     }
268 
269     /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */
270     MeasureUnit build(UErrorCode& status) &&;
271 
272     /**
273      * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit.
274      */
275     MeasureUnitImpl copy(UErrorCode& status) const;
276 
277     /**
278      * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices.
279      *      For example:
280      *          -   if the `MeasureUnitImpl` is `foot-per-hour`
281      *                  it will return a list of 1 {(0, `foot-per-hour`)}
282      *          -   if the `MeasureUnitImpl` is `foot-and-inch`
283      *                  it will return a list of 2 {(0, `foot`), (1, `inch`)}
284      */
285     MaybeStackVector<MeasureUnitImplWithIndex>
286     extractIndividualUnitsWithIndices(UErrorCode &status) const;
287 
288     /** Mutates this MeasureUnitImpl to take the reciprocal. */
289     void takeReciprocal(UErrorCode& status);
290 
291     /**
292      * Returns a simplified version of the unit.
293      * NOTE: the simplification happen when there are two units equals in their base unit and their
294      * prefixes.
295      *
296      * Example 1: "square-meter-per-meter" --> "meter"
297      * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter"
298      */
299     MeasureUnitImpl copyAndSimplify(UErrorCode &status) const;
300 
301     /**
302      * Mutates this MeasureUnitImpl to append a single unit.
303      *
304      * @return true if a new item was added. If unit is the dimensionless unit,
305      * it is never added: the return value will always be false.
306      */
307     bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status);
308 
309     /**
310      * Normalizes a MeasureUnitImpl and generate the identifier string in place.
311      */
312     void serialize(UErrorCode &status);
313 
314     /** The complexity, either SINGLE, COMPOUND, or MIXED. */
315     UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
316 
317     /**
318      * The list of single units. These may be summed or multiplied, based on the
319      * value of the complexity field.
320      *
321      * The "dimensionless" unit (SingleUnitImpl default constructor) must not be
322      * added to this list.
323      */
324     MaybeStackVector<SingleUnitImpl> singleUnits;
325 
326     /**
327      * The full unit identifier.  Owned by the MeasureUnitImpl.  Empty if not computed.
328      */
329     CharString identifier;
330 
331     // For calling serialize
332     // TODO(icu-units#147): revisit serialization
333     friend class number::impl::LongNameHandler;
334 };
335 
336 struct U_I18N_API MeasureUnitImplWithIndex : public UMemory {
337     const int32_t index;
338     MeasureUnitImpl unitImpl;
339     // Makes a copy of unitImpl.
MeasureUnitImplWithIndexMeasureUnitImplWithIndex340     MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status)
341         : index(index), unitImpl(unitImpl.copy(status)) {
342     }
MeasureUnitImplWithIndexMeasureUnitImplWithIndex343     MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status)
344         : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) {
345     }
346 };
347 
348 // Export explicit template instantiations of MaybeStackArray, MemoryPool and
349 // MaybeStackVector for MeasureUnitImplWithIndex. This is required when building DLLs for Windows. (See
350 // datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
351 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
352 template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>;
353 template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>;
354 template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>;
355 
356 // Export explicit template instantiations of LocalPointerBase and LocalPointer for
357 // MeasureUnitImpl. (When building DLLs for Windows this is required.)
358 // NOTE(review): no LocalPointer<MeasureUnitImpl> data member is visible in this header - confirm which class holds it.
359 #if defined(_MSC_VER)
360 // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!=
361 #pragma warning(push)
362 #pragma warning(disable : 4661)
363 #endif  // defined(_MSC_VER)
364 template class U_I18N_API LocalPointerBase<MeasureUnitImpl>;
365 template class U_I18N_API LocalPointer<MeasureUnitImpl>;
366 #if defined(_MSC_VER)
367 #pragma warning(pop)
368 #endif  // defined(_MSC_VER)
369 #endif  // U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
370 
371 U_NAMESPACE_END
372 
373 #endif /* #if !UCONFIG_NO_FORMATTING */
374 #endif //__MEASUNIT_IMPL_H__
375