1 // © 2020 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #ifndef __MEASUNIT_IMPL_H__ 5 #define __MEASUNIT_IMPL_H__ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_FORMATTING 10 11 #include "unicode/measunit.h" 12 #include "cmemory.h" 13 #include "charstr.h" 14 15 U_NAMESPACE_BEGIN 16 17 namespace number::impl { 18 class LongNameHandler; 19 } 20 21 static const char16_t kDefaultCurrency[] = u"XXX"; 22 static const char kDefaultCurrency8[] = "XXX"; 23 24 /** 25 * Looks up the "unitQuantity" (aka "type" or "category") of a base unit 26 * identifier. The category is returned via `result`, which must initially be 27 * empty. 28 * 29 * This only supports base units: other units must be resolved to base units 30 * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be 31 * returned. 32 * 33 * Categories are found in `unitQuantities` in the `units` resource (see 34 * `units.txt`). 35 */ 36 // TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class. 37 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status); 38 39 /** 40 * A struct representing a single unit (optional SI or binary prefix, and dimensionality). 41 */ 42 struct U_I18N_API SingleUnitImpl : public UMemory { 43 /** 44 * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error 45 * code and returns the base dimensionless unit. Parses if necessary. 46 */ 47 static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); 48 49 /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */ 50 MeasureUnit build(UErrorCode& status) const; 51 52 /** 53 * Returns the "simple unit ID", without SI or dimensionality prefix: this 54 * instance may represent a square-kilometer, but only "meter" will be 55 * returned. 56 * 57 * The returned pointer points at memory that exists for the duration of the 58 * program's running. 59 */ 60 const char *getSimpleUnitID() const; 61 62 /** 63 * Generates and append a neutral identifier string for a single unit which means we do not include 64 * the dimension signal. 65 */ 66 void appendNeutralIdentifier(CharString &result, UErrorCode &status) const; 67 68 /** 69 * Returns the index of this unit's "quantity" in unitQuantities (in 70 * measunit_extra.cpp). The value of this index determines sort order for 71 * normalization of unit identifiers. 72 */ 73 int32_t getUnitCategoryIndex() const; 74 75 /** 76 * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of 77 * sorting and coalescing. 78 * 79 * Sort order of units is specified by UTS #35 80 * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization). 81 * 82 * Takes the sign of dimensionality into account, but not the absolute 83 * value: per-meter is not considered the same as meter, but meter is 84 * considered the same as square-meter. 85 * 86 * The dimensionless unit generally does not get compared, but if it did, it 87 * would sort before other units by virtue of index being < 0 and 88 * dimensionality not being negative. 89 */ compareToSingleUnitImpl90 int32_t compareTo(const SingleUnitImpl& other) const { 91 if (dimensionality < 0 && other.dimensionality > 0) { 92 // Positive dimensions first 93 return 1; 94 } 95 if (dimensionality > 0 && other.dimensionality < 0) { 96 return -1; 97 } 98 99 // Sort by official quantity order 100 int32_t thisQuantity = this->getUnitCategoryIndex(); 101 int32_t otherQuantity = other.getUnitCategoryIndex(); 102 if (thisQuantity < otherQuantity) { 103 return -1; 104 } 105 if (thisQuantity > otherQuantity) { 106 return 1; 107 } 108 109 // If quantity order didn't help, then we go by index. 110 if (index < other.index) { 111 return -1; 112 } 113 if (index > other.index) { 114 return 1; 115 } 116 117 // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can 118 // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can 119 // compare the bases. 120 // NOTE: this methodology will fail if the binary prefix more than or equal 98. 121 int32_t unitBase = umeas_getPrefixBase(unitPrefix); 122 int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix); 123 124 // Values for comparison purposes only. 125 int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3 126 : umeas_getPrefixPower(unitPrefix); 127 int32_t otherUnitPower = 128 otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3 129 : umeas_getPrefixPower(other.unitPrefix); 130 131 // NOTE: if the unitPower is less than the other, 132 // we return 1 not -1. Thus because we want th sorting order 133 // for the bigger prefix to be before the smaller. 134 // Example: megabyte should come before kilobyte. 135 if (unitPower < otherUnitPower) { 136 return 1; 137 } 138 if (unitPower > otherUnitPower) { 139 return -1; 140 } 141 142 if (unitBase < otherUnitBase) { 143 return 1; 144 } 145 if (unitBase > otherUnitBase) { 146 return -1; 147 } 148 149 return 0; 150 } 151 152 /** 153 * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing. 154 * 155 * Units with the same base unit and SI or binary prefix should match, except that they must also 156 * have the same dimensionality sign, such that we don't merge numerator and denominator. 157 */ isCompatibleWithSingleUnitImpl158 bool isCompatibleWith(const SingleUnitImpl& other) const { 159 return (compareTo(other) == 0); 160 } 161 162 /** 163 * Returns true if this unit is the "dimensionless base unit", as produced 164 * by the MeasureUnit() default constructor. (This does not include the 165 * likes of concentrations or angles.) 166 */ isDimensionlessSingleUnitImpl167 bool isDimensionless() const { 168 return index == -1; 169 } 170 171 /** 172 * Simple unit index, unique for every simple unit, -1 for the dimensionless 173 * unit. This is an index into a string list in measunit_extra.cpp, as 174 * loaded by SimpleUnitIdentifiersSink. 175 * 176 * The default value is -1, meaning the dimensionless unit: 177 * isDimensionless() will return true, until index is changed. 178 */ 179 int32_t index = -1; 180 181 /** 182 * SI or binary prefix. 183 * 184 * This is ignored for the dimensionless unit. 185 */ 186 UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE; 187 188 /** 189 * Dimensionality. 190 * 191 * This is meaningless for the dimensionless unit. 192 */ 193 int32_t dimensionality = 1; 194 }; 195 196 // Forward declaration 197 struct MeasureUnitImplWithIndex; 198 199 // Export explicit template instantiations of MaybeStackArray, MemoryPool and 200 // MaybeStackVector. This is required when building DLLs for Windows. (See 201 // datefmt.h, collationiterator.h, erarules.h and others for similar examples.) 202 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 203 template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>; 204 template class U_I18N_API MemoryPool<SingleUnitImpl, 8>; 205 template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>; 206 #endif 207 208 /** 209 * Internal representation of measurement units. Capable of representing all complexities of units, 210 * including mixed and compound units. 211 */ 212 class U_I18N_API MeasureUnitImpl : public UMemory { 213 public: 214 MeasureUnitImpl() = default; 215 MeasureUnitImpl(MeasureUnitImpl &&other) = default; 216 // No copy constructor, use MeasureUnitImpl::copy() to make it explicit. 217 MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete; 218 MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status); 219 220 MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default; 221 222 /** Extract the MeasureUnitImpl from a MeasureUnit. */ get(const MeasureUnit & measureUnit)223 static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) { 224 return measureUnit.fImpl; 225 } 226 227 /** 228 * Parse a unit identifier into a MeasureUnitImpl. 229 * 230 * @param identifier The unit identifier string. 231 * @param status Set if the identifier string is not valid. 232 * @return A newly parsed value object. Behaviour of this unit is 233 * unspecified if an error is returned via status. 234 */ 235 static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); 236 237 /** 238 * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. 239 * 240 * @param measureUnit The source MeasureUnit. 241 * @param memory A place to write the new MeasureUnitImpl if parsing is required. 242 * @param status Set if an error occurs. 243 * @return A reference to either measureUnit.fImpl or memory. 244 */ 245 static const MeasureUnitImpl& forMeasureUnit( 246 const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status); 247 248 /** 249 * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. 250 * 251 * @param measureUnit The source MeasureUnit. 252 * @param status Set if an error occurs. 253 * @return A value object, either newly parsed or copied from measureUnit. 254 */ 255 static MeasureUnitImpl forMeasureUnitMaybeCopy( 256 const MeasureUnit& measureUnit, UErrorCode& status); 257 258 /** 259 * Used for currency units. 260 */ forCurrencyCode(StringPiece currencyCode)261 static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) { 262 MeasureUnitImpl result; 263 UErrorCode localStatus = U_ZERO_ERROR; 264 result.identifier.append(currencyCode, localStatus); 265 // localStatus is not expected to fail since currencyCode should be 3 chars long 266 return result; 267 } 268 269 /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */ 270 MeasureUnit build(UErrorCode& status) &&; 271 272 /** 273 * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit. 274 */ 275 MeasureUnitImpl copy(UErrorCode& status) const; 276 277 /** 278 * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices. 279 * For example: 280 * - if the `MeasureUnitImpl` is `foot-per-hour` 281 * it will return a list of 1 {(0, `foot-per-hour`)} 282 * - if the `MeasureUnitImpl` is `foot-and-inch` 283 * it will return a list of 2 {(0, `foot`), (1, `inch`)} 284 */ 285 MaybeStackVector<MeasureUnitImplWithIndex> 286 extractIndividualUnitsWithIndices(UErrorCode &status) const; 287 288 /** Mutates this MeasureUnitImpl to take the reciprocal. */ 289 void takeReciprocal(UErrorCode& status); 290 291 /** 292 * Returns a simplified version of the unit. 293 * NOTE: the simplification happen when there are two units equals in their base unit and their 294 * prefixes. 295 * 296 * Example 1: "square-meter-per-meter" --> "meter" 297 * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter" 298 */ 299 MeasureUnitImpl copyAndSimplify(UErrorCode &status) const; 300 301 /** 302 * Mutates this MeasureUnitImpl to append a single unit. 303 * 304 * @return true if a new item was added. If unit is the dimensionless unit, 305 * it is never added: the return value will always be false. 306 */ 307 bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status); 308 309 /** 310 * Normalizes a MeasureUnitImpl and generate the identifier string in place. 311 */ 312 void serialize(UErrorCode &status); 313 314 /** The complexity, either SINGLE, COMPOUND, or MIXED. */ 315 UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; 316 317 /** 318 * The list of single units. These may be summed or multiplied, based on the 319 * value of the complexity field. 320 * 321 * The "dimensionless" unit (SingleUnitImpl default constructor) must not be 322 * added to this list. 323 */ 324 MaybeStackVector<SingleUnitImpl> singleUnits; 325 326 /** 327 * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. 328 */ 329 CharString identifier; 330 331 // For calling serialize 332 // TODO(icu-units#147): revisit serialization 333 friend class number::impl::LongNameHandler; 334 }; 335 336 struct U_I18N_API MeasureUnitImplWithIndex : public UMemory { 337 const int32_t index; 338 MeasureUnitImpl unitImpl; 339 // Makes a copy of unitImpl. MeasureUnitImplWithIndexMeasureUnitImplWithIndex340 MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status) 341 : index(index), unitImpl(unitImpl.copy(status)) { 342 } MeasureUnitImplWithIndexMeasureUnitImplWithIndex343 MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status) 344 : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) { 345 } 346 }; 347 348 // Export explicit template instantiations of MaybeStackArray, MemoryPool and 349 // MaybeStackVector. This is required when building DLLs for Windows. (See 350 // datefmt.h, collationiterator.h, erarules.h and others for similar examples.) 351 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 352 template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>; 353 template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>; 354 template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>; 355 356 // Export an explicit template instantiation of the LocalPointer that is used as a 357 // data member of MeasureUnitImpl. 358 // (When building DLLs for Windows this is required.) 359 #if defined(_MSC_VER) 360 // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= 361 #pragma warning(push) 362 #pragma warning(disable : 4661) 363 #endif 364 template class U_I18N_API LocalPointerBase<MeasureUnitImpl>; 365 template class U_I18N_API LocalPointer<MeasureUnitImpl>; 366 #if defined(_MSC_VER) 367 #pragma warning(pop) 368 #endif 369 #endif 370 371 U_NAMESPACE_END 372 373 #endif /* #if !UCONFIG_NO_FORMATTING */ 374 #endif //__MEASUNIT_IMPL_H__ 375