// Copyright 2023 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef CORE_FXCRT_UTF16_H_ #define CORE_FXCRT_UTF16_H_ #include "third_party/base/check.h" namespace pdfium { // The number of suffix bits in a UTF-16 surrogate. inline constexpr int kSurrogateBits = 10; // A bitmask for the suffix of a UTF-16 surrogate. inline constexpr char16_t kSurrogateMask = (1 << kSurrogateBits) - 1; // The first supplementary code point, `U+10000`. inline constexpr char32_t kMinimumSupplementaryCodePoint = 0x10000; // The last supplementary code point, `U+10FFFF`. inline constexpr char32_t kMaximumSupplementaryCodePoint = kMinimumSupplementaryCodePoint + (kSurrogateMask << kSurrogateBits | kSurrogateMask); // The first UTF-16 high surrogate code unit, `U+D800`. inline constexpr char16_t kMinimumHighSurrogateCodeUnit = 0xd800; // The last UTF-16 high surrogate code unit, `U+DBFF`. inline constexpr char16_t kMaximumHighSurrogateCodeUnit = kMinimumHighSurrogateCodeUnit | kSurrogateMask; // The first UTF-16 low surrogate code unit, `U+DC00`. inline constexpr char16_t kMinimumLowSurrogateCodeUnit = kMaximumHighSurrogateCodeUnit + 1; // The last UTF-16 low surrogate code unit, `U+DFFF`. inline constexpr char16_t kMaximumLowSurrogateCodeUnit = kMinimumLowSurrogateCodeUnit | kSurrogateMask; // Returns `true` if `code_point` is in a supplementary plane, and therefore // requires encoding as a UTF-16 surrogate pair. constexpr bool IsSupplementary(char32_t code_point) { return code_point >= kMinimumSupplementaryCodePoint && code_point <= kMaximumSupplementaryCodePoint; } // Returns `true` if `code_point` is a UTF-16 high surrogate. constexpr bool IsHighSurrogate(char32_t code_point) { return code_point >= kMinimumHighSurrogateCodeUnit && code_point <= kMaximumHighSurrogateCodeUnit; } // Returns `true` if `code_point` is a UTF-16 low surrogate. constexpr bool IsLowSurrogate(char32_t code_point) { return code_point >= kMinimumLowSurrogateCodeUnit && code_point <= kMaximumLowSurrogateCodeUnit; } // A UTF-16 surrogate pair. class SurrogatePair final { public: // Constructs a surrogate pair from a high and a low surrogate. constexpr SurrogatePair(char16_t high, char16_t low) : high_(high), low_(low) { DCHECK(IsHighSurrogate(high_)); DCHECK(IsLowSurrogate(low_)); } // Constructs a surrogate pair from a code point. explicit constexpr SurrogatePair(char32_t code_point) : high_(GetHighSurrogate(code_point)), low_(GetLowSurrogate(code_point)) { // This constructor initializes `high_` and `low_` using helper functions // because C++17 requires it for `constexpr` constructors. DCHECK(IsSupplementary(code_point)); } constexpr char16_t high() const { return high_; } constexpr char16_t low() const { return low_; } // Decodes this surrogate pair to a code point. constexpr char32_t ToCodePoint() const { char32_t code_point = low_ & kSurrogateMask; code_point |= (high_ & kSurrogateMask) << kSurrogateBits; return kMinimumSupplementaryCodePoint + code_point; } private: static constexpr char16_t GetHighSurrogate(char32_t code_point) { code_point -= kMinimumSupplementaryCodePoint; char16_t code_unit = (code_point >> kSurrogateBits) & kSurrogateMask; return kMinimumHighSurrogateCodeUnit | code_unit; } static constexpr char16_t GetLowSurrogate(char32_t code_point) { code_point -= kMinimumSupplementaryCodePoint; char16_t code_unit = code_point & kSurrogateMask; return kMinimumLowSurrogateCodeUnit | code_unit; } char16_t high_; char16_t low_; }; } // namespace pdfium #endif // CORE_FXCRT_UTF16_H_