1 // Copyright 2017 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_WIDESTRING_H_ 8 #define CORE_FXCRT_WIDESTRING_H_ 9 10 #include <stdarg.h> 11 #include <stddef.h> 12 #include <stdint.h> 13 #include <wchar.h> 14 15 #include <functional> 16 #include <iosfwd> 17 #include <iterator> 18 #include <utility> 19 20 #include "core/fxcrt/retain_ptr.h" 21 #include "core/fxcrt/string_data_template.h" 22 #include "core/fxcrt/string_view_template.h" 23 #include "third_party/abseil-cpp/absl/types/optional.h" 24 #include "third_party/base/check.h" 25 #include "third_party/base/containers/span.h" 26 27 namespace fxcrt { 28 29 class ByteString; 30 31 // A mutable string with shared buffers using copy-on-write semantics that 32 // avoids the cost of std::string's iterator stability guarantees. 33 class WideString { 34 public: 35 // TODO(crbug.com/pdfium/2031): Consider switching to `char16_t` instead. 36 using CharType = wchar_t; 37 using const_iterator = const CharType*; 38 using const_reverse_iterator = std::reverse_iterator<const_iterator>; 39 40 [[nodiscard]] static WideString FormatInteger(int i); 41 [[nodiscard]] static WideString Format(const wchar_t* pFormat, ...); 42 [[nodiscard]] static WideString FormatV(const wchar_t* lpszFormat, 43 va_list argList); 44 45 WideString(); 46 WideString(const WideString& other); 47 48 // Move-construct a WideString. After construction, |other| is empty. 49 WideString(WideString&& other) noexcept; 50 51 // Make a one-character string from one wide char. 52 explicit WideString(wchar_t ch); 53 54 // Deliberately implicit to avoid calling on every string literal. 55 // NOLINTNEXTLINE(runtime/explicit) 56 WideString(const wchar_t* ptr); 57 58 // No implicit conversions from byte strings. 59 // NOLINTNEXTLINE(runtime/explicit) 60 WideString(char) = delete; 61 62 WideString(const wchar_t* pStr, size_t len); 63 64 explicit WideString(WideStringView str); 65 WideString(WideStringView str1, WideStringView str2); 66 WideString(const std::initializer_list<WideStringView>& list); 67 68 ~WideString(); 69 70 [[nodiscard]] static WideString FromASCII(ByteStringView str); 71 [[nodiscard]] static WideString FromLatin1(ByteStringView str); 72 [[nodiscard]] static WideString FromDefANSI(ByteStringView str); 73 [[nodiscard]] static WideString FromUTF8(ByteStringView str); 74 [[nodiscard]] static WideString FromUTF16LE(const unsigned short* str, 75 size_t len); 76 [[nodiscard]] static WideString FromUTF16BE(const unsigned short* wstr, 77 size_t wlen); 78 79 [[nodiscard]] static size_t WStringLength(const unsigned short* str); 80 81 // Explicit conversion to C-style wide string. 82 // Note: Any subsequent modification of |this| will invalidate the result. c_str()83 const wchar_t* c_str() const { return m_pData ? m_pData->m_String : L""; } 84 85 // Explicit conversion to WideStringView. 86 // Note: Any subsequent modification of |this| will invalidate the result. AsStringView()87 WideStringView AsStringView() const { 88 return WideStringView(c_str(), GetLength()); 89 } 90 91 // Explicit conversion to span. 92 // Note: Any subsequent modification of |this| will invalidate the result. span()93 pdfium::span<const wchar_t> span() const { 94 return pdfium::make_span(m_pData ? m_pData->m_String : nullptr, 95 GetLength()); 96 } 97 98 // Note: Any subsequent modification of |this| will invalidate iterators. begin()99 const_iterator begin() const { return m_pData ? m_pData->m_String : nullptr; } end()100 const_iterator end() const { 101 return m_pData ? m_pData->m_String + m_pData->m_nDataLength : nullptr; 102 } 103 104 // Note: Any subsequent modification of |this| will invalidate iterators. rbegin()105 const_reverse_iterator rbegin() const { 106 return const_reverse_iterator(end()); 107 } rend()108 const_reverse_iterator rend() const { 109 return const_reverse_iterator(begin()); 110 } 111 112 // Holds on to buffer if possible for later re-use. Assign WideString() 113 // to force immediate release if desired. 114 void clear(); 115 GetLength()116 size_t GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; } GetStringLength()117 size_t GetStringLength() const { 118 return m_pData ? wcslen(m_pData->m_String) : 0; 119 } IsEmpty()120 bool IsEmpty() const { return !GetLength(); } IsValidIndex(size_t index)121 bool IsValidIndex(size_t index) const { return index < GetLength(); } IsValidLength(size_t length)122 bool IsValidLength(size_t length) const { return length <= GetLength(); } 123 124 WideString& operator=(const wchar_t* str); 125 WideString& operator=(WideStringView str); 126 WideString& operator=(const WideString& that); 127 128 // Move-assign a WideString. After assignment, |that| is empty. 129 WideString& operator=(WideString&& that) noexcept; 130 131 WideString& operator+=(const wchar_t* str); 132 WideString& operator+=(wchar_t ch); 133 WideString& operator+=(const WideString& str); 134 WideString& operator+=(WideStringView str); 135 136 bool operator==(const wchar_t* ptr) const; 137 bool operator==(WideStringView str) const; 138 bool operator==(const WideString& other) const; 139 140 bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); } 141 bool operator!=(WideStringView str) const { return !(*this == str); } 142 bool operator!=(const WideString& other) const { return !(*this == other); } 143 144 bool operator<(const wchar_t* ptr) const; 145 bool operator<(WideStringView str) const; 146 bool operator<(const WideString& other) const; 147 148 CharType operator[](const size_t index) const { 149 CHECK(IsValidIndex(index)); 150 return m_pData->m_String[index]; 151 } 152 Front()153 CharType Front() const { return GetLength() ? (*this)[0] : 0; } Back()154 CharType Back() const { return GetLength() ? (*this)[GetLength() - 1] : 0; } 155 156 void SetAt(size_t index, wchar_t c); 157 158 int Compare(const wchar_t* str) const; 159 int Compare(const WideString& str) const; 160 int CompareNoCase(const wchar_t* str) const; 161 162 WideString Substr(size_t offset) const; 163 WideString Substr(size_t first, size_t count) const; 164 WideString First(size_t count) const; 165 WideString Last(size_t count) const; 166 167 size_t Insert(size_t index, wchar_t ch); InsertAtFront(wchar_t ch)168 size_t InsertAtFront(wchar_t ch) { return Insert(0, ch); } InsertAtBack(wchar_t ch)169 size_t InsertAtBack(wchar_t ch) { return Insert(GetLength(), ch); } 170 size_t Delete(size_t index, size_t count = 1); 171 172 void MakeLower(); 173 void MakeUpper(); 174 175 void Trim(); 176 void Trim(wchar_t target); 177 void Trim(WideStringView targets); 178 179 void TrimLeft(); 180 void TrimLeft(wchar_t target); 181 void TrimLeft(WideStringView targets); 182 183 void TrimRight(); 184 void TrimRight(wchar_t target); 185 void TrimRight(WideStringView targets); 186 187 void Reserve(size_t len); 188 189 // Note: any modification of the string (including ReleaseBuffer()) may 190 // invalidate the span, which must not outlive its buffer. 191 pdfium::span<wchar_t> GetBuffer(size_t nMinBufLength); 192 void ReleaseBuffer(size_t nNewLength); 193 194 int GetInteger() const; 195 196 absl::optional<size_t> Find(WideStringView subStr, size_t start = 0) const; 197 absl::optional<size_t> Find(wchar_t ch, size_t start = 0) const; 198 absl::optional<size_t> ReverseFind(wchar_t ch) const; 199 200 bool Contains(WideStringView lpszSub, size_t start = 0) const { 201 return Find(lpszSub, start).has_value(); 202 } 203 204 bool Contains(char ch, size_t start = 0) const { 205 return Find(ch, start).has_value(); 206 } 207 208 size_t Replace(WideStringView pOld, WideStringView pNew); 209 size_t Remove(wchar_t ch); 210 IsASCII()211 bool IsASCII() const { return AsStringView().IsASCII(); } EqualsASCII(ByteStringView that)212 bool EqualsASCII(ByteStringView that) const { 213 return AsStringView().EqualsASCII(that); 214 } EqualsASCIINoCase(ByteStringView that)215 bool EqualsASCIINoCase(ByteStringView that) const { 216 return AsStringView().EqualsASCIINoCase(that); 217 } 218 219 ByteString ToASCII() const; 220 ByteString ToLatin1() const; 221 ByteString ToDefANSI() const; 222 ByteString ToUTF8() const; 223 224 // This method will add \0\0 to the end of the string to represent the 225 // wide string terminator. These values are in the string, not just the data, 226 // so GetLength() will include them. 227 ByteString ToUTF16LE() const; 228 229 // Replace the characters &<>'" with HTML entities. 230 WideString EncodeEntities() const; 231 232 protected: 233 using StringData = StringDataTemplate<wchar_t>; 234 235 void ReallocBeforeWrite(size_t nNewLength); 236 void AllocBeforeWrite(size_t nNewLength); 237 void AllocCopy(WideString& dest, size_t nCopyLen, size_t nCopyIndex) const; 238 void AssignCopy(const wchar_t* pSrcData, size_t nSrcLen); 239 void Concat(const wchar_t* pSrcData, size_t nSrcLen); 240 intptr_t ReferenceCountForTesting() const; 241 242 RetainPtr<StringData> m_pData; 243 244 friend class WideString_Assign_Test; 245 friend class WideString_ConcatInPlace_Test; 246 friend class WideString_Construct_Test; 247 friend class StringPool_WideString_Test; 248 }; 249 250 inline WideString operator+(WideStringView str1, WideStringView str2) { 251 return WideString(str1, str2); 252 } 253 inline WideString operator+(WideStringView str1, const wchar_t* str2) { 254 return WideString(str1, str2); 255 } 256 inline WideString operator+(const wchar_t* str1, WideStringView str2) { 257 return WideString(str1, str2); 258 } 259 inline WideString operator+(WideStringView str1, wchar_t ch) { 260 return WideString(str1, WideStringView(ch)); 261 } 262 inline WideString operator+(wchar_t ch, WideStringView str2) { 263 return WideString(WideStringView(ch), str2); 264 } 265 inline WideString operator+(const WideString& str1, const WideString& str2) { 266 return WideString(str1.AsStringView(), str2.AsStringView()); 267 } 268 inline WideString operator+(const WideString& str1, wchar_t ch) { 269 return WideString(str1.AsStringView(), WideStringView(ch)); 270 } 271 inline WideString operator+(wchar_t ch, const WideString& str2) { 272 return WideString(WideStringView(ch), str2.AsStringView()); 273 } 274 inline WideString operator+(const WideString& str1, const wchar_t* str2) { 275 return WideString(str1.AsStringView(), str2); 276 } 277 inline WideString operator+(const wchar_t* str1, const WideString& str2) { 278 return WideString(str1, str2.AsStringView()); 279 } 280 inline WideString operator+(const WideString& str1, WideStringView str2) { 281 return WideString(str1.AsStringView(), str2); 282 } 283 inline WideString operator+(WideStringView str1, const WideString& str2) { 284 return WideString(str1, str2.AsStringView()); 285 } 286 inline bool operator==(const wchar_t* lhs, const WideString& rhs) { 287 return rhs == lhs; 288 } 289 inline bool operator==(WideStringView lhs, const WideString& rhs) { 290 return rhs == lhs; 291 } 292 inline bool operator!=(const wchar_t* lhs, const WideString& rhs) { 293 return rhs != lhs; 294 } 295 inline bool operator!=(WideStringView lhs, const WideString& rhs) { 296 return rhs != lhs; 297 } 298 inline bool operator<(const wchar_t* lhs, const WideString& rhs) { 299 return rhs.Compare(lhs) > 0; 300 } 301 302 std::wostream& operator<<(std::wostream& os, const WideString& str); 303 std::ostream& operator<<(std::ostream& os, const WideString& str); 304 std::wostream& operator<<(std::wostream& os, WideStringView str); 305 std::ostream& operator<<(std::ostream& os, WideStringView str); 306 307 // This is declared here for use in gtest-based tests but is defined in a test 308 // support target. This should not be used in production code. Just use 309 // operator<< from above instead. 310 // In some cases, gtest will automatically use operator<< as well, but in this 311 // case, it needs PrintTo() because WideString looks like a container to gtest. 312 void PrintTo(const WideString& str, std::ostream* os); 313 314 } // namespace fxcrt 315 316 using WideString = fxcrt::WideString; 317 318 uint32_t FX_HashCode_GetW(WideStringView str); 319 uint32_t FX_HashCode_GetLoweredW(WideStringView str); 320 321 namespace std { 322 323 template <> 324 struct hash<WideString> { 325 size_t operator()(const WideString& str) const { 326 return FX_HashCode_GetW(str.AsStringView()); 327 } 328 }; 329 330 } // namespace std 331 332 extern template struct std::hash<WideString>; 333 334 #endif // CORE_FXCRT_WIDESTRING_H_ 335