xref: /aosp_15_r20/external/tensorflow/tensorflow/core/platform/tstring.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_TSTRING_H_
17 #define TENSORFLOW_CORE_PLATFORM_TSTRING_H_
18 
19 #include <assert.h>
20 
21 #include <ostream>
22 #include <string>
23 
24 #include "tensorflow/core/platform/cord.h"
25 #include "tensorflow/core/platform/ctstring.h"
26 #include "tensorflow/core/platform/stringpiece.h"
27 
28 namespace tensorflow {
29 
30 // tensorflow::tstring is the scalar type for DT_STRING tensors.
31 //
32 // tstrings are meant to be used when interfacing with string tensors, and
33 // should not be considered as a general replacement for std::string in
34 // tensorflow.  The primary purpose of tstring is to provide a unified and
35 // stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating
36 // unnecessary conversions across language boundaries, and allowing for compiler
37 // agnostic interoperability across dynamically loaded modules.
38 //
// In addition to ABI stability, tstring features two string subtypes, VIEW and
// OFFSET.
41 //
42 // VIEW tstrings are views into unowned character buffers; they can be used to
43 // pass around existing character strings without incurring a per object heap
44 // allocation.  Note that, like std::string_view, it is the user's
45 // responsibility to ensure that the underlying buffer of a VIEW tstring exceeds
46 // the lifetime of the associated tstring object.
47 //
48 // TODO(dero): Methods for creating OFFSET tensors are not currently
49 // implemented.
50 //
51 // OFFSET tstrings are platform independent offset defined strings which can be
52 // directly mmaped or copied into a tensor buffer without the need for
53 // deserialization or processing.  For security reasons, it is imperative that
54 // OFFSET based string tensors are validated before use, or are from a trusted
55 // source.
56 //
57 // Underlying VIEW and OFFSET buffers are considered immutable, so l-value
58 // assignment, mutation, or non-const access to data() of tstrings will result
59 // in the conversion to an owned SMALL/LARGE type.
60 //
61 // The interface for tstring largely overlaps with std::string. Except where
62 // noted, expect equivalent semantics with synonymous std::string methods.
63 class tstring {
64   TF_TString tstr_;
65 
66  public:
67   enum Type {
68     // See cstring.h
69     SMALL = TF_TSTR_SMALL,
70     LARGE = TF_TSTR_LARGE,
71     OFFSET = TF_TSTR_OFFSET,
72     VIEW = TF_TSTR_VIEW,
73   };
74 
75   // Assignment to a tstring object with a tstring::view type will create a VIEW
76   // type tstring.
77   class view {
78     const char* data_;
79     size_t size_;
80 
81    public:
view(const char * data,size_t size)82     explicit view(const char* data, size_t size) : data_(data), size_(size) {}
view(const char * data)83     explicit view(const char* data) : data_(data), size_(::strlen(data)) {}
84 
data()85     const char* data() const { return data_; }
86 
size()87     size_t size() const { return size_; }
88 
89     view() = delete;
90     view(const view&) = delete;
91     view& operator=(const view&) = delete;
92   };
93 
94   typedef const char* const_iterator;
95 
96   // Ctor
97   tstring();
98   tstring(const std::string& str);  // NOLINT TODO(b/147740521): Make explicit.
99   tstring(const char* str, size_t len);
100   tstring(const char* str);  // NOLINT TODO(b/147740521): Make explicit.
101   tstring(size_t n, char c);
102   explicit tstring(const StringPiece str);
103 #ifdef PLATFORM_GOOGLE
104   explicit tstring(const std::Cord& cord);
105 #endif  // PLATFORM_GOOGLE
106 
107   // Copy
108   tstring(const tstring& str);
109 
110   // Move
111   tstring(tstring&& str) noexcept;
112 
113   // Dtor
114   ~tstring();
115 
116   // Copy Assignment
117   tstring& operator=(const tstring& str);
118   tstring& operator=(const std::string& str);
119   tstring& operator=(const char* str);
120   tstring& operator=(char ch);
121   tstring& operator=(const StringPiece str);
122 #ifdef PLATFORM_GOOGLE
123   tstring& operator=(const std::Cord& cord);
124 #endif  // PLATFORM_GOOGLE
125 
126   // View Assignment
127   tstring& operator=(const view& tsv);
128 
129   // Move Assignment
130   tstring& operator=(tstring&& str);
131 
132   // Comparison
133   int compare(const char* str, size_t len) const;
134   bool operator<(const tstring& o) const;
135   bool operator>(const tstring& o) const;
136   bool operator==(const char* str) const;
137   bool operator==(const tstring& o) const;
138   bool operator!=(const char* str) const;
139   bool operator!=(const tstring& o) const;
140 
141   // Conversion Operators
142   // TODO(b/147740521): Make explicit.
143   operator std::string() const;  // NOLINT
144   // TODO(b/147740521): Make explicit.
145   operator StringPiece() const;  // NOLINT
146 #ifdef PLATFORM_GOOGLE
147   template <typename T,
148             typename std::enable_if<std::is_same<T, std::AlphaNum>::value,
149                                     T>::type* = nullptr>
150   operator T() const;  // NOLINT TODO(b/147740521): Remove.
151 #endif  // PLATFORM_GOOGLE
152 
153   // Attributes
154   size_t size() const;
155   size_t length() const;
156   size_t capacity() const;
157   bool empty() const;
158   Type type() const;
159 
160   // Allocation
161   void resize(size_t new_size, char c = 0);
162   // Similar to resize, but will leave the newly grown region uninitialized.
163   void resize_uninitialized(size_t new_size);
164   void clear() noexcept;
165   void reserve(size_t n);
166 
167   // Iterators
168   const_iterator begin() const;
169   const_iterator end() const;
170 
171   // Const Element Access
172   const char* c_str() const;
173   const char* data() const;
174   const char& operator[](size_t i) const;
175   const char& back() const;
176 
177   // Mutable Element Access
178   // NOTE: For VIEW/OFFSET types, calling these methods will result in the
179   // conversion to a SMALL or heap allocated LARGE type.  As a result,
180   // previously obtained pointers, references, or iterators to the underlying
181   // buffer will point to the original VIEW/OFFSET and not the new allocation.
182   char* mdata();
183   char* data();  // DEPRECATED: Use mdata().
184   char& operator[](size_t i);
185 
186   // Assignment
187   tstring& assign(const char* str, size_t len);
188   tstring& assign(const char* str);
189 
190   // View Assignment
191   tstring& assign_as_view(const tstring& str);
192   tstring& assign_as_view(const std::string& str);
193   tstring& assign_as_view(const StringPiece str);
194   tstring& assign_as_view(const char* str, size_t len);
195   tstring& assign_as_view(const char* str);
196 
197   // Modifiers
198   // NOTE: Invalid input will result in undefined behavior.
199   tstring& append(const tstring& str);
200   tstring& append(const char* str, size_t len);
201   tstring& append(const char* str);
202   tstring& append(size_t n, char c);
203 
204   tstring& erase(size_t pos, size_t len);
205 
206   tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen);
207   tstring& insert(size_t pos, size_t n, char c);
208   void swap(tstring& str);
209   void push_back(char ch);
210 
211   // Friends
212   friend bool operator==(const char* a, const tstring& b);
213   friend bool operator==(const std::string& a, const tstring& b);
214   friend tstring operator+(const tstring& a, const tstring& b);
215   friend std::ostream& operator<<(std::ostream& o, const tstring& str);
216   friend std::hash<tstring>;
217 };
218 
219 // Non-member function overloads
220 
221 bool operator==(const char* a, const tstring& b);
222 bool operator==(const std::string& a, const tstring& b);
223 tstring operator+(const tstring& a, const tstring& b);
224 std::ostream& operator<<(std::ostream& o, const tstring& str);
225 
226 // Implementations
227 
228 // Ctor
229 
tstring()230 inline tstring::tstring() { TF_TString_Init(&tstr_); }
231 
tstring(const char * str,size_t len)232 inline tstring::tstring(const char* str, size_t len) {
233   TF_TString_Init(&tstr_);
234   TF_TString_Copy(&tstr_, str, len);
235 }
236 
tstring(const char * str)237 inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {}
238 
tstring(size_t n,char c)239 inline tstring::tstring(size_t n, char c) {
240   TF_TString_Init(&tstr_);
241   TF_TString_Resize(&tstr_, n, c);
242 }
243 
tstring(const std::string & str)244 inline tstring::tstring(const std::string& str)
245     : tstring(str.data(), str.size()) {}
246 
tstring(const StringPiece str)247 inline tstring::tstring(const StringPiece str)
248     : tstring(str.data(), str.size()) {}
249 
250 #ifdef PLATFORM_GOOGLE
tstring(const std::Cord & cord)251 inline tstring::tstring(const std::Cord& cord) {
252   TF_TString_Init(&tstr_);
253   TF_TString_ResizeUninitialized(&tstr_, cord.size());
254 
255   cord.CopyToArray(data());
256 }
257 #endif  // PLATFORM_GOOGLE
258 
259 // Copy
260 
// Copy constructor: initializes the wrapped TF_TString and then assigns from
// `str` through TF_TString_Assign.
inline tstring::tstring(const tstring& str) {
  TF_TString* const self = &tstr_;
  TF_TString_Init(self);
  TF_TString_Assign(self, &str.tstr_);
}
265 
266 // Move
267 
tstring(tstring && str)268 inline tstring::tstring(tstring&& str) noexcept {
269   TF_TString_Init(&tstr_);
270   TF_TString_Move(&tstr_, &str.tstr_);
271 }
272 
273 // Dtor
274 
~tstring()275 inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); }
276 
277 // Copy Assignment
278 
279 inline tstring& tstring::operator=(const tstring& str) {
280   TF_TString_Assign(&tstr_, &str.tstr_);
281 
282   return *this;
283 }
284 
285 inline tstring& tstring::operator=(const std::string& str) {
286   TF_TString_Copy(&tstr_, str.data(), str.size());
287   return *this;
288 }
289 
290 inline tstring& tstring::operator=(const char* str) {
291   TF_TString_Copy(&tstr_, str, ::strlen(str));
292 
293   return *this;
294 }
295 
296 inline tstring& tstring::operator=(char c) {
297   resize_uninitialized(1);
298   (*this)[0] = c;
299 
300   return *this;
301 }
302 
303 inline tstring& tstring::operator=(const StringPiece str) {
304   TF_TString_Copy(&tstr_, str.data(), str.size());
305 
306   return *this;
307 }
308 
309 #ifdef PLATFORM_GOOGLE
310 inline tstring& tstring::operator=(const std::Cord& cord) {
311   TF_TString_ResizeUninitialized(&tstr_, cord.size());
312 
313   cord.CopyToArray(data());
314 
315   return *this;
316 }
317 #endif  // PLATFORM_GOOGLE
318 
319 // View Assignment
320 
321 inline tstring& tstring::operator=(const tstring::view& tsv) {
322   assign_as_view(tsv.data(), tsv.size());
323 
324   return *this;
325 }
326 
327 // Move Assignment
328 
329 inline tstring& tstring::operator=(tstring&& str) {
330   TF_TString_Move(&tstr_, &str.tstr_);
331 
332   return *this;
333 }
334 
335 // Comparison
336 
compare(const char * str,size_t len)337 inline int tstring::compare(const char* str, size_t len) const {
338   int ret = ::memcmp(data(), str, std::min(len, size()));
339 
340   if (ret < 0) return -1;
341   if (ret > 0) return +1;
342 
343   if (size() < len) return -1;
344   if (size() > len) return +1;
345 
346   return 0;
347 }
348 
349 inline bool tstring::operator<(const tstring& o) const {
350   return compare(o.data(), o.size()) < 0;
351 }
352 
353 inline bool tstring::operator>(const tstring& o) const {
354   return compare(o.data(), o.size()) > 0;
355 }
356 
357 inline bool tstring::operator==(const char* str) const {
358   return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0;
359 }
360 
361 inline bool tstring::operator==(const tstring& o) const {
362   return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0;
363 }
364 
365 inline bool tstring::operator!=(const char* str) const {
366   return !(*this == str);
367 }
368 
369 inline bool tstring::operator!=(const tstring& o) const {
370   return !(*this == o);
371 }
372 
373 // Conversion Operators
374 
string()375 inline tstring::operator std::string() const {
376   return std::string(data(), size());
377 }
378 
StringPiece()379 inline tstring::operator StringPiece() const {
380   return StringPiece(data(), size());
381 }
382 
383 #ifdef PLATFORM_GOOGLE
384 template <typename T, typename std::enable_if<
385                           std::is_same<T, std::AlphaNum>::value, T>::type*>
T()386 inline tstring::operator T() const {
387   return T(StringPiece(*this));
388 }
389 #endif  // PLATFORM_GOOGLE
390 
391 // Attributes
392 
size()393 inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); }
394 
length()395 inline size_t tstring::length() const { return size(); }
396 
capacity()397 inline size_t tstring::capacity() const {
398   return TF_TString_GetCapacity(&tstr_);
399 }
400 
empty()401 inline bool tstring::empty() const { return size() == 0; }
402 
type()403 inline tstring::Type tstring::type() const {
404   return static_cast<tstring::Type>(TF_TString_GetType(&tstr_));
405 }
406 
407 // Allocation
408 
resize(size_t new_size,char c)409 inline void tstring::resize(size_t new_size, char c) {
410   TF_TString_Resize(&tstr_, new_size, c);
411 }
412 
resize_uninitialized(size_t new_size)413 inline void tstring::resize_uninitialized(size_t new_size) {
414   TF_TString_ResizeUninitialized(&tstr_, new_size);
415 }
416 
clear()417 inline void tstring::clear() noexcept {
418   TF_TString_ResizeUninitialized(&tstr_, 0);
419 }
420 
reserve(size_t n)421 inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); }
422 
423 // Iterators
424 
begin()425 inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; }
end()426 inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; }
427 
428 // Element Access
429 
c_str()430 inline const char* tstring::c_str() const { return data(); }
431 
data()432 inline const char* tstring::data() const {
433   return TF_TString_GetDataPointer(&tstr_);
434 }
435 
436 inline const char& tstring::operator[](size_t i) const { return data()[i]; }
437 
back()438 inline const char& tstring::back() const { return (*this)[size() - 1]; }
439 
mdata()440 inline char* tstring::mdata() {
441   return TF_TString_GetMutableDataPointer(&tstr_);
442 }
443 
data()444 inline char* tstring::data() {
445   // Deprecated
446   return mdata();
447 }
448 
449 inline char& tstring::operator[](size_t i) { return mdata()[i]; }
450 
451 // Assignment
452 
assign(const char * str,size_t len)453 inline tstring& tstring::assign(const char* str, size_t len) {
454   TF_TString_Copy(&tstr_, str, len);
455 
456   return *this;
457 }
458 
assign(const char * str)459 inline tstring& tstring::assign(const char* str) {
460   assign(str, ::strlen(str));
461 
462   return *this;
463 }
464 
465 // View Assignment
466 
// Makes this string a view of another tstring's characters
// (str.data(), str.size()).
inline tstring& tstring::assign_as_view(const tstring& str) {
  return assign_as_view(str.data(), str.size());
}
472 
assign_as_view(const std::string & str)473 inline tstring& tstring::assign_as_view(const std::string& str) {
474   assign_as_view(str.data(), str.size());
475 
476   return *this;
477 }
478 
assign_as_view(const StringPiece str)479 inline tstring& tstring::assign_as_view(const StringPiece str) {
480   assign_as_view(str.data(), str.size());
481 
482   return *this;
483 }
484 
assign_as_view(const char * str,size_t len)485 inline tstring& tstring::assign_as_view(const char* str, size_t len) {
486   TF_TString_AssignView(&tstr_, str, len);
487 
488   return *this;
489 }
490 
assign_as_view(const char * str)491 inline tstring& tstring::assign_as_view(const char* str) {
492   assign_as_view(str, ::strlen(str));
493 
494   return *this;
495 }
496 
497 // Modifiers
498 
// Appends another tstring via TF_TString_Append.
inline tstring& tstring::append(const tstring& str) {
  tstring& self = *this;
  TF_TString_Append(&self.tstr_, &str.tstr_);
  return self;
}
504 
append(const char * str,size_t len)505 inline tstring& tstring::append(const char* str, size_t len) {
506   TF_TString_AppendN(&tstr_, str, len);
507 
508   return *this;
509 }
510 
append(const char * str)511 inline tstring& tstring::append(const char* str) {
512   append(str, ::strlen(str));
513 
514   return *this;
515 }
516 
append(size_t n,char c)517 inline tstring& tstring::append(size_t n, char c) {
518   // For append use cases, we want to ensure amortized growth.
519   const size_t new_size = size() + n;
520   TF_TString_ReserveAmortized(&tstr_, new_size);
521   resize(new_size, c);
522 
523   return *this;
524 }
525 
erase(size_t pos,size_t len)526 inline tstring& tstring::erase(size_t pos, size_t len) {
527   memmove(mdata() + pos, data() + pos + len, size() - len - pos);
528 
529   resize(size() - len);
530 
531   return *this;
532 }
533 
// Inserts up to `sublen` characters of `str`, starting at `subpos`, before
// index `pos` of this string, and returns *this.
//
// `sublen` is clamped to the characters available in `str` after `subpos`,
// matching std::string::insert.  `pos` must not exceed size() and `subpos`
// must not exceed str.size(); those cases remain undefined behavior.
inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos,
                                size_t sublen) {
  const size_t available = str.size() - subpos;
  if (sublen > available) sublen = available;

  if (&str == this) {
    // Self-insertion: shifting the tail below would move the source range
    // before it is read, so insert from a private copy instead.
    const tstring snapshot(str.data() + subpos, sublen);
    return insert(pos, snapshot, 0, sublen);
  }

  const size_t orig_size = size();
  TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen);

  char* const base = mdata();
  // Open a gap of `sublen` bytes at `pos`, then fill it from the source.
  memmove(base + pos + sublen, base + pos, orig_size - pos);
  memmove(base + pos, str.data() + subpos, sublen);

  return *this;
}
544 
insert(size_t pos,size_t n,char c)545 inline tstring& tstring::insert(size_t pos, size_t n, char c) {
546   size_t size_ = size();
547   TF_TString_ResizeUninitialized(&tstr_, size_ + n);
548 
549   memmove(mdata() + pos + n, data() + pos, size_ - pos);
550   memset(mdata() + pos, c, n);
551 
552   return *this;
553 }
554 
swap(tstring & str)555 inline void tstring::swap(tstring& str) {
556   // TODO(dero): Invalid for OFFSET (unimplemented).
557   std::swap(tstr_, str.tstr_);
558 }
559 
push_back(char ch)560 inline void tstring::push_back(char ch) { append(1, ch); }
561 
562 // Friends
563 
564 inline bool operator==(const char* a, const tstring& b) {
565   return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0;
566 }
567 
568 inline bool operator==(const std::string& a, const tstring& b) {
569   return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0;
570 }
571 
572 inline tstring operator+(const tstring& a, const tstring& b) {
573   tstring r;
574   r.reserve(a.size() + b.size());
575   r.append(a);
576   r.append(b);
577 
578   return r;
579 }
580 
581 inline std::ostream& operator<<(std::ostream& o, const tstring& str) {
582   return o.write(str.data(), str.size());
583 }
584 
585 }  // namespace tensorflow
586 
587 #endif  // TENSORFLOW_CORE_PLATFORM_TSTRING_H_
588