1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_PLATFORM_TSTRING_H_
17 #define TENSORFLOW_CORE_PLATFORM_TSTRING_H_
18
#include <assert.h>

#include <algorithm>
#include <ostream>
#include <string>

#include "tensorflow/core/platform/cord.h"
#include "tensorflow/core/platform/ctstring.h"
#include "tensorflow/core/platform/stringpiece.h"
27
28 namespace tensorflow {
29
30 // tensorflow::tstring is the scalar type for DT_STRING tensors.
31 //
32 // tstrings are meant to be used when interfacing with string tensors, and
33 // should not be considered as a general replacement for std::string in
34 // tensorflow. The primary purpose of tstring is to provide a unified and
35 // stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating
36 // unnecessary conversions across language boundaries, and allowing for compiler
37 // agnostic interoperability across dynamically loaded modules.
38 //
// In addition to ABI stability, tstring features two string subtypes, VIEW and
// OFFSET.
41 //
42 // VIEW tstrings are views into unowned character buffers; they can be used to
43 // pass around existing character strings without incurring a per object heap
44 // allocation. Note that, like std::string_view, it is the user's
45 // responsibility to ensure that the underlying buffer of a VIEW tstring exceeds
46 // the lifetime of the associated tstring object.
47 //
48 // TODO(dero): Methods for creating OFFSET tensors are not currently
49 // implemented.
50 //
51 // OFFSET tstrings are platform independent offset defined strings which can be
52 // directly mmaped or copied into a tensor buffer without the need for
53 // deserialization or processing. For security reasons, it is imperative that
54 // OFFSET based string tensors are validated before use, or are from a trusted
55 // source.
56 //
57 // Underlying VIEW and OFFSET buffers are considered immutable, so l-value
58 // assignment, mutation, or non-const access to data() of tstrings will result
59 // in the conversion to an owned SMALL/LARGE type.
60 //
61 // The interface for tstring largely overlaps with std::string. Except where
62 // noted, expect equivalent semantics with synonymous std::string methods.
63 class tstring {
64 TF_TString tstr_;
65
66 public:
67 enum Type {
68 // See cstring.h
69 SMALL = TF_TSTR_SMALL,
70 LARGE = TF_TSTR_LARGE,
71 OFFSET = TF_TSTR_OFFSET,
72 VIEW = TF_TSTR_VIEW,
73 };
74
75 // Assignment to a tstring object with a tstring::view type will create a VIEW
76 // type tstring.
77 class view {
78 const char* data_;
79 size_t size_;
80
81 public:
view(const char * data,size_t size)82 explicit view(const char* data, size_t size) : data_(data), size_(size) {}
view(const char * data)83 explicit view(const char* data) : data_(data), size_(::strlen(data)) {}
84
data()85 const char* data() const { return data_; }
86
size()87 size_t size() const { return size_; }
88
89 view() = delete;
90 view(const view&) = delete;
91 view& operator=(const view&) = delete;
92 };
93
94 typedef const char* const_iterator;
95
96 // Ctor
97 tstring();
98 tstring(const std::string& str); // NOLINT TODO(b/147740521): Make explicit.
99 tstring(const char* str, size_t len);
100 tstring(const char* str); // NOLINT TODO(b/147740521): Make explicit.
101 tstring(size_t n, char c);
102 explicit tstring(const StringPiece str);
103 #ifdef PLATFORM_GOOGLE
104 explicit tstring(const std::Cord& cord);
105 #endif // PLATFORM_GOOGLE
106
107 // Copy
108 tstring(const tstring& str);
109
110 // Move
111 tstring(tstring&& str) noexcept;
112
113 // Dtor
114 ~tstring();
115
116 // Copy Assignment
117 tstring& operator=(const tstring& str);
118 tstring& operator=(const std::string& str);
119 tstring& operator=(const char* str);
120 tstring& operator=(char ch);
121 tstring& operator=(const StringPiece str);
122 #ifdef PLATFORM_GOOGLE
123 tstring& operator=(const std::Cord& cord);
124 #endif // PLATFORM_GOOGLE
125
126 // View Assignment
127 tstring& operator=(const view& tsv);
128
129 // Move Assignment
130 tstring& operator=(tstring&& str);
131
132 // Comparison
133 int compare(const char* str, size_t len) const;
134 bool operator<(const tstring& o) const;
135 bool operator>(const tstring& o) const;
136 bool operator==(const char* str) const;
137 bool operator==(const tstring& o) const;
138 bool operator!=(const char* str) const;
139 bool operator!=(const tstring& o) const;
140
141 // Conversion Operators
142 // TODO(b/147740521): Make explicit.
143 operator std::string() const; // NOLINT
144 // TODO(b/147740521): Make explicit.
145 operator StringPiece() const; // NOLINT
146 #ifdef PLATFORM_GOOGLE
147 template <typename T,
148 typename std::enable_if<std::is_same<T, std::AlphaNum>::value,
149 T>::type* = nullptr>
150 operator T() const; // NOLINT TODO(b/147740521): Remove.
151 #endif // PLATFORM_GOOGLE
152
153 // Attributes
154 size_t size() const;
155 size_t length() const;
156 size_t capacity() const;
157 bool empty() const;
158 Type type() const;
159
160 // Allocation
161 void resize(size_t new_size, char c = 0);
162 // Similar to resize, but will leave the newly grown region uninitialized.
163 void resize_uninitialized(size_t new_size);
164 void clear() noexcept;
165 void reserve(size_t n);
166
167 // Iterators
168 const_iterator begin() const;
169 const_iterator end() const;
170
171 // Const Element Access
172 const char* c_str() const;
173 const char* data() const;
174 const char& operator[](size_t i) const;
175 const char& back() const;
176
177 // Mutable Element Access
178 // NOTE: For VIEW/OFFSET types, calling these methods will result in the
179 // conversion to a SMALL or heap allocated LARGE type. As a result,
180 // previously obtained pointers, references, or iterators to the underlying
181 // buffer will point to the original VIEW/OFFSET and not the new allocation.
182 char* mdata();
183 char* data(); // DEPRECATED: Use mdata().
184 char& operator[](size_t i);
185
186 // Assignment
187 tstring& assign(const char* str, size_t len);
188 tstring& assign(const char* str);
189
190 // View Assignment
191 tstring& assign_as_view(const tstring& str);
192 tstring& assign_as_view(const std::string& str);
193 tstring& assign_as_view(const StringPiece str);
194 tstring& assign_as_view(const char* str, size_t len);
195 tstring& assign_as_view(const char* str);
196
197 // Modifiers
198 // NOTE: Invalid input will result in undefined behavior.
199 tstring& append(const tstring& str);
200 tstring& append(const char* str, size_t len);
201 tstring& append(const char* str);
202 tstring& append(size_t n, char c);
203
204 tstring& erase(size_t pos, size_t len);
205
206 tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen);
207 tstring& insert(size_t pos, size_t n, char c);
208 void swap(tstring& str);
209 void push_back(char ch);
210
211 // Friends
212 friend bool operator==(const char* a, const tstring& b);
213 friend bool operator==(const std::string& a, const tstring& b);
214 friend tstring operator+(const tstring& a, const tstring& b);
215 friend std::ostream& operator<<(std::ostream& o, const tstring& str);
216 friend std::hash<tstring>;
217 };
218
219 // Non-member function overloads
220
221 bool operator==(const char* a, const tstring& b);
222 bool operator==(const std::string& a, const tstring& b);
223 tstring operator+(const tstring& a, const tstring& b);
224 std::ostream& operator<<(std::ostream& o, const tstring& str);
225
226 // Implementations
227
228 // Ctor
229
// Default constructor: initializes the wrapped TF_TString with
// TF_TString_Init.
inline tstring::tstring() { TF_TString_Init(&tstr_); }
231
tstring(const char * str,size_t len)232 inline tstring::tstring(const char* str, size_t len) {
233 TF_TString_Init(&tstr_);
234 TF_TString_Copy(&tstr_, str, len);
235 }
236
tstring(const char * str)237 inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {}
238
tstring(size_t n,char c)239 inline tstring::tstring(size_t n, char c) {
240 TF_TString_Init(&tstr_);
241 TF_TString_Resize(&tstr_, n, c);
242 }
243
tstring(const std::string & str)244 inline tstring::tstring(const std::string& str)
245 : tstring(str.data(), str.size()) {}
246
tstring(const StringPiece str)247 inline tstring::tstring(const StringPiece str)
248 : tstring(str.data(), str.size()) {}
249
250 #ifdef PLATFORM_GOOGLE
tstring(const std::Cord & cord)251 inline tstring::tstring(const std::Cord& cord) {
252 TF_TString_Init(&tstr_);
253 TF_TString_ResizeUninitialized(&tstr_, cord.size());
254
255 cord.CopyToArray(data());
256 }
257 #endif // PLATFORM_GOOGLE
258
259 // Copy
260
// Copy constructor: initializes the storage, then assigns from `str` via
// TF_TString_Assign.
inline tstring::tstring(const tstring& str) : tstring() {
  TF_TString_Assign(&tstr_, &str.tstr_);
}
265
266 // Move
267
tstring(tstring && str)268 inline tstring::tstring(tstring&& str) noexcept {
269 TF_TString_Init(&tstr_);
270 TF_TString_Move(&tstr_, &str.tstr_);
271 }
272
273 // Dtor
274
// Destructor: releases the wrapped TF_TString with TF_TString_Dealloc.
inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); }
276
277 // Copy Assignment
278
279 inline tstring& tstring::operator=(const tstring& str) {
280 TF_TString_Assign(&tstr_, &str.tstr_);
281
282 return *this;
283 }
284
285 inline tstring& tstring::operator=(const std::string& str) {
286 TF_TString_Copy(&tstr_, str.data(), str.size());
287 return *this;
288 }
289
290 inline tstring& tstring::operator=(const char* str) {
291 TF_TString_Copy(&tstr_, str, ::strlen(str));
292
293 return *this;
294 }
295
296 inline tstring& tstring::operator=(char c) {
297 resize_uninitialized(1);
298 (*this)[0] = c;
299
300 return *this;
301 }
302
303 inline tstring& tstring::operator=(const StringPiece str) {
304 TF_TString_Copy(&tstr_, str.data(), str.size());
305
306 return *this;
307 }
308
309 #ifdef PLATFORM_GOOGLE
310 inline tstring& tstring::operator=(const std::Cord& cord) {
311 TF_TString_ResizeUninitialized(&tstr_, cord.size());
312
313 cord.CopyToArray(data());
314
315 return *this;
316 }
317 #endif // PLATFORM_GOOGLE
318
319 // View Assignment
320
321 inline tstring& tstring::operator=(const tstring::view& tsv) {
322 assign_as_view(tsv.data(), tsv.size());
323
324 return *this;
325 }
326
327 // Move Assignment
328
329 inline tstring& tstring::operator=(tstring&& str) {
330 TF_TString_Move(&tstr_, &str.tstr_);
331
332 return *this;
333 }
334
335 // Comparison
336
compare(const char * str,size_t len)337 inline int tstring::compare(const char* str, size_t len) const {
338 int ret = ::memcmp(data(), str, std::min(len, size()));
339
340 if (ret < 0) return -1;
341 if (ret > 0) return +1;
342
343 if (size() < len) return -1;
344 if (size() > len) return +1;
345
346 return 0;
347 }
348
349 inline bool tstring::operator<(const tstring& o) const {
350 return compare(o.data(), o.size()) < 0;
351 }
352
353 inline bool tstring::operator>(const tstring& o) const {
354 return compare(o.data(), o.size()) > 0;
355 }
356
357 inline bool tstring::operator==(const char* str) const {
358 return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0;
359 }
360
361 inline bool tstring::operator==(const tstring& o) const {
362 return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0;
363 }
364
365 inline bool tstring::operator!=(const char* str) const {
366 return !(*this == str);
367 }
368
369 inline bool tstring::operator!=(const tstring& o) const {
370 return !(*this == o);
371 }
372
373 // Conversion Operators
374
string()375 inline tstring::operator std::string() const {
376 return std::string(data(), size());
377 }
378
StringPiece()379 inline tstring::operator StringPiece() const {
380 return StringPiece(data(), size());
381 }
382
383 #ifdef PLATFORM_GOOGLE
// Conversion to T, enabled only when T is std::AlphaNum (see the enable_if
// constraint). The result is constructed from this string's StringPiece
// conversion.
template <typename T, typename std::enable_if<
                          std::is_same<T, std::AlphaNum>::value, T>::type*>
inline tstring::operator T() const {
  return T(StringPiece(*this));
}
389 #endif // PLATFORM_GOOGLE
390
391 // Attributes
392
// Returns the size reported by TF_TString_GetSize.
inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); }
394
length()395 inline size_t tstring::length() const { return size(); }
396
// Returns the capacity reported by TF_TString_GetCapacity.
inline size_t tstring::capacity() const {
  return TF_TString_GetCapacity(&tstr_);
}
400
empty()401 inline bool tstring::empty() const { return size() == 0; }
402
type()403 inline tstring::Type tstring::type() const {
404 return static_cast<tstring::Type>(TF_TString_GetType(&tstr_));
405 }
406
407 // Allocation
408
// Resizes to `new_size` via TF_TString_Resize, passing `c` as the fill
// character (the declaration defaults `c` to 0).
inline void tstring::resize(size_t new_size, char c) {
  TF_TString_Resize(&tstr_, new_size, c);
}
412
// Resizes to `new_size` via TF_TString_ResizeUninitialized; per the class
// declaration, any newly grown region is left uninitialized.
inline void tstring::resize_uninitialized(size_t new_size) {
  TF_TString_ResizeUninitialized(&tstr_, new_size);
}
416
clear()417 inline void tstring::clear() noexcept {
418 TF_TString_ResizeUninitialized(&tstr_, 0);
419 }
420
// Forwards the requested capacity `n` to TF_TString_Reserve.
inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); }
422
423 // Iterators
424
begin()425 inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; }
end()426 inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; }
427
428 // Element Access
429
c_str()430 inline const char* tstring::c_str() const { return data(); }
431
// Read-only pointer to the character data (TF_TString_GetDataPointer).
inline const char* tstring::data() const {
  return TF_TString_GetDataPointer(&tstr_);
}
435
436 inline const char& tstring::operator[](size_t i) const { return data()[i]; }
437
back()438 inline const char& tstring::back() const { return (*this)[size() - 1]; }
439
// Mutable pointer to the character data (TF_TString_GetMutableDataPointer).
// Per the class declaration, VIEW/OFFSET strings are converted to an owned
// type by this call.
inline char* tstring::mdata() {
  return TF_TString_GetMutableDataPointer(&tstr_);
}
443
data()444 inline char* tstring::data() {
445 // Deprecated
446 return mdata();
447 }
448
449 inline char& tstring::operator[](size_t i) { return mdata()[i]; }
450
451 // Assignment
452
// Replaces the contents with the `len` bytes at `str` (TF_TString_Copy) and
// returns *this for chaining.
inline tstring& tstring::assign(const char* str, size_t len) {
  TF_TString_Copy(&tstr_, str, len);

  return *this;
}
458
assign(const char * str)459 inline tstring& tstring::assign(const char* str) {
460 assign(str, ::strlen(str));
461
462 return *this;
463 }
464
465 // View Assignment
466
// Makes *this a view of `str`'s current buffer (str.data(), str.size()).
inline tstring& tstring::assign_as_view(const tstring& str) {
  return assign_as_view(str.data(), str.size());
}
472
assign_as_view(const std::string & str)473 inline tstring& tstring::assign_as_view(const std::string& str) {
474 assign_as_view(str.data(), str.size());
475
476 return *this;
477 }
478
assign_as_view(const StringPiece str)479 inline tstring& tstring::assign_as_view(const StringPiece str) {
480 assign_as_view(str.data(), str.size());
481
482 return *this;
483 }
484
// Makes *this a view of the `len` bytes at `str` (TF_TString_AssignView) and
// returns *this. As the class comment notes, the caller must keep the buffer
// alive for as long as the view is used.
inline tstring& tstring::assign_as_view(const char* str, size_t len) {
  TF_TString_AssignView(&tstr_, str, len);

  return *this;
}
490
assign_as_view(const char * str)491 inline tstring& tstring::assign_as_view(const char* str) {
492 assign_as_view(str, ::strlen(str));
493
494 return *this;
495 }
496
497 // Modifiers
498
// Appends the contents of `str` (TF_TString_Append) and returns *this.
inline tstring& tstring::append(const tstring& str) {
  TF_TString_Append(&tstr_, &str.tstr_);

  return *this;
}
504
// Appends the `len` bytes at `str` (TF_TString_AppendN) and returns *this.
inline tstring& tstring::append(const char* str, size_t len) {
  TF_TString_AppendN(&tstr_, str, len);

  return *this;
}
510
append(const char * str)511 inline tstring& tstring::append(const char* str) {
512 append(str, ::strlen(str));
513
514 return *this;
515 }
516
append(size_t n,char c)517 inline tstring& tstring::append(size_t n, char c) {
518 // For append use cases, we want to ensure amortized growth.
519 const size_t new_size = size() + n;
520 TF_TString_ReserveAmortized(&tstr_, new_size);
521 resize(new_size, c);
522
523 return *this;
524 }
525
erase(size_t pos,size_t len)526 inline tstring& tstring::erase(size_t pos, size_t len) {
527 memmove(mdata() + pos, data() + pos + len, size() - len - pos);
528
529 resize(size() - len);
530
531 return *this;
532 }
533
// Inserts the `sublen` bytes of `str` starting at `subpos` into *this at
// offset `pos`, and returns *this.
//
// Offsets are not validated: `pos` must be <= size() and
// [subpos, subpos + sublen) must lie within `str`.
inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos,
                                size_t sublen) {
  if (&str == this) {
    // Self-insertion: shifting the tail below would overwrite part of the
    // source slice before it is copied, so take a snapshot of the slice first
    // and insert from that.
    const tstring snapshot(str.data() + subpos, sublen);
    return insert(pos, snapshot, 0, sublen);
  }

  const size_t orig_size = size();
  TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen);

  char* const buf = mdata();
  // Open a gap of `sublen` bytes at `pos`, then fill it from `str`.
  memmove(buf + pos + sublen, buf + pos, orig_size - pos);
  memmove(buf + pos, str.data() + subpos, sublen);

  return *this;
}
544
insert(size_t pos,size_t n,char c)545 inline tstring& tstring::insert(size_t pos, size_t n, char c) {
546 size_t size_ = size();
547 TF_TString_ResizeUninitialized(&tstr_, size_ + n);
548
549 memmove(mdata() + pos + n, data() + pos, size_ - pos);
550 memset(mdata() + pos, c, n);
551
552 return *this;
553 }
554
// Exchanges the wrapped TF_TString structs of *this and `str` with std::swap.
inline void tstring::swap(tstring& str) {
  // TODO(dero): Invalid for OFFSET (unimplemented).
  std::swap(tstr_, str.tstr_);
}
559
push_back(char ch)560 inline void tstring::push_back(char ch) { append(1, ch); }
561
562 // Friends
563
564 inline bool operator==(const char* a, const tstring& b) {
565 return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0;
566 }
567
568 inline bool operator==(const std::string& a, const tstring& b) {
569 return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0;
570 }
571
572 inline tstring operator+(const tstring& a, const tstring& b) {
573 tstring r;
574 r.reserve(a.size() + b.size());
575 r.append(a);
576 r.append(b);
577
578 return r;
579 }
580
581 inline std::ostream& operator<<(std::ostream& o, const tstring& str) {
582 return o.write(str.data(), str.size());
583 }
584
585 } // namespace tensorflow
586
587 #endif // TENSORFLOW_CORE_PLATFORM_TSTRING_H_
588