// Copyright 2001-2010 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifndef RE2_STRINGPIECE_H_
#define RE2_STRINGPIECE_H_

// A string-like object that points to a sized piece of memory.
//
// Functions or methods may use const StringPiece& parameters to accept either
// a "const char*" or a "string" value that will be implicitly converted to
// a StringPiece.  The implicit conversion means that it is often appropriate
// to include this .h file in other files rather than forward-declaring
// StringPiece as would be appropriate for most other Google classes.
//
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
// conversions from "const char*" to "string" and back again.
//
//
// Arghh!  I wish C++ literals were "string".

// Doing this simplifies the logic below: on compilers without
// __has_include, every __has_include(...) test evaluates to 0.
#ifndef __has_include
#define __has_include(x) 0
#endif

#include <stddef.h>
#include <string.h>
#include <algorithm>
#include <iosfwd>
#include <iterator>
#include <string>
#if __has_include(<string_view>) && __cplusplus >= 201703L
#include <string_view>
#endif

namespace re2 {

// A (pointer, length) pair describing a run of bytes.  The class stores only
// the pointer and the length; none of the inline members below copies or
// frees the bytes, so the referenced memory must be kept alive by the caller.
class StringPiece {
 public:
  typedef std::char_traits<char> traits_type;
  typedef char value_type;
  typedef char* pointer;
  typedef const char* const_pointer;
  typedef char& reference;
  typedef const char& const_reference;
  typedef const char* const_iterator;
  typedef const_iterator iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef const_reverse_iterator reverse_iterator;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  // Largest value of size_type; used as the default "pos"/"n" argument of
  // substr() and rfind() and compared against by contains().
  static const size_type npos = static_cast<size_type>(-1);

  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.

  // Creates an empty piece whose data() is null.
  StringPiece()
      : data_(nullptr), size_(0) {}
#if __has_include(<string_view>) && __cplusplus >= 201703L
  // Views the same bytes as `str`.
  StringPiece(const std::string_view& str)
      : data_(str.data()), size_(str.size()) {}
#endif
  // Views the contents of `str`.
  StringPiece(const std::string& str)
      : data_(str.data()), size_(str.size()) {}
  // Views the NUL-terminated string `str`; a null `str` yields an empty
  // piece with a null data().
  StringPiece(const char* str)
      : data_(str), size_(str == nullptr ? 0 : strlen(str)) {}
  // Views exactly `len` bytes starting at `str`.  No validation is done.
  StringPiece(const char* str, size_type len)
      : data_(str), size_(len) {}

  // Iteration over the viewed bytes, forwards and backwards.
  const_iterator begin() const { return data_; }
  const_iterator end() const { return data_ + size_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(data_ + size_);
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(data_);
  }

  // Number of bytes viewed; length() is a synonym for size().
  size_type size() const { return size_; }
  size_type length() const { return size_; }
  bool empty() const { return size_ == 0; }

  // Unchecked element access: `i` is not compared against size().
  const_reference operator[](size_type i) const { return data_[i]; }
  // Pointer to the first viewed byte; may be null and is not guaranteed to
  // be NUL-terminated.
  const_pointer data() const { return data_; }

  // Drops the first `n` bytes from the view.  `n` is not checked: if it
  // exceeds size(), the unsigned size_ wraps around.
  void remove_prefix(size_type n) {
    data_ += n;
    size_ -= n;
  }

  // Drops the last `n` bytes from the view.  `n` is not checked: if it
  // exceeds size(), the unsigned size_ wraps around.
  void remove_suffix(size_type n) {
    size_ -= n;
  }

  // Re-points the view at the NUL-terminated string `str`; a null `str`
  // yields an empty piece with a null data().
  void set(const char* str) {
    data_ = str;
    size_ = str == nullptr ? 0 : strlen(str);
  }

  // Re-points the view at `len` bytes starting at `str`.  No validation is
  // done.
  void set(const char* str, size_type len) {
    data_ = str;
    size_ = len;
  }

  // Converts to `std::basic_string`.  A null data() converts to an empty
  // string instead of handing a null pointer to the string constructor.
  template <typename A>
  explicit operator std::basic_string<char, traits_type, A>() const {
    if (!data_) return {};
    return std::basic_string<char, traits_type, A>(data_, size_);
  }

  // Returns a std::string holding a copy of the viewed bytes.  A null data()
  // yields an empty string, matching the conversion operator above.
  std::string as_string() const {
    if (!data_) return {};
    return std::string(data_, size_);
  }

  // We also define ToString() here, since many other string-like
  // interfaces name the routine that converts to a C++ string
  // "ToString", and it's confusing to have the method that does that
  // for a StringPiece be called "as_string()".  We also leave the
  // "as_string()" method defined here for existing code.
  std::string ToString() const {
    if (!data_) return {};
    return std::string(data_, size_);
  }

  // Replaces the contents of `*target` with a copy of the viewed bytes.
  // A null data() clears `*target` rather than passing a null pointer to
  // std::string::assign().
  void CopyToString(std::string* target) const {
    if (!data_) {
      target->clear();
      return;
    }
    target->assign(data_, size_);
  }

  // Appends a copy of the viewed bytes to `*target`.  A null data() leaves
  // `*target` untouched rather than passing a null pointer to
  // std::string::append().
  void AppendToString(std::string* target) const {
    if (!data_) return;
    target->append(data_, size_);
  }

  // Declared here, defined out of line (the definitions are not part of this
  // header).
  size_type copy(char* buf, size_type n, size_type pos = 0) const;
  StringPiece substr(size_type pos = 0, size_type n = npos) const;

  // Three-way bytewise comparison.  Returns -1, 0 or 1: the common prefix is
  // compared with memcmp() first, and on a tie the shorter piece orders
  // first.  memcmp() is skipped when the common prefix is empty, so null
  // data() pointers are never passed to it.
  int compare(const StringPiece& x) const {
    size_type min_size = std::min(size(), x.size());
    if (min_size > 0) {
      int r = memcmp(data(), x.data(), min_size);
      if (r < 0) return -1;
      if (r > 0) return 1;
    }
    if (size() < x.size()) return -1;
    if (size() > x.size()) return 1;
    return 0;
  }

  // Does "this" start with "x"?  An empty "x" always matches.
  bool starts_with(const StringPiece& x) const {
    return x.empty() ||
           (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
  }

  // Does "this" end with "x"?  An empty "x" always matches.
  bool ends_with(const StringPiece& x) const {
    return x.empty() ||
           (size() >= x.size() &&
            memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
  }

  // True when find(s) reports a position other than npos.
  bool contains(const StringPiece& s) const {
    return find(s) != npos;
  }

  // Search routines; declared here, defined out of line (the definitions
  // are not part of this header).
  size_type find(const StringPiece& s, size_type pos = 0) const;
  size_type find(char c, size_type pos = 0) const;
  size_type rfind(const StringPiece& s, size_type pos = npos) const;
  size_type rfind(char c, size_type pos = npos) const;

 private:
  const_pointer data_;  // First viewed byte; may be null.
  size_type size_;      // Number of viewed bytes.
};

// Bytewise equality.  memcmp() is only reached when the lengths match, the
// pointers differ and the length is non-zero.
inline bool operator==(const StringPiece& x, const StringPiece& y) {
  StringPiece::size_type len = x.size();
  if (len != y.size()) return false;
  return x.data() == y.data() || len == 0 ||
         memcmp(x.data(), y.data(), len) == 0;
}

inline bool operator!=(const StringPiece& x, const StringPiece& y) {
  return !(x == y);
}

// Bytewise ordering: compares the common prefix with memcmp() (skipped when
// that prefix is empty); on a tie the shorter piece orders first.
inline bool operator<(const StringPiece& x, const StringPiece& y) {
  StringPiece::size_type min_size = std::min(x.size(), y.size());
  int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
  return (r < 0) || (r == 0 && x.size() < y.size());
}

// The remaining relational operators are all expressed through operator<.
inline bool operator>(const StringPiece& x, const StringPiece& y) {
  return y < x;
}

inline bool operator<=(const StringPiece& x, const StringPiece& y) {
  return !(x > y);
}

inline bool operator>=(const StringPiece& x, const StringPiece& y) {
  return !(x < y);
}

// Allow StringPiece to be logged.  Defined out of line.
std::ostream& operator<<(std::ostream& o, const StringPiece& p);

}  // namespace re2

#endif  // RE2_STRINGPIECE_H_