1 // Copyright 2022 Google LLC 2 // 3 // Use of this source code is governed by an MIT-style 4 // license that can be found in the LICENSE file or at 5 // https://opensource.org/licenses/MIT. 6 7 /* This is a wrapper for the Google range-sse.cc algorithm which checks whether a 8 * sequence of bytes is a valid UTF-8 sequence and finds the longest valid prefix of 9 * the UTF-8 sequence. 10 * 11 * The key difference is that it checks for as much ASCII symbols as possible 12 * and then falls back to the range-sse.cc algorithm. The changes to the 13 * algorithm are cosmetic, mostly to trick the clang compiler to produce optimal 14 * code. 15 * 16 * For API see the utf8_validity.h header. 17 */ 18 19 #include "utf8_validity.h" 20 21 #include <cstddef> 22 23 #include "absl/strings/string_view.h" 24 #include "utf8_range.h" 25 26 namespace utf8_range { 27 IsStructurallyValid(absl::string_view str)28bool IsStructurallyValid(absl::string_view str) { 29 return utf8_range_IsValid(str.data(), str.size()); 30 } 31 SpanStructurallyValid(absl::string_view str)32size_t SpanStructurallyValid(absl::string_view str) { 33 return utf8_range_ValidPrefix(str.data(), str.size()); 34 } 35 36 } // namespace utf8_range 37