// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*fd525a9cSAndroid Build Coastguard Worker
15*fd525a9cSAndroid Build Coastguard Worker #include "src/utf8_fix.h"
16*fd525a9cSAndroid Build Coastguard Worker
17*fd525a9cSAndroid Build Coastguard Worker #include <algorithm>
18*fd525a9cSAndroid Build Coastguard Worker #include <cassert>
19*fd525a9cSAndroid Build Coastguard Worker
20*fd525a9cSAndroid Build Coastguard Worker namespace protobuf_mutator {
21*fd525a9cSAndroid Build Coastguard Worker
22*fd525a9cSAndroid Build Coastguard Worker namespace {
23*fd525a9cSAndroid Build Coastguard Worker
// Encodes `code` as a `size`-byte sequence whose last byte sits directly
// before `e`. Bytes are emitted back to front: each of the `size - 1` trailing
// bytes receives the 0x80 continuation marker plus the next six low bits of
// `code`, and the first byte receives `prefix` OR-ed with the bits left over.
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
  char* cursor = e;
  for (uint8_t trailing = static_cast<uint8_t>(size - 1); trailing != 0;
       --trailing) {
    --cursor;
    *cursor = 0x80 | (code & 0x3F);
    code >>= 6;
  }
  --cursor;
  *cursor = prefix | code;
}
31*fd525a9cSAndroid Build Coastguard Worker
FixCode(char * b,const char * e,RandomEngine * random)32*fd525a9cSAndroid Build Coastguard Worker char* FixCode(char* b, const char* e, RandomEngine* random) {
33*fd525a9cSAndroid Build Coastguard Worker const char* start = b;
34*fd525a9cSAndroid Build Coastguard Worker assert(b < e);
35*fd525a9cSAndroid Build Coastguard Worker
36*fd525a9cSAndroid Build Coastguard Worker e = std::min<const char*>(e, b + 4);
37*fd525a9cSAndroid Build Coastguard Worker char32_t c = *b++;
38*fd525a9cSAndroid Build Coastguard Worker for (; b < e && (*b & 0xC0) == 0x80; ++b) {
39*fd525a9cSAndroid Build Coastguard Worker c = (c << 6) + (*b & 0x3F);
40*fd525a9cSAndroid Build Coastguard Worker }
41*fd525a9cSAndroid Build Coastguard Worker uint8_t size = b - start;
42*fd525a9cSAndroid Build Coastguard Worker switch (size) {
43*fd525a9cSAndroid Build Coastguard Worker case 1:
44*fd525a9cSAndroid Build Coastguard Worker c &= 0x7F;
45*fd525a9cSAndroid Build Coastguard Worker StoreCode(b, c, size, 0);
46*fd525a9cSAndroid Build Coastguard Worker break;
47*fd525a9cSAndroid Build Coastguard Worker case 2:
48*fd525a9cSAndroid Build Coastguard Worker c &= 0x7FF;
49*fd525a9cSAndroid Build Coastguard Worker if (c < 0x80) {
50*fd525a9cSAndroid Build Coastguard Worker // Use uint32_t because uniform_int_distribution does not support
51*fd525a9cSAndroid Build Coastguard Worker // char32_t on Windows.
52*fd525a9cSAndroid Build Coastguard Worker c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random);
53*fd525a9cSAndroid Build Coastguard Worker }
54*fd525a9cSAndroid Build Coastguard Worker StoreCode(b, c, size, 0xC0);
55*fd525a9cSAndroid Build Coastguard Worker break;
56*fd525a9cSAndroid Build Coastguard Worker case 3:
57*fd525a9cSAndroid Build Coastguard Worker c &= 0xFFFF;
58*fd525a9cSAndroid Build Coastguard Worker
59*fd525a9cSAndroid Build Coastguard Worker // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves.
60*fd525a9cSAndroid Build Coastguard Worker if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
61*fd525a9cSAndroid Build Coastguard Worker uint32_t halves = 0xE000 - 0xD800;
62*fd525a9cSAndroid Build Coastguard Worker c = std::uniform_int_distribution<uint32_t>(0x800,
63*fd525a9cSAndroid Build Coastguard Worker 0xFFFF - halves)(*random);
64*fd525a9cSAndroid Build Coastguard Worker if (c >= 0xD800) c += halves;
65*fd525a9cSAndroid Build Coastguard Worker }
66*fd525a9cSAndroid Build Coastguard Worker StoreCode(b, c, size, 0xE0);
67*fd525a9cSAndroid Build Coastguard Worker break;
68*fd525a9cSAndroid Build Coastguard Worker case 4:
69*fd525a9cSAndroid Build Coastguard Worker c &= 0x1FFFFF;
70*fd525a9cSAndroid Build Coastguard Worker if (c < 0x10000 || c > 0x10FFFF) {
71*fd525a9cSAndroid Build Coastguard Worker c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random);
72*fd525a9cSAndroid Build Coastguard Worker }
73*fd525a9cSAndroid Build Coastguard Worker StoreCode(b, c, size, 0xF0);
74*fd525a9cSAndroid Build Coastguard Worker break;
75*fd525a9cSAndroid Build Coastguard Worker default:
76*fd525a9cSAndroid Build Coastguard Worker assert(false && "Unexpected size of UTF-8 sequence");
77*fd525a9cSAndroid Build Coastguard Worker }
78*fd525a9cSAndroid Build Coastguard Worker return b;
79*fd525a9cSAndroid Build Coastguard Worker }
80*fd525a9cSAndroid Build Coastguard Worker
81*fd525a9cSAndroid Build Coastguard Worker } // namespace
82*fd525a9cSAndroid Build Coastguard Worker
FixUtf8String(std::string * str,RandomEngine * random)83*fd525a9cSAndroid Build Coastguard Worker void FixUtf8String(std::string* str, RandomEngine* random) {
84*fd525a9cSAndroid Build Coastguard Worker if (str->empty()) return;
85*fd525a9cSAndroid Build Coastguard Worker char* b = &(*str)[0];
86*fd525a9cSAndroid Build Coastguard Worker const char* e = b + str->size();
87*fd525a9cSAndroid Build Coastguard Worker while (b < e) {
88*fd525a9cSAndroid Build Coastguard Worker b = FixCode(b, e, random);
89*fd525a9cSAndroid Build Coastguard Worker }
90*fd525a9cSAndroid Build Coastguard Worker }
91*fd525a9cSAndroid Build Coastguard Worker
92*fd525a9cSAndroid Build Coastguard Worker } // namespace protobuf_mutator
93