xref: /aosp_15_r20/external/jazzer-api/launcher/testdata/test/ModifiedUtf8Encoder.java (revision 33edd6723662ea34453766bfdca85dbfdd5342b8)
1 // Copyright 2021 Code Intelligence GmbH
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package test;
16 
17 import java.nio.charset.Charset;
18 import java.util.ArrayList;
19 
final class ModifiedUtf8Encoder {
  /**
   * Encodes a string in the JVM's modified UTF-8 encoding.
   *
   * <p>Every UTF-16 code unit of {@code value} is encoded independently:
   *
   * <ul>
   *   <li>{@code U+0001}..{@code U+007F} take one byte;
   *   <li>{@code U+0000} and {@code U+0080}..{@code U+07FF} take two bytes, so the zero
   *       character becomes {@code 0xC0 0x80} and the output never contains a zero byte;
   *   <li>everything else, including each half of a surrogate pair, takes three bytes.
   * </ul>
   *
   * <p>Because code units are encoded one at a time, unpaired surrogates are preserved as
   * three-byte sequences rather than being replaced by a substitution character, and no optional
   * charset has to be available at runtime.
   *
   * @param value the string to encode; must not be {@code null}
   * @return a newly allocated array holding the modified UTF-8 encoding of {@code value}
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public static byte[] encode(String value) {
    final int length = value.length();

    // First pass: determine the exact output size so the result can be written into a plain
    // byte[] without boxing or resizing.
    int encodedLength = 0;
    for (int i = 0; i < length; i++) {
      char c = value.charAt(i);
      if (c >= 0x0001 && c <= 0x007F) {
        encodedLength += 1;
      } else if (c <= 0x07FF) {
        encodedLength += 2;
      } else {
        encodedLength += 3;
      }
    }

    // Second pass: emit the bytes.
    byte[] out = new byte[encodedLength];
    int pos = 0;
    for (int i = 0; i < length; i++) {
      char c = value.charAt(i);
      if (c >= 0x0001 && c <= 0x007F) {
        out[pos++] = (byte) c;
      } else if (c <= 0x07FF) {
        // Also reached for c == 0, which yields the two-byte form 0xC0 0x80.
        out[pos++] = (byte) (0xC0 | (c >> 6));
        out[pos++] = (byte) (0x80 | (c & 0x3F));
      } else {
        // Surrogates land here too; each half of a pair is encoded on its own.
        out[pos++] = (byte) (0xE0 | (c >> 12));
        out[pos++] = (byte) (0x80 | ((c >> 6) & 0x3F));
        out[pos++] = (byte) (0x80 | (c & 0x3F));
      }
    }
    return out;
  }
}
42