/**
 * Encodes strings in the JVM's "modified UTF-8" format, i.e. the byte layout produced by
 * {@code java.io.DataOutput#writeUTF} (without its two-byte length prefix) and used by JNI and
 * class-file string constants.
 *
 * <p>Modified UTF-8 differs from standard UTF-8 in two ways: the NUL character (U+0000) is
 * written as the two bytes {@code 0xC0 0x80}, and every UTF-16 code unit is encoded on its own,
 * so a supplementary code point becomes two three-byte sequences (one per surrogate).
 */
final class ModifiedUtf8Encoder {
  /**
   * Encodes a string in the JVM's modified UTF-8 encoding.
   *
   * <p>The encoding is performed per UTF-16 code unit rather than by delegating to a charset
   * encoder. A charset-based encoder treats an unpaired surrogate as malformed input and
   * substitutes a replacement byte, whereas modified UTF-8 encodes it as an ordinary three-byte
   * sequence; encoding per code unit keeps such strings intact.
   *
   * @param value the string to encode; must not be {@code null}
   * @return a newly allocated array holding the modified UTF-8 bytes of {@code value}; empty if
   *     {@code value} is empty
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public static byte[] encode(String value) {
    final int charCount = value.length();

    // First pass: compute the exact output size so that a single primitive array suffices and no
    // per-byte boxing or resizing is needed.
    int byteCount = 0;
    for (int i = 0; i < charCount; i++) {
      byteCount += encodedLength(value.charAt(i));
    }

    // Second pass: emit the bytes for each code unit.
    final byte[] out = new byte[byteCount];
    int pos = 0;
    for (int i = 0; i < charCount; i++) {
      final char c = value.charAt(i);
      switch (encodedLength(c)) {
        case 1:
          out[pos++] = (byte) c;
          break;
        case 2:
          // Covers U+0080..U+07FF and also U+0000, which yields exactly 0xC0 0x80.
          out[pos++] = (byte) (0xC0 | (c >> 6));
          out[pos++] = (byte) (0x80 | (c & 0x3F));
          break;
        default:
          // Covers U+0800..U+FFFF, including both paired and unpaired surrogates.
          out[pos++] = (byte) (0xE0 | (c >> 12));
          out[pos++] = (byte) (0x80 | ((c >> 6) & 0x3F));
          out[pos++] = (byte) (0x80 | (c & 0x3F));
          break;
      }
    }
    return out;
  }

  // Returns the number of bytes (1, 2 or 3) a single UTF-16 code unit occupies in modified UTF-8.
  private static int encodedLength(char c) {
    if (c >= 0x0001 && c <= 0x007F) {
      return 1;
    }
    // NUL is deliberately excluded from the one-byte range above and falls through to two bytes.
    if (c <= 0x07FF) {
      return 2;
    }
    return 3;
  }
}