/**
 * Encodes strings in the JVM's "modified UTF-8" format, the encoding described for
 * {@code java.io.DataOutput#writeUTF} (without its two-byte length prefix).
 *
 * <p>Modified UTF-8 differs from standard UTF-8 in two ways: the character U+0000 is written as
 * the two bytes {@code 0xC0 0x80} instead of a single zero byte, and supplementary characters are
 * written as two separately encoded surrogates (three bytes each) instead of one four-byte
 * sequence.
 */
final class ModifiedUtf8Encoder {
  /**
   * Encodes a string in the JVM's modified UTF-8 encoding.
   *
   * <p>Every UTF-16 code unit is encoded on its own, so unpaired surrogates are preserved as
   * three-byte sequences rather than being replaced by a substitution character.
   *
   * @param value the string to encode; must not be {@code null}
   * @return a newly allocated array holding the modified UTF-8 bytes of {@code value}; empty if
   *     {@code value} is empty
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public static byte[] encode(String value) {
    final int charCount = value.length();

    // First pass: determine the exact output size so the result can be written into a primitive
    // array directly, without boxing each byte.
    int byteCount = 0;
    for (int i = 0; i < charCount; i++) {
      byteCount += encodedLength(value.charAt(i));
    }

    // Second pass: emit the bytes.
    final byte[] out = new byte[byteCount];
    int pos = 0;
    for (int i = 0; i < charCount; i++) {
      final char c = value.charAt(i);
      switch (encodedLength(c)) {
        case 1:
          out[pos++] = (byte) c;
          break;
        case 2:
          // Also covers U+0000, which becomes 0xC0 0x80.
          out[pos++] = (byte) (0xC0 | (c >> 6));
          out[pos++] = (byte) (0x80 | (c & 0x3F));
          break;
        default:
          // Covers all remaining chars, including each half of a surrogate pair.
          out[pos++] = (byte) (0xE0 | (c >> 12));
          out[pos++] = (byte) (0x80 | ((c >> 6) & 0x3F));
          out[pos++] = (byte) (0x80 | (c & 0x3F));
          break;
      }
    }
    return out;
  }

  // Returns the number of bytes (1, 2 or 3) a single UTF-16 code unit occupies in modified UTF-8.
  private static int encodedLength(char c) {
    if (c >= 0x0001 && c <= 0x007F) {
      return 1;
    }
    // U+0000 deliberately falls through to the two-byte form.
    if (c <= 0x07FF) {
      return 2;
    }
    return 3;
  }
}