xref: /aosp_15_r20/external/jazzer-api/launcher/testdata/test/ModifiedUtf8Encoder.java (revision 33edd6723662ea34453766bfdca85dbfdd5342b8)
1*33edd672SMark // Copyright 2021 Code Intelligence GmbH
2*33edd672SMark //
3*33edd672SMark // Licensed under the Apache License, Version 2.0 (the "License");
4*33edd672SMark // you may not use this file except in compliance with the License.
5*33edd672SMark // You may obtain a copy of the License at
6*33edd672SMark //
7*33edd672SMark //      http://www.apache.org/licenses/LICENSE-2.0
8*33edd672SMark //
9*33edd672SMark // Unless required by applicable law or agreed to in writing, software
10*33edd672SMark // distributed under the License is distributed on an "AS IS" BASIS,
11*33edd672SMark // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*33edd672SMark // See the License for the specific language governing permissions and
13*33edd672SMark // limitations under the License.
14*33edd672SMark 
15*33edd672SMark package test;
16*33edd672SMark 
17*33edd672SMark import java.nio.charset.Charset;
18*33edd672SMark import java.util.ArrayList;
19*33edd672SMark 
/** Test helper that produces the JVM's modified UTF-8 representation of a string. */
final class ModifiedUtf8Encoder {
  /**
   * Encodes a string in the JVM's modified UTF-8 encoding.
   *
   * <p>Modified UTF-8 is almost the same as CESU-8, the only difference being that the zero
   * character is coded on two bytes ({@code 0xC0 0x80}) instead of a single zero byte. The
   * string is therefore encoded as CESU-8 first and every zero byte is then expanded.
   *
   * @param value the string to encode
   * @return a freshly allocated array holding the encoded bytes; it never contains a zero byte
   * @throws NullPointerException if {@code value} is {@code null}
   * @throws java.nio.charset.UnsupportedCharsetException if the runtime does not provide the
   *     {@code CESU-8} charset
   */
  public static byte[] encode(String value) {
    byte[] cesuBytes = value.getBytes(Charset.forName("CESU-8"));

    // First pass: each zero byte grows by one byte, so counting them gives the exact output size
    // and avoids boxing every byte into a growable list.
    int zeroCount = 0;
    for (byte cesuByte : cesuBytes) {
      if (cesuByte == 0) {
        zeroCount++;
      }
    }
    if (zeroCount == 0) {
      // Nothing to expand; getBytes already returned a new array that nobody else references.
      return cesuBytes;
    }

    // Second pass: copy the bytes, replacing each zero byte with the two-byte form.
    byte[] out = new byte[cesuBytes.length + zeroCount];
    int pos = 0;
    for (byte cesuByte : cesuBytes) {
      if (cesuByte != 0) {
        out[pos++] = cesuByte;
      } else {
        out[pos++] = (byte) 0xC0;
        out[pos++] = (byte) 0x80;
      }
    }
    return out;
  }
}
42