blob: 7481ed796c9e31b562e428432a32c49de9fe66d8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package java.nio.charset;
import java.io.UTFDataFormatException;
/**
* @hide internal use only
*/
public class ModifiedUtf8 {
/**
* Decodes a byte array containing <i>modified UTF-8</i> bytes into a string.
*
* <p>Note that although this method decodes the (supposedly impossible) zero byte to U+0000,
* that's what the RI does too.
*/
public static String decode(byte[] in, char[] out, int offset, int utfSize) throws UTFDataFormatException {
int count = 0, s = 0, a;
while (count < utfSize) {
if ((out[s] = (char) in[offset + count++]) < '\u0080') {
s++;
} else if (((a = out[s]) & 0xe0) == 0xc0) {
if (count >= utfSize) {
throw new UTFDataFormatException("bad second byte at " + count);
}
int b = in[offset + count++];
if ((b & 0xC0) != 0x80) {
throw new UTFDataFormatException("bad second byte at " + (count - 1));
}
out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
} else if ((a & 0xf0) == 0xe0) {
if (count + 1 >= utfSize) {
throw new UTFDataFormatException("bad third byte at " + (count + 1));
}
int b = in[offset + count++];
int c = in[offset + count++];
if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
throw new UTFDataFormatException("bad second or third byte at " + (count - 2));
}
out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
} else {
throw new UTFDataFormatException("bad byte at " + (count - 1));
}
}
return new String(out, 0, s);
}
private ModifiedUtf8() {
}
}