Author: reschke Date: Wed Feb 14 15:53:41 2018 New Revision: 1824253 URL: http://svn.apache.org/viewvc?rev=1824253&view=rev Log: OAK-7268: Create charset encoding utility that detects malformed input
Added: jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java (with props) Added: jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java?rev=1824253&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java (added) +++ jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java Wed Feb 14 15:53:41 2018 @@ -0,0 +1,85 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.jackrabbit.oak.plugins.document.util; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; + +import java.nio.charset.StandardCharsets; + +/** + * Utility class related to encoding characters into (UTF-8) byte sequences. + */ +public class UTF8Encoder { + + private UTF8Encoder() { + } + + /** + * Get an encoder suitable for UTF-8, using the proper error handling flags. + * + * Note that the instance is kept in a thread local to avoid the cost of + * constructing an encoder for every call. This actually mimics the + * internals of {@link String#getBytes(String)}. + */ + private static ThreadLocal<CharsetEncoder> CSE = new ThreadLocal<CharsetEncoder>() { + @Override + protected CharsetEncoder initialValue() { + CharsetEncoder e = StandardCharsets.UTF_8.newEncoder(); + e.onUnmappableCharacter(CodingErrorAction.REPORT); + e.onMalformedInput(CodingErrorAction.REPORT); + return e; + } + }; + + /** + * Like {@link String#getBytes(java.nio.charset.Charset)} (with "UTF-8"), + * except that invalid character sequences (such as unpaired surrogates) are + * reported as exceptions (see {@link CodingErrorAction#REPORT}, instead of + * being silently replaced by a replacement character as it would happen + * otherwise. + * + * @param input + * String to encode + * @return String encoded using {@link StandardCharsets#UTF_8} + * @throws IOException + * on encoding error + */ + public static byte[] encodeAsByteArray(String input) throws IOException { + CharsetEncoder e = CSE.get(); + e.reset(); + return bytes(e.encode(CharBuffer.wrap(input.toCharArray()))); + } + + /** + * @see {@link CharsetEncoder#canEncode(CharSequence) + */ + public static boolean canEncode(CharSequence input) { + CharsetEncoder e = CSE.get(); + e.reset(); + return e.canEncode(input); + } + + private static byte[] bytes(ByteBuffer b) { + byte[] a = new byte[b.remaining()]; + b.get(a); + return a; + } +} Propchange: jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/UTF8Encoder.java ------------------------------------------------------------------------------ svn:executable = *