Extract string encoding utility methods from AbstractStringLayout into a new StringEncoder utility class.
Project: http://git-wip-us.apache.org/repos/asf/logging-log4j2/repo Commit: http://git-wip-us.apache.org/repos/asf/logging-log4j2/commit/9ad36035 Tree: http://git-wip-us.apache.org/repos/asf/logging-log4j2/tree/9ad36035 Diff: http://git-wip-us.apache.org/repos/asf/logging-log4j2/diff/9ad36035 Branch: refs/heads/LOG4J2-89 Commit: 9ad3603518c975fcbd5575375733985c8debc8d1 Parents: 6de55fd Author: Matt Sicker <[email protected]> Authored: Mon Nov 9 15:45:36 2015 -0600 Committer: Ralph Goers <[email protected]> Committed: Fri Nov 20 17:39:21 2015 -0700 ---------------------------------------------------------------------- .../log4j/core/layout/AbstractCsvLayout.java | 3 +- .../log4j/core/layout/AbstractStringLayout.java | 82 +------------- .../log4j/core/layout/PatternLayout.java | 3 +- .../logging/log4j/core/util/StringEncoder.java | 111 +++++++++++++++++++ 4 files changed, 117 insertions(+), 82 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/9ad36035/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractCsvLayout.java ---------------------------------------------------------------------- diff --git a/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractCsvLayout.java b/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractCsvLayout.java index fa945ac..ae55447 100644 --- a/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractCsvLayout.java +++ b/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractCsvLayout.java @@ -20,6 +20,7 @@ import java.nio.charset.Charset; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.QuoteMode; +import org.apache.logging.log4j.core.util.StringEncoder; /** * A superclass for Comma-Separated Value (CSV) layouts. 
@@ -39,7 +40,7 @@ public abstract class AbstractCsvLayout extends AbstractStringLayout { protected AbstractCsvLayout(final Charset charset, final CSVFormat csvFormat, final String header, final String footer) { - super(charset, toBytes(header, charset), toBytes(footer, charset)); + super(charset, StringEncoder.toBytes(header, charset), StringEncoder.toBytes(footer, charset)); this.format = csvFormat; } http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/9ad36035/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractStringLayout.java ---------------------------------------------------------------------- diff --git a/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractStringLayout.java b/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractStringLayout.java index 2bfa371..68b9fb6 100644 --- a/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractStringLayout.java +++ b/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/AbstractStringLayout.java @@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets; import org.apache.logging.log4j.core.LogEvent; import org.apache.logging.log4j.core.StringLayout; +import org.apache.logging.log4j.core.util.StringEncoder; /** * Abstract base class for Layouts that result in a String. @@ -86,28 +87,6 @@ public abstract class AbstractStringLayout extends AbstractLayout<String> implem } } - /** - * Converts a String to a byte[]. - * - * @param str if null, return null. - * @param charset if null, use the default charset. - * @return a byte[] - */ - static byte[] toBytes(final String str, final Charset charset) { - if (str != null) { - if (StandardCharsets.ISO_8859_1.equals(charset)) { - return encodeSingleByteChars(str); - } - final Charset actual = charset != null ? 
charset : Charset.defaultCharset(); - try { // LOG4J2-935: String.getBytes(String) gives better performance - return str.getBytes(actual.name()); - } catch (UnsupportedEncodingException e) { - return str.getBytes(actual); - } - } - return null; - } - private void writeObject(final ObjectOutputStream out) throws IOException { out.defaultWriteObject(); out.writeUTF(charset.name()); @@ -136,7 +115,7 @@ public abstract class AbstractStringLayout extends AbstractLayout<String> implem protected byte[] getBytes(final String s) { if (useCustomEncoding) { // rely on branch prediction to eliminate this check if false - return encodeSingleByteChars(s); + return StringEncoder.encodeSingleByteChars(s); } try { // LOG4J2-935: String.getBytes(String) gives better performance return s.getBytes(charsetName); @@ -145,63 +124,6 @@ public abstract class AbstractStringLayout extends AbstractLayout<String> implem } } - /** - * Encode the specified string by casting each character to a byte. - * - * @param s the string to encode - * @return the encoded String - * @see https://issues.apache.org/jira/browse/LOG4J2-1151 - */ - private static byte[] encodeSingleByteChars(String s) { - final int length = s.length(); - final byte[] result = new byte[length]; - encodeString(s, 0, length, result); - return result; - } - - // LOG4J2-1151 - /* - * Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the work - * will be done here. 
- */ - private static int encodeIsoChars(String charArray, int charIndex, byte[] byteArray, int byteIndex, int length) { - int i = 0; - for (; i < length; i++) { - char c = charArray.charAt(charIndex++); - if (c > 255) { - break; - } - byteArray[(byteIndex++)] = ((byte) c); - } - return i; - } - - // LOG4J2-1151 - private static int encodeString(String charArray, int charOffset, int charLength, byte[] byteArray) { - int byteOffset = 0; - int length = Math.min(charLength, byteArray.length); - int charDoneIndex = charOffset + length; - while (charOffset < charDoneIndex) { - int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length); - charOffset += done; - byteOffset += done; - if (done != length) { - char c = charArray.charAt(charOffset++); - if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex) - && (Character.isLowSurrogate(charArray.charAt(charOffset)))) { - if (charLength > byteArray.length) { - charDoneIndex++; - charLength--; - } - charOffset++; - } - byteArray[(byteOffset++)] = '?'; - length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset); - } - } - return byteOffset; - } - @Override public Charset getCharset() { return charset; http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/9ad36035/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/PatternLayout.java ---------------------------------------------------------------------- diff --git a/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/PatternLayout.java b/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/PatternLayout.java index c78a7b7..33e6d71 100644 --- a/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/PatternLayout.java +++ b/log4j-core/src/main/java/org/apache/logging/log4j/core/layout/PatternLayout.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.core.pattern.LogEventPatternConverter; import org.apache.logging.log4j.core.pattern.PatternFormatter; import 
org.apache.logging.log4j.core.pattern.PatternParser; import org.apache.logging.log4j.core.pattern.RegexReplacement; +import org.apache.logging.log4j.core.util.StringEncoder; /** * A flexible layout configurable with pattern string. @@ -125,7 +126,7 @@ public final class PatternLayout extends AbstractStringLayout { final PatternSelector patternSelector, final Charset charset, final boolean alwaysWriteExceptions, final boolean noConsoleNoAnsi, final String header, final String footer) { - super(charset, toBytes(header, charset), toBytes(footer, charset)); + super(charset, StringEncoder.toBytes(header, charset), StringEncoder.toBytes(footer, charset)); this.replace = replace; this.conversionPattern = pattern; this.patternSelector = patternSelector; http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/9ad36035/log4j-core/src/main/java/org/apache/logging/log4j/core/util/StringEncoder.java ---------------------------------------------------------------------- diff --git a/log4j-core/src/main/java/org/apache/logging/log4j/core/util/StringEncoder.java b/log4j-core/src/main/java/org/apache/logging/log4j/core/util/StringEncoder.java new file mode 100644 index 0000000..5c36ba4 --- /dev/null +++ b/log4j-core/src/main/java/org/apache/logging/log4j/core/util/StringEncoder.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache license, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the license for the specific language governing permissions and + * limitations under the license. + */ +package org.apache.logging.log4j.core.util; + +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +/** + * String to byte encoding support. + * + * @since 2.5 + */ +public final class StringEncoder { + + private StringEncoder() { + } + + /** + * Converts a String to a byte[]. + * + * @param str if null, return null. + * @param charset if null, use the default charset. + * @return a byte[] + */ + public static byte[] toBytes(final String str, final Charset charset) { + if (str != null) { + if (StandardCharsets.ISO_8859_1.equals(charset)) { + return encodeSingleByteChars(str); + } + final Charset actual = charset != null ? charset : Charset.defaultCharset(); + try { // LOG4J2-935: String.getBytes(String) gives better performance + return str.getBytes(actual.name()); + } catch (UnsupportedEncodingException e) { + return str.getBytes(actual); + } + } + return null; + } + + /** + * Encode the specified string by casting each character to a byte. + * + * @param s the string to encode + * @return the encoded String + * @see https://issues.apache.org/jira/browse/LOG4J2-1151 + */ + public static byte[] encodeSingleByteChars(String s) { + final int length = s.length(); + final byte[] result = new byte[length]; + encodeString(s, 0, length, result); + return result; + } + + // LOG4J2-1151 + /* + * Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the work + * will be done here. 
+ */ + public static int encodeIsoChars(String charArray, int charIndex, byte[] byteArray, int byteIndex, int length) { + int i = 0; + for (; i < length; i++) { + char c = charArray.charAt(charIndex++); + if (c > 255) { + break; + } + byteArray[(byteIndex++)] = ((byte) c); + } + return i; + } + + // LOG4J2-1151 + public static int encodeString(String charArray, int charOffset, int charLength, byte[] byteArray) { + int byteOffset = 0; + int length = Math.min(charLength, byteArray.length); + int charDoneIndex = charOffset + length; + while (charOffset < charDoneIndex) { + int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length); + charOffset += done; + byteOffset += done; + if (done != length) { + char c = charArray.charAt(charOffset++); + if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex) + && (Character.isLowSurrogate(charArray.charAt(charOffset)))) { + if (charLength > byteArray.length) { + charDoneIndex++; + charLength--; + } + charOffset++; + } + byteArray[(byteOffset++)] = '?'; + length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset); + } + } + return byteOffset; + } +}
