001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache license, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the license for the specific language governing permissions and
015 * limitations under the license.
016 */
017package org.apache.logging.log4j.core.util;
018
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
022
/**
 * Encodes Strings to bytes.
 *
 * @since 2.5
 */
public final class StringEncoder {

    private StringEncoder() {
    }

    /**
     * Converts a String to a byte[].
     *
     * @param str if null, return null.
     * @param charset if null, use the default charset.
     * @return a byte[], or {@code null} if {@code str} is null
     */
    public static byte[] toBytes(final String str, final Charset charset) {
        if (str == null) {
            return null;
        }
        if (StandardCharsets.ISO_8859_1.equals(charset)) {
            return encodeSingleByteChars(str);
        }
        final Charset actual = charset != null ? charset : Charset.defaultCharset();
        try { // LOG4J2-935: String.getBytes(String) gives better performance
            return str.getBytes(actual.name());
        } catch (final UnsupportedEncodingException e) {
            // The charset name was not resolvable by name; encode through the Charset object instead.
            return str.getBytes(actual);
        }
    }

    /**
     * Encodes the specified char sequence as ISO-8859-1 by casting each character up to 255 to a byte.
     * Characters above 255 are replaced by {@code '?'}; a high surrogate directly followed by a low surrogate
     * is replaced by a single {@code '?'}, so the returned array may be shorter than the char sequence.
     *
     * @param s the char sequence to encode, must not be null
     * @return the encoded bytes, sized to exactly the number of bytes produced
     * @see <a href="https://issues.apache.org/jira/browse/LOG4J2-1151">LOG4J2-1151</a>
     */
    public static byte[] encodeSingleByteChars(final CharSequence s) {
        final int length = s.length();
        final byte[] result = new byte[length];
        final int written = encodeString(s, 0, length, result);
        // Surrogate pairs collapse two chars into one byte; trim so no zero padding is returned.
        return written == length ? result : Arrays.copyOf(result, written);
    }

    // LOG4J2-1151
    /**
     * Copies characters to bytes until {@code length} characters were copied or a character above 255 is found.
     * <p>
     * Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the
     * work will be done here.
     * </p>
     *
     * @param charArray the characters to read
     * @param charIndex index of the first character to read
     * @param byteArray the destination array
     * @param byteIndex index in the destination where the first byte is written
     * @param length maximum number of characters to copy
     * @return the number of characters copied; less than {@code length} if a character above 255 was encountered
     */
    public static int encodeIsoChars(final CharSequence charArray, int charIndex, final byte[] byteArray, int byteIndex, final int length) {
        int i = 0;
        for (; i < length; i++) {
            final char c = charArray.charAt(charIndex++);
            if (c > 255) {
                // Not representable in a single byte: stop and let the caller handle this character.
                break;
            }
            byteArray[(byteIndex++)] = ((byte) c);
        }
        return i;
    }

    // LOG4J2-1151
    /**
     * Encodes up to {@code charLength} characters, starting at {@code charOffset}, into {@code byteArray}
     * starting at index 0. Characters above 255 are written as {@code '?'}; a high surrogate directly followed
     * by a low surrogate (within the range being encoded) is written as a single {@code '?'}. Encoding stops
     * when the requested characters are consumed or the destination is full.
     *
     * @param charArray the characters to encode
     * @param charOffset index of the first character to encode
     * @param charLength number of characters to encode
     * @param byteArray the destination array
     * @return the number of bytes written to {@code byteArray}
     */
    public static int encodeString(final CharSequence charArray, int charOffset, int charLength, final byte[] byteArray) {
        int byteOffset = 0;
        // Never process more characters than there is room for in the destination.
        int length = Math.min(charLength, byteArray.length);
        int charDoneIndex = charOffset + length;
        while (charOffset < charDoneIndex) {
            // Fast path: copy a run of single-byte characters.
            final int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length);
            charOffset += done;
            byteOffset += done;
            if (done != length) {
                // The fast path stopped at a character above 255; consume it here.
                final char c = charArray.charAt(charOffset++);
                if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex)
                        && (Character.isLowSurrogate(charArray.charAt(charOffset)))) {
                    if (charLength > byteArray.length) {
                        // The pair uses two chars but one byte, so one more char fits in the destination.
                        charDoneIndex++;
                        charLength--;
                    }
                    // Skip the low surrogate: the pair is replaced by a single '?'.
                    charOffset++;
                }
                byteArray[(byteOffset++)] = '?';
                length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset);
            }
        }
        return byteOffset;
    }
}