001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache license, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the license for the specific language governing permissions and 015 * limitations under the license. 016 */ 017package org.apache.logging.log4j.core.util; 018 019import java.io.UnsupportedEncodingException; 020import java.nio.charset.Charset; 021import java.nio.charset.StandardCharsets; 022 023/** 024 * Encodes Strings to bytes. 025 * 026 * @since 2.5 027 */ 028public final class StringEncoder { 029 030 private StringEncoder() { 031 } 032 033 /** 034 * Converts a String to a byte[]. 035 * 036 * @param str if null, return null. 037 * @param charset if null, use the default charset. 038 * @return a byte[] 039 */ 040 public static byte[] toBytes(final String str, final Charset charset) { 041 if (str != null) { 042 if (StandardCharsets.ISO_8859_1.equals(charset)) { 043 return encodeSingleByteChars(str); 044 } 045 final Charset actual = charset != null ? charset : Charset.defaultCharset(); 046 try { // LOG4J2-935: String.getBytes(String) gives better performance 047 return str.getBytes(actual.name()); 048 } catch (final UnsupportedEncodingException e) { 049 return str.getBytes(actual); 050 } 051 } 052 return null; 053 } 054 055 /** 056 * Encodes the specified char sequence by casting each character to a byte. 057 * 058 * @param s the char sequence to encode 059 * @return the encoded String 060 * @see <a href="https://issues.apache.org/jira/browse/LOG4J2-1151">LOG4J2-1151</a> 061 */ 062 public static byte[] encodeSingleByteChars(final CharSequence s) { 063 final int length = s.length(); 064 final byte[] result = new byte[length]; 065 encodeString(s, 0, length, result); 066 return result; 067 } 068 069 // LOG4J2-1151 070 /* 071 * Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the work 072 * will be done here. 073 */ 074 public static int encodeIsoChars(final CharSequence charArray, int charIndex, final byte[] byteArray, int byteIndex, final int length) { 075 int i = 0; 076 for (; i < length; i++) { 077 final char c = charArray.charAt(charIndex++); 078 if (c > 255) { 079 break; 080 } 081 byteArray[(byteIndex++)] = ((byte) c); 082 } 083 return i; 084 } 085 086 // LOG4J2-1151 087 public static int encodeString(final CharSequence charArray, int charOffset, int charLength, final byte[] byteArray) { 088 int byteOffset = 0; 089 int length = Math.min(charLength, byteArray.length); 090 int charDoneIndex = charOffset + length; 091 while (charOffset < charDoneIndex) { 092 final int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length); 093 charOffset += done; 094 byteOffset += done; 095 if (done != length) { 096 final char c = charArray.charAt(charOffset++); 097 if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex) 098 && (Character.isLowSurrogate(charArray.charAt(charOffset)))) { 099 if (charLength > byteArray.length) { 100 charDoneIndex++; 101 charLength--; 102 } 103 charOffset++; 104 } 105 byteArray[(byteOffset++)] = '?'; 106 length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset); 107 } 108 } 109 return byteOffset; 110 } 111}