View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache license, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License. You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the license for the specific language governing permissions and
15   * limitations under the license.
16   */
17  package org.apache.logging.log4j.core.util;
18  
19  import java.io.UnsupportedEncodingException;
20  import java.nio.charset.Charset;
21  import java.nio.charset.StandardCharsets;
22  
/**
 * Encodes Strings to bytes.
 *
 * <p>Besides the general purpose {@link #toBytes(String, Charset)}, this class contains a hand-written
 * ISO-8859-1 encoder (see LOG4J2-1151) that writes into caller supplied byte arrays.</p>
 *
 * @since 2.5
 */
public final class StringEncoder {

    private StringEncoder() {
    }

    /**
     * Converts a String to a byte[].
     *
     * @param str if null, return null.
     * @param charset if null, use the default charset.
     * @return the encoded bytes, or {@code null} if {@code str} is {@code null}
     */
    public static byte[] toBytes(final String str, final Charset charset) {
        if (str != null) {
            if (StandardCharsets.ISO_8859_1.equals(charset)) {
                // LOG4J2-1151: dedicated encoder for ISO-8859-1
                return encodeSingleByteChars(str);
            }
            final Charset actual = charset != null ? charset : Charset.defaultCharset();
            try { // LOG4J2-935: String.getBytes(String) gives better performance
                return str.getBytes(actual.name());
            } catch (final UnsupportedEncodingException e) {
                // The charset name was not resolvable by name; fall back to the Charset instance itself.
                return str.getBytes(actual);
            }
        }
        return null;
    }

    /**
     * Encodes the specified char sequence as ISO-8859-1. Characters up to 255 are narrowed to a single byte; any
     * other character is replaced by {@code '?'}. A high surrogate immediately followed by a low surrogate is
     * replaced by a single {@code '?'}, so the returned array may be shorter than the input sequence.
     *
     * @param s the char sequence to encode, must not be {@code null}
     * @return a byte array holding exactly the encoded bytes
     * @see <a href="https://issues.apache.org/jira/browse/LOG4J2-1151">LOG4J2-1151</a>
     */
    public static byte[] encodeSingleByteChars(final CharSequence s) {
        final int length = s.length();
        final byte[] result = new byte[length];
        final int written = encodeString(s, 0, length, result);
        if (written == length) {
            return result;
        }
        // Surrogate pairs collapse two chars into one byte: trim so that the caller does not see trailing
        // zero bytes that were never part of the encoded output.
        final byte[] trimmed = new byte[written];
        System.arraycopy(result, 0, trimmed, 0, written);
        return trimmed;
    }

    // LOG4J2-1151
    /**
     * Copies characters to bytes until {@code length} characters were copied or a character above 255 is found.
     *
     * <p>Implementation note: this is the fast path. If the char array contains only ISO-8859-1 characters, all the
     * work will be done here.</p>
     *
     * @param charArray the characters to encode
     * @param charIndex index of the first character to read
     * @param byteArray the destination array
     * @param byteIndex index in {@code byteArray} of the first byte to write
     * @param length maximum number of characters to copy
     * @return the number of characters copied, which is less than {@code length} if a character above 255 stopped
     *         the copy
     */
    public static int encodeIsoChars(final CharSequence charArray, int charIndex, final byte[] byteArray, int byteIndex, final int length) {
        int i = 0;
        for (; i < length; i++) {
            final char c = charArray.charAt(charIndex++);
            if (c > 255) {
                break;
            }
            byteArray[(byteIndex++)] = ((byte) c);
        }
        return i;
    }

    // LOG4J2-1151
    /**
     * Encodes characters as ISO-8859-1 into the specified byte array, starting at index 0 of that array. Characters
     * above 255 are written as {@code '?'}; a high surrogate directly followed by a low surrogate produces a single
     * {@code '?'}. Encoding stops when {@code charLength} characters were consumed or the byte array is full.
     *
     * @param charArray the characters to encode
     * @param charOffset index of the first character to encode
     * @param charLength number of characters to encode
     * @param byteArray the destination array
     * @return the number of bytes written to {@code byteArray}
     */
    public static int encodeString(final CharSequence charArray, int charOffset, int charLength, final byte[] byteArray) {
        int byteOffset = 0;
        int length = Math.min(charLength, byteArray.length);
        int charDoneIndex = charOffset + length;
        while (charOffset < charDoneIndex) {
            // Fast path: copy as many ISO-8859-1 characters as possible.
            final int done = encodeIsoChars(charArray, charOffset, byteArray, byteOffset, length);
            charOffset += done;
            byteOffset += done;
            if (done != length) {
                // The fast path stopped at a character above 255: consume it and emit the replacement byte.
                final char c = charArray.charAt(charOffset++);
                if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex)
                        && (Character.isLowSurrogate(charArray.charAt(charOffset)))) {
                    if (charLength > byteArray.length) {
                        // The destination is the limiting factor and the pair used two chars for one byte,
                        // so one more input character can be taken into account.
                        charDoneIndex++;
                        charLength--;
                    }
                    // Skip the low surrogate: the pair is replaced by a single '?'.
                    charOffset++;
                }
                byteArray[(byteOffset++)] = '?';
                length = Math.min(charDoneIndex - charOffset, byteArray.length - byteOffset);
            }
        }
        return byteOffset;
    }
}