View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.shiro.codec;
20  
21  /**
22   * Provides <a href="http://en.wikipedia.org/wiki/Base64">Base 64</a> encoding and decoding as defined by
23   * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
24   * <p/>
25   * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
26   * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
27   * <p/>
28   * This class was borrowed from Apache Commons Codec SVN repository (rev. 618419) with modifications
29   * to enable Base64 conversion without a full dependecny on Commons Codec.  We didn't want to reinvent the wheel of
30   * great work they've done, but also didn't want to force every Shiro user to depend on the commons-codec.jar
31   * <p/>
32   * As per the Apache 2.0 license, the original copyright notice and all author and copyright information have
33   * remained in tact.
34   *
35   * @see <a href="http://en.wikipedia.org/wiki/Base64">Wikipedia: Base 64</a>
36   * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
37   * @since 0.9
38   */
39  public class Base64 {
40  
41      /**
42       * Chunk size per RFC 2045 section 6.8.
43       * <p/>
44       * The character limit does not count the trailing CRLF, but counts all other characters, including any
45       * equal signs.
46       *
47       * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
48       */
49      static final int CHUNK_SIZE = 76;
50  
51      /**
52       * Chunk separator per RFC 2045 section 2.1.
53       *
54       * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
55       */
56      static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
57  
58      /**
59       * The base length.
60       */
61      private static final int BASELENGTH = 255;
62  
63      /**
64       * Lookup length.
65       */
66      private static final int LOOKUPLENGTH = 64;
67  
68      /**
69       * Used to calculate the number of bits in a byte.
70       */
71      private static final int EIGHTBIT = 8;
72  
73      /**
74       * Used when encoding something which has fewer than 24 bits.
75       */
76      private static final int SIXTEENBIT = 16;
77  
78      /**
79       * Used to determine how many bits data contains.
80       */
81      private static final int TWENTYFOURBITGROUP = 24;
82  
83      /**
84       * Used to get the number of Quadruples.
85       */
86      private static final int FOURBYTE = 4;
87  
88      /**
89       * Used to test the sign of a byte.
90       */
91      private static final int SIGN = -128;
92  
93      /**
94       * Byte used to pad output.
95       */
96      private static final byte PAD = (byte) '=';
97  
98      /**
99       * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
100      * indices.
101      * <p/>
102      * <p>For example, <code>base64Alphabet['+']</code> returns <code>62</code>.</p>
103      * <p/>
104      * <p>The value of undefined encodings is <code>-1</code>.</p>
105      */
106     private static final byte[] base64Alphabet = new byte[BASELENGTH];
107 
108     /**
109      * <p>Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
110      * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
111      * <code>/</code>.</p>
112      * <p/>
113      * <p>This array is accessed by using character values as indices.</p>
114      * <p/>
115      * <p>For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.</p>
116      */
117     private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
118 
119     // Populating the lookup and character arrays
120 
121     static {
122         for (int i = 0; i < BASELENGTH; i++) {
123             base64Alphabet[i] = (byte) -1;
124         }
125         for (int i = 'Z'; i >= 'A'; i--) {
126             base64Alphabet[i] = (byte) (i - 'A');
127         }
128         for (int i = 'z'; i >= 'a'; i--) {
129             base64Alphabet[i] = (byte) (i - 'a' + 26);
130         }
131         for (int i = '9'; i >= '0'; i--) {
132             base64Alphabet[i] = (byte) (i - '0' + 52);
133         }
134 
135         base64Alphabet['+'] = 62;
136         base64Alphabet['/'] = 63;
137 
138         for (int i = 0; i <= 25; i++) {
139             lookUpBase64Alphabet[i] = (byte) ('A' + i);
140         }
141 
142         for (int i = 26, j = 0; i <= 51; i++, j++) {
143             lookUpBase64Alphabet[i] = (byte) ('a' + j);
144         }
145 
146         for (int i = 52, j = 0; i <= 61; i++, j++) {
147             lookUpBase64Alphabet[i] = (byte) ('0' + j);
148         }
149 
150         lookUpBase64Alphabet[62] = (byte) '+';
151         lookUpBase64Alphabet[63] = (byte) '/';
152     }
153 
154     /**
155      * Returns whether or not the <code>octect</code> is in the base 64 alphabet.
156      *
157      * @param octect The value to test
158      * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
159      */
160     private static boolean isBase64(byte octect) {
161         if (octect == PAD) {
162             return true;
163         } else //noinspection RedundantIfStatement
164             if (octect < 0 || base64Alphabet[octect] == -1) {
165                 return false;
166             } else {
167                 return true;
168             }
169     }
170 
171     /**
172      * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
173      *
174      * @param arrayOctect byte array to test
175      * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
176      *         empty; false, otherwise
177      */
178     public static boolean isBase64(byte[] arrayOctect) {
179 
180         arrayOctect = discardWhitespace(arrayOctect);
181 
182         int length = arrayOctect.length;
183         if (length == 0) {
184             // shouldn't a 0 length array be valid base64 data?
185             // return false;
186             return true;
187         }
188         for (int i = 0; i < length; i++) {
189             if (!isBase64(arrayOctect[i])) {
190                 return false;
191             }
192         }
193         return true;
194     }
195 
196     /**
197      * Discards any whitespace from a base-64 encoded block.
198      *
199      * @param data The base-64 encoded data to discard the whitespace from.
200      * @return The data, less whitespace (see RFC 2045).
201      */
202     static byte[] discardWhitespace(byte[] data) {
203         byte groomedData[] = new byte[data.length];
204         int bytesCopied = 0;
205 
206         for (byte aByte : data) {
207             switch (aByte) {
208                 case (byte) ' ':
209                 case (byte) '\n':
210                 case (byte) '\r':
211                 case (byte) '\t':
212                     break;
213                 default:
214                     groomedData[bytesCopied++] = aByte;
215             }
216         }
217 
218         byte packedData[] = new byte[bytesCopied];
219 
220         System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
221 
222         return packedData;
223     }
224 
225     /**
226      * Base64 encodes the specified byte array and then encodes it as a String using Shiro's preferred character
227      * encoding (UTF-8).
228      *
229      * @param bytes the byte array to Base64 encode.
230      * @return a UTF-8 encoded String of the resulting Base64 encoded byte array.
231      */
232     public static String encodeToString(byte[] bytes) {
233         byte[] encoded = encode(bytes);
234         return CodecSupport.toString(encoded);
235     }
236 
237     /**
238      * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
239      *
240      * @param binaryData binary data to encodeToChars
241      * @return Base64 characters chunked in 76 character blocks
242      */
243     public static byte[] encodeChunked(byte[] binaryData) {
244         return encode(binaryData, true);
245     }
246 
247     /**
248      * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
249      *
250      * @param pArray a byte array containing binary data
251      * @return A byte array containing only Base64 character data
252      */
253     public static byte[] encode(byte[] pArray) {
254         return encode(pArray, false);
255     }
256 
257     /**
258      * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
259      *
260      * @param binaryData Array containing binary data to encodeToChars.
261      * @param isChunked  if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
262      * @return Base64-encoded data.
263      * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
264      */
265     public static byte[] encode(byte[] binaryData, boolean isChunked) {
266         long binaryDataLength = binaryData.length;
267         long lengthDataBits = binaryDataLength * EIGHTBIT;
268         long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
269         long tripletCount = lengthDataBits / TWENTYFOURBITGROUP;
270         long encodedDataLengthLong;
271         int chunckCount = 0;
272 
273         if (fewerThan24bits != 0) {
274             // data not divisible by 24 bit
275             encodedDataLengthLong = (tripletCount + 1) * 4;
276         } else {
277             // 16 or 8 bit
278             encodedDataLengthLong = tripletCount * 4;
279         }
280 
281         // If the output is to be "chunked" into 76 character sections,
282         // for compliance with RFC 2045 MIME, then it is important to
283         // allow for extra length to account for the separator(s)
284         if (isChunked) {
285 
286             chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math
287                     .ceil((float) encodedDataLengthLong / CHUNK_SIZE));
288             encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length;
289         }
290 
291         if (encodedDataLengthLong > Integer.MAX_VALUE) {
292             throw new IllegalArgumentException(
293                     "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
294         }
295         int encodedDataLength = (int) encodedDataLengthLong;
296         byte encodedData[] = new byte[encodedDataLength];
297 
298         byte k, l, b1, b2, b3;
299 
300         int encodedIndex = 0;
301         int dataIndex;
302         int i;
303         int nextSeparatorIndex = CHUNK_SIZE;
304         int chunksSoFar = 0;
305 
306         // log.debug("number of triplets = " + numberTriplets);
307         for (i = 0; i < tripletCount; i++) {
308             dataIndex = i * 3;
309             b1 = binaryData[dataIndex];
310             b2 = binaryData[dataIndex + 1];
311             b3 = binaryData[dataIndex + 2];
312 
313             // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
314 
315             l = (byte) (b2 & 0x0f);
316             k = (byte) (b1 & 0x03);
317 
318             byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
319             byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
320             byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
321 
322             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
323             // log.debug( "val2 = " + val2 );
324             // log.debug( "k4 = " + (k<<4) );
325             // log.debug( "vak = " + (val2 | (k<<4)) );
326             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
327             encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
328             encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
329 
330             encodedIndex += 4;
331 
332             // If we are chunking, let's put a chunk separator down.
333             if (isChunked) {
334                 // this assumes that CHUNK_SIZE % 4 == 0
335                 if (encodedIndex == nextSeparatorIndex) {
336                     System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
337                     chunksSoFar++;
338                     nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length);
339                     encodedIndex += CHUNK_SEPARATOR.length;
340                 }
341             }
342         }
343 
344         // form integral number of 6-bit groups
345         dataIndex = i * 3;
346 
347         if (fewerThan24bits == EIGHTBIT) {
348             b1 = binaryData[dataIndex];
349             k = (byte) (b1 & 0x03);
350             // log.debug("b1=" + b1);
351             // log.debug("b1<<2 = " + (b1>>2) );
352             byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
353             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
354             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
355             encodedData[encodedIndex + 2] = PAD;
356             encodedData[encodedIndex + 3] = PAD;
357         } else if (fewerThan24bits == SIXTEENBIT) {
358 
359             b1 = binaryData[dataIndex];
360             b2 = binaryData[dataIndex + 1];
361             l = (byte) (b2 & 0x0f);
362             k = (byte) (b1 & 0x03);
363 
364             byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
365             byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
366 
367             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
368             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
369             encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
370             encodedData[encodedIndex + 3] = PAD;
371         }
372 
373         if (isChunked) {
374             // we also add a separator to the end of the final chunk.
375             if (chunksSoFar < chunckCount) {
376                 System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length,
377                         CHUNK_SEPARATOR.length);
378             }
379         }
380 
381         return encodedData;
382     }
383 
384     /**
385      * Converts the specified UTF-8 Base64 encoded String and decodes it to a resultant UTF-8 encoded string.
386      *
387      * @param base64Encoded a UTF-8 Base64 encoded String
388      * @return the decoded String, UTF-8 encoded.
389      */
390     public static String decodeToString(String base64Encoded) {
391         byte[] encodedBytes = CodecSupport.toBytes(base64Encoded);
392         return decodeToString(encodedBytes);
393     }
394 
395     /**
396      * Decodes the specified Base64 encoded byte array and returns the decoded result as a UTF-8 encoded.
397      *
398      * @param base64Encoded a Base64 encoded byte array
399      * @return the decoded String, UTF-8 encoded.
400      */
401     public static String decodeToString(byte[] base64Encoded) {
402         byte[] decoded = decode(base64Encoded);
403         return CodecSupport.toString(decoded);
404     }
405 
406     /**
407      * Converts the specified UTF-8 Base64 encoded String and decodes it to a raw Base64 decoded byte array.
408      *
409      * @param base64Encoded a UTF-8 Base64 encoded String
410      * @return the raw Base64 decoded byte array.
411      */
412     public static byte[] decode(String base64Encoded) {
413         byte[] bytes = CodecSupport.toBytes(base64Encoded);
414         return decode(bytes);
415     }
416 
417     /**
418      * Decodes Base64 data into octects
419      *
420      * @param base64Data Byte array containing Base64 data
421      * @return Array containing decoded data.
422      */
423     public static byte[] decode(byte[] base64Data) {
424         // RFC 2045 requires that we discard ALL non-Base64 characters
425         base64Data = discardNonBase64(base64Data);
426 
427         // handle the edge case, so we don't have to worry about it later
428         if (base64Data.length == 0) {
429             return new byte[0];
430         }
431 
432         int numberQuadruple = base64Data.length / FOURBYTE;
433         byte decodedData[];
434         byte b1, b2, b3, b4, marker0, marker1;
435 
436         // Throw away anything not in base64Data
437 
438         int encodedIndex = 0;
439         int dataIndex;
440         {
441             // this sizes the output array properly - rlw
442             int lastData = base64Data.length;
443             // ignore the '=' padding
444             while (base64Data[lastData - 1] == PAD) {
445                 if (--lastData == 0) {
446                     return new byte[0];
447                 }
448             }
449             decodedData = new byte[lastData - numberQuadruple];
450         }
451 
452         for (int i = 0; i < numberQuadruple; i++) {
453             dataIndex = i * 4;
454             marker0 = base64Data[dataIndex + 2];
455             marker1 = base64Data[dataIndex + 3];
456 
457             b1 = base64Alphabet[base64Data[dataIndex]];
458             b2 = base64Alphabet[base64Data[dataIndex + 1]];
459 
460             if (marker0 != PAD && marker1 != PAD) {
461                 // No PAD e.g 3cQl
462                 b3 = base64Alphabet[marker0];
463                 b4 = base64Alphabet[marker1];
464 
465                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
466                 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
467                 decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
468             } else if (marker0 == PAD) {
469                 // Two PAD e.g. 3c[Pad][Pad]
470                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
471             } else {
472                 // One PAD e.g. 3cQ[Pad]
473                 b3 = base64Alphabet[marker0];
474                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
475                 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
476             }
477             encodedIndex += 3;
478         }
479         return decodedData;
480     }
481 
482     /**
483      * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
484      * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
485      *
486      * @param data The base-64 encoded data to groom
487      * @return The data, less non-base64 characters (see RFC 2045).
488      */
489     static byte[] discardNonBase64(byte[] data) {
490         byte groomedData[] = new byte[data.length];
491         int bytesCopied = 0;
492 
493         for (byte aByte : data) {
494             if (isBase64(aByte)) {
495                 groomedData[bytesCopied++] = aByte;
496             }
497         }
498 
499         byte packedData[] = new byte[bytesCopied];
500 
501         System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
502 
503         return packedData;
504     }
505 
506 }