001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.shiro.codec;
020
021/**
022 * Provides <a href="http://en.wikipedia.org/wiki/Base64">Base 64</a> encoding and decoding as defined by
023 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
024 * <p/>
025 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
026 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
027 * <p/>
 * This class was borrowed from the Apache Commons Codec SVN repository (rev. 618419) with modifications
 * to enable Base64 conversion without a full dependency on Commons Codec.  We didn't want to reinvent the wheel of
 * great work they've done, but also didn't want to force every Shiro user to depend on the commons-codec.jar.
 * <p/>
 * As per the Apache 2.0 license, the original copyright notice and all author and copyright information have
 * remained intact.
034 *
035 * @see <a href="http://en.wikipedia.org/wiki/Base64">Wikipedia: Base 64</a>
036 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
037 * @since 0.9
038 */
039public class Base64 {
040
041    /**
042     * Chunk size per RFC 2045 section 6.8.
043     * <p/>
044     * The character limit does not count the trailing CRLF, but counts all other characters, including any
045     * equal signs.
046     *
047     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
048     */
049    static final int CHUNK_SIZE = 76;
050
051    /**
052     * Chunk separator per RFC 2045 section 2.1.
053     *
054     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
055     */
056    static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
057
058    /**
059     * The base length.
060     */
061    private static final int BASELENGTH = 255;
062
063    /**
064     * Lookup length.
065     */
066    private static final int LOOKUPLENGTH = 64;
067
068    /**
069     * Used to calculate the number of bits in a byte.
070     */
071    private static final int EIGHTBIT = 8;
072
073    /**
074     * Used when encoding something which has fewer than 24 bits.
075     */
076    private static final int SIXTEENBIT = 16;
077
078    /**
079     * Used to determine how many bits data contains.
080     */
081    private static final int TWENTYFOURBITGROUP = 24;
082
083    /**
084     * Used to get the number of Quadruples.
085     */
086    private static final int FOURBYTE = 4;
087
088    /**
089     * Used to test the sign of a byte.
090     */
091    private static final int SIGN = -128;
092
093    /**
094     * Byte used to pad output.
095     */
096    private static final byte PAD = (byte) '=';
097
098    /**
099     * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
100     * indices.
101     * <p/>
102     * <p>For example, <code>base64Alphabet['+']</code> returns <code>62</code>.</p>
103     * <p/>
104     * <p>The value of undefined encodings is <code>-1</code>.</p>
105     */
106    private static final byte[] base64Alphabet = new byte[BASELENGTH];
107
108    /**
109     * <p>Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
110     * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
111     * <code>/</code>.</p>
112     * <p/>
113     * <p>This array is accessed by using character values as indices.</p>
114     * <p/>
115     * <p>For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.</p>
116     */
117    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
118
119    // Populating the lookup and character arrays
120
121    static {
122        for (int i = 0; i < BASELENGTH; i++) {
123            base64Alphabet[i] = (byte) -1;
124        }
125        for (int i = 'Z'; i >= 'A'; i--) {
126            base64Alphabet[i] = (byte) (i - 'A');
127        }
128        for (int i = 'z'; i >= 'a'; i--) {
129            base64Alphabet[i] = (byte) (i - 'a' + 26);
130        }
131        for (int i = '9'; i >= '0'; i--) {
132            base64Alphabet[i] = (byte) (i - '0' + 52);
133        }
134
135        base64Alphabet['+'] = 62;
136        base64Alphabet['/'] = 63;
137
138        for (int i = 0; i <= 25; i++) {
139            lookUpBase64Alphabet[i] = (byte) ('A' + i);
140        }
141
142        for (int i = 26, j = 0; i <= 51; i++, j++) {
143            lookUpBase64Alphabet[i] = (byte) ('a' + j);
144        }
145
146        for (int i = 52, j = 0; i <= 61; i++, j++) {
147            lookUpBase64Alphabet[i] = (byte) ('0' + j);
148        }
149
150        lookUpBase64Alphabet[62] = (byte) '+';
151        lookUpBase64Alphabet[63] = (byte) '/';
152    }
153
154    /**
155     * Returns whether or not the <code>octect</code> is in the base 64 alphabet.
156     *
157     * @param octect The value to test
158     * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
159     */
160    private static boolean isBase64(byte octect) {
161        if (octect == PAD) {
162            return true;
163        } else //noinspection RedundantIfStatement
164            if (octect < 0 || base64Alphabet[octect] == -1) {
165                return false;
166            } else {
167                return true;
168            }
169    }
170
171    /**
172     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
173     *
174     * @param arrayOctect byte array to test
175     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
176     *         empty; false, otherwise
177     */
178    public static boolean isBase64(byte[] arrayOctect) {
179
180        arrayOctect = discardWhitespace(arrayOctect);
181
182        int length = arrayOctect.length;
183        if (length == 0) {
184            // shouldn't a 0 length array be valid base64 data?
185            // return false;
186            return true;
187        }
188        for (int i = 0; i < length; i++) {
189            if (!isBase64(arrayOctect[i])) {
190                return false;
191            }
192        }
193        return true;
194    }
195
196    /**
197     * Discards any whitespace from a base-64 encoded block.
198     *
199     * @param data The base-64 encoded data to discard the whitespace from.
200     * @return The data, less whitespace (see RFC 2045).
201     */
202    static byte[] discardWhitespace(byte[] data) {
203        byte groomedData[] = new byte[data.length];
204        int bytesCopied = 0;
205
206        for (byte aByte : data) {
207            switch (aByte) {
208                case (byte) ' ':
209                case (byte) '\n':
210                case (byte) '\r':
211                case (byte) '\t':
212                    break;
213                default:
214                    groomedData[bytesCopied++] = aByte;
215            }
216        }
217
218        byte packedData[] = new byte[bytesCopied];
219
220        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
221
222        return packedData;
223    }
224
225    /**
226     * Base64 encodes the specified byte array and then encodes it as a String using Shiro's preferred character
227     * encoding (UTF-8).
228     *
229     * @param bytes the byte array to Base64 encode.
230     * @return a UTF-8 encoded String of the resulting Base64 encoded byte array.
231     */
232    public static String encodeToString(byte[] bytes) {
233        byte[] encoded = encode(bytes);
234        return CodecSupport.toString(encoded);
235    }
236
237    /**
238     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
239     *
240     * @param binaryData binary data to encodeToChars
241     * @return Base64 characters chunked in 76 character blocks
242     */
243    public static byte[] encodeChunked(byte[] binaryData) {
244        return encode(binaryData, true);
245    }
246
247    /**
248     * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
249     *
250     * @param pArray a byte array containing binary data
251     * @return A byte array containing only Base64 character data
252     */
253    public static byte[] encode(byte[] pArray) {
254        return encode(pArray, false);
255    }
256
257    /**
258     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
259     *
260     * @param binaryData Array containing binary data to encodeToChars.
261     * @param isChunked  if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
262     * @return Base64-encoded data.
263     * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
264     */
265    public static byte[] encode(byte[] binaryData, boolean isChunked) {
266        long binaryDataLength = binaryData.length;
267        long lengthDataBits = binaryDataLength * EIGHTBIT;
268        long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
269        long tripletCount = lengthDataBits / TWENTYFOURBITGROUP;
270        long encodedDataLengthLong;
271        int chunckCount = 0;
272
273        if (fewerThan24bits != 0) {
274            // data not divisible by 24 bit
275            encodedDataLengthLong = (tripletCount + 1) * 4;
276        } else {
277            // 16 or 8 bit
278            encodedDataLengthLong = tripletCount * 4;
279        }
280
281        // If the output is to be "chunked" into 76 character sections,
282        // for compliance with RFC 2045 MIME, then it is important to
283        // allow for extra length to account for the separator(s)
284        if (isChunked) {
285
286            chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math
287                    .ceil((float) encodedDataLengthLong / CHUNK_SIZE));
288            encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length;
289        }
290
291        if (encodedDataLengthLong > Integer.MAX_VALUE) {
292            throw new IllegalArgumentException(
293                    "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
294        }
295        int encodedDataLength = (int) encodedDataLengthLong;
296        byte encodedData[] = new byte[encodedDataLength];
297
298        byte k, l, b1, b2, b3;
299
300        int encodedIndex = 0;
301        int dataIndex;
302        int i;
303        int nextSeparatorIndex = CHUNK_SIZE;
304        int chunksSoFar = 0;
305
306        // log.debug("number of triplets = " + numberTriplets);
307        for (i = 0; i < tripletCount; i++) {
308            dataIndex = i * 3;
309            b1 = binaryData[dataIndex];
310            b2 = binaryData[dataIndex + 1];
311            b3 = binaryData[dataIndex + 2];
312
313            // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
314
315            l = (byte) (b2 & 0x0f);
316            k = (byte) (b1 & 0x03);
317
318            byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
319            byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
320            byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
321
322            encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
323            // log.debug( "val2 = " + val2 );
324            // log.debug( "k4 = " + (k<<4) );
325            // log.debug( "vak = " + (val2 | (k<<4)) );
326            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
327            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
328            encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
329
330            encodedIndex += 4;
331
332            // If we are chunking, let's put a chunk separator down.
333            if (isChunked) {
334                // this assumes that CHUNK_SIZE % 4 == 0
335                if (encodedIndex == nextSeparatorIndex) {
336                    System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
337                    chunksSoFar++;
338                    nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length);
339                    encodedIndex += CHUNK_SEPARATOR.length;
340                }
341            }
342        }
343
344        // form integral number of 6-bit groups
345        dataIndex = i * 3;
346
347        if (fewerThan24bits == EIGHTBIT) {
348            b1 = binaryData[dataIndex];
349            k = (byte) (b1 & 0x03);
350            // log.debug("b1=" + b1);
351            // log.debug("b1<<2 = " + (b1>>2) );
352            byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
353            encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
354            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
355            encodedData[encodedIndex + 2] = PAD;
356            encodedData[encodedIndex + 3] = PAD;
357        } else if (fewerThan24bits == SIXTEENBIT) {
358
359            b1 = binaryData[dataIndex];
360            b2 = binaryData[dataIndex + 1];
361            l = (byte) (b2 & 0x0f);
362            k = (byte) (b1 & 0x03);
363
364            byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
365            byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
366
367            encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
368            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
369            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
370            encodedData[encodedIndex + 3] = PAD;
371        }
372
373        if (isChunked) {
374            // we also add a separator to the end of the final chunk.
375            if (chunksSoFar < chunckCount) {
376                System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length,
377                        CHUNK_SEPARATOR.length);
378            }
379        }
380
381        return encodedData;
382    }
383
384    /**
385     * Converts the specified UTF-8 Base64 encoded String and decodes it to a resultant UTF-8 encoded string.
386     *
387     * @param base64Encoded a UTF-8 Base64 encoded String
388     * @return the decoded String, UTF-8 encoded.
389     */
390    public static String decodeToString(String base64Encoded) {
391        byte[] encodedBytes = CodecSupport.toBytes(base64Encoded);
392        return decodeToString(encodedBytes);
393    }
394
395    /**
396     * Decodes the specified Base64 encoded byte array and returns the decoded result as a UTF-8 encoded.
397     *
398     * @param base64Encoded a Base64 encoded byte array
399     * @return the decoded String, UTF-8 encoded.
400     */
401    public static String decodeToString(byte[] base64Encoded) {
402        byte[] decoded = decode(base64Encoded);
403        return CodecSupport.toString(decoded);
404    }
405
406    /**
407     * Converts the specified UTF-8 Base64 encoded String and decodes it to a raw Base64 decoded byte array.
408     *
409     * @param base64Encoded a UTF-8 Base64 encoded String
410     * @return the raw Base64 decoded byte array.
411     */
412    public static byte[] decode(String base64Encoded) {
413        byte[] bytes = CodecSupport.toBytes(base64Encoded);
414        return decode(bytes);
415    }
416
417    /**
418     * Decodes Base64 data into octects
419     *
420     * @param base64Data Byte array containing Base64 data
421     * @return Array containing decoded data.
422     */
423    public static byte[] decode(byte[] base64Data) {
424        // RFC 2045 requires that we discard ALL non-Base64 characters
425        base64Data = discardNonBase64(base64Data);
426
427        // handle the edge case, so we don't have to worry about it later
428        if (base64Data.length == 0) {
429            return new byte[0];
430        }
431
432        int numberQuadruple = base64Data.length / FOURBYTE;
433        byte decodedData[];
434        byte b1, b2, b3, b4, marker0, marker1;
435
436        // Throw away anything not in base64Data
437
438        int encodedIndex = 0;
439        int dataIndex;
440        {
441            // this sizes the output array properly - rlw
442            int lastData = base64Data.length;
443            // ignore the '=' padding
444            while (base64Data[lastData - 1] == PAD) {
445                if (--lastData == 0) {
446                    return new byte[0];
447                }
448            }
449            decodedData = new byte[lastData - numberQuadruple];
450        }
451
452        for (int i = 0; i < numberQuadruple; i++) {
453            dataIndex = i * 4;
454            marker0 = base64Data[dataIndex + 2];
455            marker1 = base64Data[dataIndex + 3];
456
457            b1 = base64Alphabet[base64Data[dataIndex]];
458            b2 = base64Alphabet[base64Data[dataIndex + 1]];
459
460            if (marker0 != PAD && marker1 != PAD) {
461                // No PAD e.g 3cQl
462                b3 = base64Alphabet[marker0];
463                b4 = base64Alphabet[marker1];
464
465                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
466                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
467                decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
468            } else if (marker0 == PAD) {
469                // Two PAD e.g. 3c[Pad][Pad]
470                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
471            } else {
472                // One PAD e.g. 3cQ[Pad]
473                b3 = base64Alphabet[marker0];
474                decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
475                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
476            }
477            encodedIndex += 3;
478        }
479        return decodedData;
480    }
481
482    /**
483     * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
484     * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
485     *
486     * @param data The base-64 encoded data to groom
487     * @return The data, less non-base64 characters (see RFC 2045).
488     */
489    static byte[] discardNonBase64(byte[] data) {
490        byte groomedData[] = new byte[data.length];
491        int bytesCopied = 0;
492
493        for (byte aByte : data) {
494            if (isBase64(aByte)) {
495                groomedData[bytesCopied++] = aByte;
496            }
497        }
498
499        byte packedData[] = new byte[bytesCopied];
500
501        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
502
503        return packedData;
504    }
505
506}