001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.shiro.codec;
020    
021    /**
022     * Provides <a href="http://en.wikipedia.org/wiki/Base64">Base 64</a> encoding and decoding as defined by
023     * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
024     * <p/>
025     * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
026     * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
027     * <p/>
028     * This class was borrowed from the Apache Commons Codec SVN repository (rev. 618419) with modifications
029     * to enable Base64 conversion without a full dependency on Commons Codec.  We didn't want to reinvent the wheel of
030     * great work they've done, but also didn't want to force every Shiro user to depend on the commons-codec.jar.
031     * <p/>
032     * As per the Apache 2.0 license, the original copyright notice and all author and copyright information have
033     * remained intact.
034     *
035     * @see <a href="http://en.wikipedia.org/wiki/Base64">Wikipedia: Base 64</a>
036     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
037     * @since 0.9
038     */
039    public class Base64 {
040    
041        /**
042         * Chunk size per RFC 2045 section 6.8.
043         * <p/>
044         * The character limit does not count the trailing CRLF, but counts all other characters, including any
045         * equal signs.
046         *
047         * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
048         */
049        static final int CHUNK_SIZE = 76;
050    
051        /**
052         * Chunk separator per RFC 2045 section 2.1.
053         *
054         * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
055         */
056        static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
057    
058        /**
059         * The base length.
060         */
061        private static final int BASELENGTH = 255;
062    
063        /**
064         * Lookup length.
065         */
066        private static final int LOOKUPLENGTH = 64;
067    
068        /**
069         * Used to calculate the number of bits in a byte.
070         */
071        private static final int EIGHTBIT = 8;
072    
073        /**
074         * Used when encoding something which has fewer than 24 bits.
075         */
076        private static final int SIXTEENBIT = 16;
077    
078        /**
079         * Used to determine how many bits data contains.
080         */
081        private static final int TWENTYFOURBITGROUP = 24;
082    
083        /**
084         * Used to get the number of Quadruples.
085         */
086        private static final int FOURBYTE = 4;
087    
088        /**
089         * Used to test the sign of a byte.
090         */
091        private static final int SIGN = -128;
092    
093        /**
094         * Byte used to pad output.
095         */
096        private static final byte PAD = (byte) '=';
097    
098        /**
099         * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
100         * indices.
101         * <p/>
102         * <p>For example, <code>base64Alphabet['+']</code> returns <code>62</code>.</p>
103         * <p/>
104         * <p>The value of undefined encodings is <code>-1</code>.</p>
105         */
106        private static final byte[] base64Alphabet = new byte[BASELENGTH];
107    
108        /**
109         * <p>Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
110         * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
111         * <code>/</code>.</p>
112         * <p/>
113         * <p>This array is accessed by using character values as indices.</p>
114         * <p/>
115         * <p>For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.</p>
116         */
117        private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
118    
119        // Populating the lookup and character arrays
120    
121        static {
122            for (int i = 0; i < BASELENGTH; i++) {
123                base64Alphabet[i] = (byte) -1;
124            }
125            for (int i = 'Z'; i >= 'A'; i--) {
126                base64Alphabet[i] = (byte) (i - 'A');
127            }
128            for (int i = 'z'; i >= 'a'; i--) {
129                base64Alphabet[i] = (byte) (i - 'a' + 26);
130            }
131            for (int i = '9'; i >= '0'; i--) {
132                base64Alphabet[i] = (byte) (i - '0' + 52);
133            }
134    
135            base64Alphabet['+'] = 62;
136            base64Alphabet['/'] = 63;
137    
138            for (int i = 0; i <= 25; i++) {
139                lookUpBase64Alphabet[i] = (byte) ('A' + i);
140            }
141    
142            for (int i = 26, j = 0; i <= 51; i++, j++) {
143                lookUpBase64Alphabet[i] = (byte) ('a' + j);
144            }
145    
146            for (int i = 52, j = 0; i <= 61; i++, j++) {
147                lookUpBase64Alphabet[i] = (byte) ('0' + j);
148            }
149    
150            lookUpBase64Alphabet[62] = (byte) '+';
151            lookUpBase64Alphabet[63] = (byte) '/';
152        }
153    
154        /**
155         * Returns whether or not the <code>octect</code> is in the base 64 alphabet.
156         *
157         * @param octect The value to test
158         * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
159         */
160        private static boolean isBase64(byte octect) {
161            if (octect == PAD) {
162                return true;
163            } else //noinspection RedundantIfStatement
164                if (octect < 0 || base64Alphabet[octect] == -1) {
165                    return false;
166                } else {
167                    return true;
168                }
169        }
170    
171        /**
172         * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
173         *
174         * @param arrayOctect byte array to test
175         * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
176         *         empty; false, otherwise
177         */
178        public static boolean isBase64(byte[] arrayOctect) {
179    
180            arrayOctect = discardWhitespace(arrayOctect);
181    
182            int length = arrayOctect.length;
183            if (length == 0) {
184                // shouldn't a 0 length array be valid base64 data?
185                // return false;
186                return true;
187            }
188            for (int i = 0; i < length; i++) {
189                if (!isBase64(arrayOctect[i])) {
190                    return false;
191                }
192            }
193            return true;
194        }
195    
196        /**
197         * Discards any whitespace from a base-64 encoded block.
198         *
199         * @param data The base-64 encoded data to discard the whitespace from.
200         * @return The data, less whitespace (see RFC 2045).
201         */
202        static byte[] discardWhitespace(byte[] data) {
203            byte groomedData[] = new byte[data.length];
204            int bytesCopied = 0;
205    
206            for (byte aByte : data) {
207                switch (aByte) {
208                    case (byte) ' ':
209                    case (byte) '\n':
210                    case (byte) '\r':
211                    case (byte) '\t':
212                        break;
213                    default:
214                        groomedData[bytesCopied++] = aByte;
215                }
216            }
217    
218            byte packedData[] = new byte[bytesCopied];
219    
220            System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
221    
222            return packedData;
223        }
224    
225        /**
226         * Base64 encodes the specified byte array and then encodes it as a String using Shiro's preferred character
227         * encoding (UTF-8).
228         *
229         * @param bytes the byte array to Base64 encode.
230         * @return a UTF-8 encoded String of the resulting Base64 encoded byte array.
231         */
232        public static String encodeToString(byte[] bytes) {
233            byte[] encoded = encode(bytes);
234            return CodecSupport.toString(encoded);
235        }
236    
237        /**
238         * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
239         *
240         * @param binaryData binary data to encodeToChars
241         * @return Base64 characters chunked in 76 character blocks
242         */
243        public static byte[] encodeChunked(byte[] binaryData) {
244            return encode(binaryData, true);
245        }
246    
247        /**
248         * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
249         *
250         * @param pArray a byte array containing binary data
251         * @return A byte array containing only Base64 character data
252         */
253        public static byte[] encode(byte[] pArray) {
254            return encode(pArray, false);
255        }
256    
257        /**
258         * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
259         *
260         * @param binaryData Array containing binary data to encodeToChars.
261         * @param isChunked  if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
262         * @return Base64-encoded data.
263         * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
264         */
265        public static byte[] encode(byte[] binaryData, boolean isChunked) {
266            long binaryDataLength = binaryData.length;
267            long lengthDataBits = binaryDataLength * EIGHTBIT;
268            long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
269            long tripletCount = lengthDataBits / TWENTYFOURBITGROUP;
270            long encodedDataLengthLong;
271            int chunckCount = 0;
272    
273            if (fewerThan24bits != 0) {
274                // data not divisible by 24 bit
275                encodedDataLengthLong = (tripletCount + 1) * 4;
276            } else {
277                // 16 or 8 bit
278                encodedDataLengthLong = tripletCount * 4;
279            }
280    
281            // If the output is to be "chunked" into 76 character sections,
282            // for compliance with RFC 2045 MIME, then it is important to
283            // allow for extra length to account for the separator(s)
284            if (isChunked) {
285    
286                chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math
287                        .ceil((float) encodedDataLengthLong / CHUNK_SIZE));
288                encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length;
289            }
290    
291            if (encodedDataLengthLong > Integer.MAX_VALUE) {
292                throw new IllegalArgumentException(
293                        "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
294            }
295            int encodedDataLength = (int) encodedDataLengthLong;
296            byte encodedData[] = new byte[encodedDataLength];
297    
298            byte k, l, b1, b2, b3;
299    
300            int encodedIndex = 0;
301            int dataIndex;
302            int i;
303            int nextSeparatorIndex = CHUNK_SIZE;
304            int chunksSoFar = 0;
305    
306            // log.debug("number of triplets = " + numberTriplets);
307            for (i = 0; i < tripletCount; i++) {
308                dataIndex = i * 3;
309                b1 = binaryData[dataIndex];
310                b2 = binaryData[dataIndex + 1];
311                b3 = binaryData[dataIndex + 2];
312    
313                // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
314    
315                l = (byte) (b2 & 0x0f);
316                k = (byte) (b1 & 0x03);
317    
318                byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
319                byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
320                byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
321    
322                encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
323                // log.debug( "val2 = " + val2 );
324                // log.debug( "k4 = " + (k<<4) );
325                // log.debug( "vak = " + (val2 | (k<<4)) );
326                encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
327                encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
328                encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
329    
330                encodedIndex += 4;
331    
332                // If we are chunking, let's put a chunk separator down.
333                if (isChunked) {
334                    // this assumes that CHUNK_SIZE % 4 == 0
335                    if (encodedIndex == nextSeparatorIndex) {
336                        System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
337                        chunksSoFar++;
338                        nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length);
339                        encodedIndex += CHUNK_SEPARATOR.length;
340                    }
341                }
342            }
343    
344            // form integral number of 6-bit groups
345            dataIndex = i * 3;
346    
347            if (fewerThan24bits == EIGHTBIT) {
348                b1 = binaryData[dataIndex];
349                k = (byte) (b1 & 0x03);
350                // log.debug("b1=" + b1);
351                // log.debug("b1<<2 = " + (b1>>2) );
352                byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
353                encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
354                encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
355                encodedData[encodedIndex + 2] = PAD;
356                encodedData[encodedIndex + 3] = PAD;
357            } else if (fewerThan24bits == SIXTEENBIT) {
358    
359                b1 = binaryData[dataIndex];
360                b2 = binaryData[dataIndex + 1];
361                l = (byte) (b2 & 0x0f);
362                k = (byte) (b1 & 0x03);
363    
364                byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
365                byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
366    
367                encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
368                encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
369                encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
370                encodedData[encodedIndex + 3] = PAD;
371            }
372    
373            if (isChunked) {
374                // we also add a separator to the end of the final chunk.
375                if (chunksSoFar < chunckCount) {
376                    System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length,
377                            CHUNK_SEPARATOR.length);
378                }
379            }
380    
381            return encodedData;
382        }
383    
384        /**
385         * Converts the specified UTF-8 Base64 encoded String and decodes it to a resultant UTF-8 encoded string.
386         *
387         * @param base64Encoded a UTF-8 Base64 encoded String
388         * @return the decoded String, UTF-8 encoded.
389         */
390        public static String decodeToString(String base64Encoded) {
391            byte[] encodedBytes = CodecSupport.toBytes(base64Encoded);
392            return decodeToString(encodedBytes);
393        }
394    
395        /**
396         * Decodes the specified Base64 encoded byte array and returns the decoded result as a UTF-8 encoded.
397         *
398         * @param base64Encoded a Base64 encoded byte array
399         * @return the decoded String, UTF-8 encoded.
400         */
401        public static String decodeToString(byte[] base64Encoded) {
402            byte[] decoded = decode(base64Encoded);
403            return CodecSupport.toString(decoded);
404        }
405    
406        /**
407         * Converts the specified UTF-8 Base64 encoded String and decodes it to a raw Base64 decoded byte array.
408         *
409         * @param base64Encoded a UTF-8 Base64 encoded String
410         * @return the raw Base64 decoded byte array.
411         */
412        public static byte[] decode(String base64Encoded) {
413            byte[] bytes = CodecSupport.toBytes(base64Encoded);
414            return decode(bytes);
415        }
416    
417        /**
418         * Decodes Base64 data into octects
419         *
420         * @param base64Data Byte array containing Base64 data
421         * @return Array containing decoded data.
422         */
423        public static byte[] decode(byte[] base64Data) {
424            // RFC 2045 requires that we discard ALL non-Base64 characters
425            base64Data = discardNonBase64(base64Data);
426    
427            // handle the edge case, so we don't have to worry about it later
428            if (base64Data.length == 0) {
429                return new byte[0];
430            }
431    
432            int numberQuadruple = base64Data.length / FOURBYTE;
433            byte decodedData[];
434            byte b1, b2, b3, b4, marker0, marker1;
435    
436            // Throw away anything not in base64Data
437    
438            int encodedIndex = 0;
439            int dataIndex;
440            {
441                // this sizes the output array properly - rlw
442                int lastData = base64Data.length;
443                // ignore the '=' padding
444                while (base64Data[lastData - 1] == PAD) {
445                    if (--lastData == 0) {
446                        return new byte[0];
447                    }
448                }
449                decodedData = new byte[lastData - numberQuadruple];
450            }
451    
452            for (int i = 0; i < numberQuadruple; i++) {
453                dataIndex = i * 4;
454                marker0 = base64Data[dataIndex + 2];
455                marker1 = base64Data[dataIndex + 3];
456    
457                b1 = base64Alphabet[base64Data[dataIndex]];
458                b2 = base64Alphabet[base64Data[dataIndex + 1]];
459    
460                if (marker0 != PAD && marker1 != PAD) {
461                    // No PAD e.g 3cQl
462                    b3 = base64Alphabet[marker0];
463                    b4 = base64Alphabet[marker1];
464    
465                    decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
466                    decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
467                    decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
468                } else if (marker0 == PAD) {
469                    // Two PAD e.g. 3c[Pad][Pad]
470                    decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
471                } else {
472                    // One PAD e.g. 3cQ[Pad]
473                    b3 = base64Alphabet[marker0];
474                    decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
475                    decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
476                }
477                encodedIndex += 3;
478            }
479            return decodedData;
480        }
481    
482        /**
483         * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
484         * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
485         *
486         * @param data The base-64 encoded data to groom
487         * @return The data, less non-base64 characters (see RFC 2045).
488         */
489        static byte[] discardNonBase64(byte[] data) {
490            byte groomedData[] = new byte[data.length];
491            int bytesCopied = 0;
492    
493            for (byte aByte : data) {
494                if (isBase64(aByte)) {
495                    groomedData[bytesCopied++] = aByte;
496                }
497            }
498    
499            byte packedData[] = new byte[bytesCopied];
500    
501            System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
502    
503            return packedData;
504        }
505    
506    }