001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.shiro.codec; 020 021/** 022 * Provides <a href="http://en.wikipedia.org/wiki/Base64">Base 64</a> encoding and decoding as defined by 023 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>. 024 * <p/> 025 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose 026 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. 027 * <p/> 028 * This class was borrowed from Apache Commons Codec SVN repository (rev. 618419) with modifications 029 * to enable Base64 conversion without a full dependency on Commons Codec. We didn't want to reinvent the wheel of 030 * great work they've done, but also didn't want to force every Shiro user to depend on the commons-codec.jar 031 * <p/> 032 * As per the Apache 2.0 license, the original copyright notice and all author and copyright information have 033 * remained in tact. 034 * 035 * @see <a href="http://en.wikipedia.org/wiki/Base64">Wikipedia: Base 64</a> 036 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> 037 * @since 0.9 038 */ 039public class Base64 { 040 041 /** 042 * Chunk size per RFC 2045 section 6.8. 043 * <p/> 044 * The character limit does not count the trailing CRLF, but counts all other characters, including any 045 * equal signs. 046 * 047 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 048 */ 049 static final int CHUNK_SIZE = 76; 050 051 /** 052 * Chunk separator per RFC 2045 section 2.1. 053 * 054 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 055 */ 056 static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes(); 057 058 /** 059 * The base length. 060 */ 061 private static final int BASELENGTH = 255; 062 063 /** 064 * Lookup length. 065 */ 066 private static final int LOOKUPLENGTH = 64; 067 068 /** 069 * Used to calculate the number of bits in a byte. 070 */ 071 private static final int EIGHTBIT = 8; 072 073 /** 074 * Used when encoding something which has fewer than 24 bits. 075 */ 076 private static final int SIXTEENBIT = 16; 077 078 /** 079 * Used to determine how many bits data contains. 080 */ 081 private static final int TWENTYFOURBITGROUP = 24; 082 083 /** 084 * Used to get the number of Quadruples. 085 */ 086 private static final int FOURBYTE = 4; 087 088 /** 089 * Used to test the sign of a byte. 090 */ 091 private static final int SIGN = -128; 092 093 /** 094 * Byte used to pad output. 095 */ 096 private static final byte PAD = (byte) '='; 097 098 /** 099 * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as 100 * indices. 101 * <p/> 102 * <p>For example, <code>base64Alphabet['+']</code> returns <code>62</code>.</p> 103 * <p/> 104 * <p>The value of undefined encodings is <code>-1</code>.</p> 105 */ 106 private static final byte[] base64Alphabet = new byte[BASELENGTH]; 107 108 /** 109 * <p>Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through 110 * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and 111 * <code>/</code>.</p> 112 * <p/> 113 * <p>This array is accessed by using character values as indices.</p> 114 * <p/> 115 * <p>For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.</p> 116 */ 117 private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH]; 118 119 // Populating the lookup and character arrays 120 121 static { 122 for (int i = 0; i < BASELENGTH; i++) { 123 base64Alphabet[i] = (byte) -1; 124 } 125 for (int i = 'Z'; i >= 'A'; i--) { 126 base64Alphabet[i] = (byte) (i - 'A'); 127 } 128 for (int i = 'z'; i >= 'a'; i--) { 129 base64Alphabet[i] = (byte) (i - 'a' + 26); 130 } 131 for (int i = '9'; i >= '0'; i--) { 132 base64Alphabet[i] = (byte) (i - '0' + 52); 133 } 134 135 base64Alphabet['+'] = 62; 136 base64Alphabet['/'] = 63; 137 138 for (int i = 0; i <= 25; i++) { 139 lookUpBase64Alphabet[i] = (byte) ('A' + i); 140 } 141 142 for (int i = 26, j = 0; i <= 51; i++, j++) { 143 lookUpBase64Alphabet[i] = (byte) ('a' + j); 144 } 145 146 for (int i = 52, j = 0; i <= 61; i++, j++) { 147 lookUpBase64Alphabet[i] = (byte) ('0' + j); 148 } 149 150 lookUpBase64Alphabet[62] = (byte) '+'; 151 lookUpBase64Alphabet[63] = (byte) '/'; 152 } 153 154 /** 155 * Returns whether or not the <code>octet</code> is in the base 64 alphabet. 156 * 157 * @param octect The value to test 158 * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise. 159 */ 160 private static boolean isBase64(byte octect) { 161 if (octect == PAD) { 162 return true; 163 } else //noinspection RedundantIfStatement 164 if (octect < 0 || base64Alphabet[octect] == -1) { 165 return false; 166 } else { 167 return true; 168 } 169 } 170 171 /** 172 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. 173 * 174 * @param arrayOctect byte array to test 175 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is 176 * empty; false, otherwise 177 */ 178 public static boolean isBase64(byte[] arrayOctect) { 179 180 arrayOctect = discardWhitespace(arrayOctect); 181 182 int length = arrayOctect.length; 183 if (length == 0) { 184 // shouldn't a 0 length array be valid base64 data? 185 // return false; 186 return true; 187 } 188 for (int i = 0; i < length; i++) { 189 if (!isBase64(arrayOctect[i])) { 190 return false; 191 } 192 } 193 return true; 194 } 195 196 /** 197 * Discards any whitespace from a base-64 encoded block. 198 * 199 * @param data The base-64 encoded data to discard the whitespace from. 200 * @return The data, less whitespace (see RFC 2045). 201 */ 202 static byte[] discardWhitespace(byte[] data) { 203 byte groomedData[] = new byte[data.length]; 204 int bytesCopied = 0; 205 206 for (byte aByte : data) { 207 switch (aByte) { 208 case (byte) ' ': 209 case (byte) '\n': 210 case (byte) '\r': 211 case (byte) '\t': 212 break; 213 default: 214 groomedData[bytesCopied++] = aByte; 215 } 216 } 217 218 byte packedData[] = new byte[bytesCopied]; 219 220 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); 221 222 return packedData; 223 } 224 225 /** 226 * Base64 encodes the specified byte array and then encodes it as a String using Shiro's preferred character 227 * encoding (UTF-8). 228 * 229 * @param bytes the byte array to Base64 encode. 230 * @return a UTF-8 encoded String of the resulting Base64 encoded byte array. 231 */ 232 public static String encodeToString(byte[] bytes) { 233 byte[] encoded = encode(bytes); 234 return CodecSupport.toString(encoded); 235 } 236 237 /** 238 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks 239 * 240 * @param binaryData binary data to encodeToChars 241 * @return Base64 characters chunked in 76 character blocks 242 */ 243 public static byte[] encodeChunked(byte[] binaryData) { 244 return encode(binaryData, true); 245 } 246 247 /** 248 * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet. 249 * 250 * @param pArray a byte array containing binary data 251 * @return A byte array containing only Base64 character data 252 */ 253 public static byte[] encode(byte[] pArray) { 254 return encode(pArray, false); 255 } 256 257 /** 258 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 259 * 260 * @param binaryData Array containing binary data to encodeToChars. 261 * @param isChunked if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 262 * @return Base64-encoded data. 263 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 264 */ 265 public static byte[] encode(byte[] binaryData, boolean isChunked) { 266 long binaryDataLength = binaryData.length; 267 long lengthDataBits = binaryDataLength * EIGHTBIT; 268 long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP; 269 long tripletCount = lengthDataBits / TWENTYFOURBITGROUP; 270 long encodedDataLengthLong; 271 int chunckCount = 0; 272 273 if (fewerThan24bits != 0) { 274 // data not divisible by 24 bit 275 encodedDataLengthLong = (tripletCount + 1) * 4; 276 } else { 277 // 16 or 8 bit 278 encodedDataLengthLong = tripletCount * 4; 279 } 280 281 // If the output is to be "chunked" into 76 character sections, 282 // for compliance with RFC 2045 MIME, then it is important to 283 // allow for extra length to account for the separator(s) 284 if (isChunked) { 285 286 chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math 287 .ceil((float) encodedDataLengthLong / CHUNK_SIZE)); 288 encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length; 289 } 290 291 if (encodedDataLengthLong > Integer.MAX_VALUE) { 292 throw new IllegalArgumentException( 293 "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE); 294 } 295 int encodedDataLength = (int) encodedDataLengthLong; 296 byte encodedData[] = new byte[encodedDataLength]; 297 298 byte k, l, b1, b2, b3; 299 300 int encodedIndex = 0; 301 int dataIndex; 302 int i; 303 int nextSeparatorIndex = CHUNK_SIZE; 304 int chunksSoFar = 0; 305 306 // log.debug("number of triplets = " + numberTriplets); 307 for (i = 0; i < tripletCount; i++) { 308 dataIndex = i * 3; 309 b1 = binaryData[dataIndex]; 310 b2 = binaryData[dataIndex + 1]; 311 b3 = binaryData[dataIndex + 2]; 312 313 // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3); 314 315 l = (byte) (b2 & 0x0f); 316 k = (byte) (b1 & 0x03); 317 318 byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); 319 byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); 320 byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc); 321 322 encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; 323 // log.debug( "val2 = " + val2 ); 324 // log.debug( "k4 = " + (k<<4) ); 325 // log.debug( "vak = " + (val2 | (k<<4)) ); 326 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)]; 327 encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3]; 328 encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f]; 329 330 encodedIndex += 4; 331 332 // If we are chunking, let's put a chunk separator down. 333 if (isChunked) { 334 // this assumes that CHUNK_SIZE % 4 == 0 335 if (encodedIndex == nextSeparatorIndex) { 336 System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length); 337 chunksSoFar++; 338 nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length); 339 encodedIndex += CHUNK_SEPARATOR.length; 340 } 341 } 342 } 343 344 // form integral number of 6-bit groups 345 dataIndex = i * 3; 346 347 if (fewerThan24bits == EIGHTBIT) { 348 b1 = binaryData[dataIndex]; 349 k = (byte) (b1 & 0x03); 350 // log.debug("b1=" + b1); 351 // log.debug("b1<<2 = " + (b1>>2) ); 352 byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); 353 encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; 354 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4]; 355 encodedData[encodedIndex + 2] = PAD; 356 encodedData[encodedIndex + 3] = PAD; 357 } else if (fewerThan24bits == SIXTEENBIT) { 358 359 b1 = binaryData[dataIndex]; 360 b2 = binaryData[dataIndex + 1]; 361 l = (byte) (b2 & 0x0f); 362 k = (byte) (b1 & 0x03); 363 364 byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); 365 byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); 366 367 encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; 368 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)]; 369 encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2]; 370 encodedData[encodedIndex + 3] = PAD; 371 } 372 373 if (isChunked) { 374 // we also add a separator to the end of the final chunk. 375 if (chunksSoFar < chunckCount) { 376 System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length, 377 CHUNK_SEPARATOR.length); 378 } 379 } 380 381 return encodedData; 382 } 383 384 /** 385 * Converts the specified UTF-8 Base64 encoded String and decodes it to a resultant UTF-8 encoded string. 386 * 387 * @param base64Encoded a UTF-8 Base64 encoded String 388 * @return the decoded String, UTF-8 encoded. 389 */ 390 public static String decodeToString(String base64Encoded) { 391 byte[] encodedBytes = CodecSupport.toBytes(base64Encoded); 392 return decodeToString(encodedBytes); 393 } 394 395 /** 396 * Decodes the specified Base64 encoded byte array and returns the decoded result as a UTF-8 encoded. 397 * 398 * @param base64Encoded a Base64 encoded byte array 399 * @return the decoded String, UTF-8 encoded. 400 */ 401 public static String decodeToString(byte[] base64Encoded) { 402 byte[] decoded = decode(base64Encoded); 403 return CodecSupport.toString(decoded); 404 } 405 406 /** 407 * Converts the specified UTF-8 Base64 encoded String and decodes it to a raw Base64 decoded byte array. 408 * 409 * @param base64Encoded a UTF-8 Base64 encoded String 410 * @return the raw Base64 decoded byte array. 411 */ 412 public static byte[] decode(String base64Encoded) { 413 byte[] bytes = CodecSupport.toBytes(base64Encoded); 414 return decode(bytes); 415 } 416 417 /** 418 * Decodes Base64 data into octets 419 * 420 * @param base64Data Byte array containing Base64 data 421 * @return Array containing decoded data. 422 */ 423 public static byte[] decode(byte[] base64Data) { 424 // RFC 2045 requires that we discard ALL non-Base64 characters 425 base64Data = discardNonBase64(base64Data); 426 427 // handle the edge case, so we don't have to worry about it later 428 if (base64Data.length == 0) { 429 return new byte[0]; 430 } 431 432 int numberQuadruple = base64Data.length / FOURBYTE; 433 byte decodedData[]; 434 byte b1, b2, b3, b4, marker0, marker1; 435 436 // Throw away anything not in base64Data 437 438 int encodedIndex = 0; 439 int dataIndex; 440 { 441 // this sizes the output array properly - rlw 442 int lastData = base64Data.length; 443 // ignore the '=' padding 444 while (base64Data[lastData - 1] == PAD) { 445 if (--lastData == 0) { 446 return new byte[0]; 447 } 448 } 449 decodedData = new byte[lastData - numberQuadruple]; 450 } 451 452 for (int i = 0; i < numberQuadruple; i++) { 453 dataIndex = i * 4; 454 marker0 = base64Data[dataIndex + 2]; 455 marker1 = base64Data[dataIndex + 3]; 456 457 b1 = base64Alphabet[base64Data[dataIndex]]; 458 b2 = base64Alphabet[base64Data[dataIndex + 1]]; 459 460 if (marker0 != PAD && marker1 != PAD) { 461 // No PAD e.g 3cQl 462 b3 = base64Alphabet[marker0]; 463 b4 = base64Alphabet[marker1]; 464 465 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); 466 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); 467 decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4); 468 } else if (marker0 == PAD) { 469 // Two PAD e.g. 3c[Pad][Pad] 470 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); 471 } else { 472 // One PAD e.g. 3cQ[Pad] 473 b3 = base64Alphabet[marker0]; 474 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); 475 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); 476 } 477 encodedIndex += 3; 478 } 479 return decodedData; 480 } 481 482 /** 483 * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any 484 * characters outside of the base64 alphabet are to be ignored in base64 encoded data." 485 * 486 * @param data The base-64 encoded data to groom 487 * @return The data, less non-base64 characters (see RFC 2045). 488 */ 489 static byte[] discardNonBase64(byte[] data) { 490 byte groomedData[] = new byte[data.length]; 491 int bytesCopied = 0; 492 493 for (byte aByte : data) { 494 if (isBase64(aByte)) { 495 groomedData[bytesCopied++] = aByte; 496 } 497 } 498 499 byte packedData[] = new byte[bytesCopied]; 500 501 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); 502 503 return packedData; 504 } 505 506}