001/* 002 * LZString4Java By Rufus Huang 003 * https://github.com/rufushuang/lz-string4java 004 * MIT License 005 * 006 * Port from original JavaScript version by pieroxy 007 * https://github.com/pieroxy/lz-string 008 */ 009 010package squidpony; 011 012import java.util.ArrayList; 013import java.util.HashMap; 014import java.util.HashSet; 015 016/** 017 * LZ-String compression; wrapped by {@link LZSPlus} for convenience, but extended functionality is available here. 018 * You can compress a String to a smaller, technically-invalid UTF-16 String with {@link #compress(String)}, and undo 019 * that with {@link #decompress(String)}. Compress to a valid UTF-16 String with {@link #compressToUTF16(String)}, 020 * decompress such a String with {@link #decompressFromUTF16(String)} (LZSPlus uses these). Compress to Base64 with 021 * {@link #compressToBase64(String)}, decompress Base64 with {@link #decompressFromBase64(String)}. Compress to 022 * URI-encoded Strings with {@link #compressToEncodedURIComponent(String)}, decompress those with 023 * {@link #decompressFromEncodedURIComponent(String)}. This class is super-sourced with a compatible alternative 024 * implementation on GWT for performance, and a main goal is to provide UTF-16 Strings that can be stored in a browser's 025 * LocalStorage on GWT. This class is also sometimes used internally when a large compressed String in Java source code 026 * makes more sense than an even larger resource file. 027 */ 028public final class LZSEncoding { 029 030 private LZSEncoding() {}; 031 private static final char[] keyStrBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=".toCharArray(), 032 keyStrUriSafe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$".toCharArray(), 033 valStrBase64 = new char[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 034 62, 0, 0, 0, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 64, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 035 0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51}, 036 valStrUriSafe = new char[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 62, 0, 63, 0, 0, 037 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 038 0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51}; 039 040 /** 041 * Compress a String using LZ-String encoding but only using Base64 characters ('A'-'Z', 'a'-'z', '0'-'9', '+', '/', 042 * and '=' for Base64 validity). 043 * @param uncompressed an uncompressed String to encode 044 * @return the encoded, compressed String 045 */ 046 public static String compressToBase64(String uncompressed) { 047 if (uncompressed == null) 048 return null; 049 String res = _compress(uncompressed, 6, keyStrBase64, 0); 050 switch (res.length() & 3) { // To produce valid Base64 051 default: // When could this happen ? 052 case 0: 053 return res; 054 case 1: 055 return res + "==="; 056 case 2: 057 return res + "=="; 058 case 3: 059 return res + "="; 060 } 061 } 062 063 /** 064 * Decompresses a String that had been compressed with {@link #compressToBase64(String)}. 065 * @param compressed a Base64-encoded, compressed String 066 * @return the original uncompressed version of the String 067 */ 068 public static String decompressFromBase64(String compressed) { 069 if (compressed == null) 070 return null; 071 if (compressed.isEmpty()) 072 return ""; 073 final char[] input = compressed.toCharArray(); 074 return _decompress(input.length, 32, input, valStrBase64, 0); 075 } 076 077 /** 078 * Compresses a String using the properties of UTF-16 encoding to store approximately 15 bits of LZW-compressed text 079 * in each 2-byte Unicode character, which does particularly well with ASCII text and can be smaller than UTF-8 in 080 * some cases, especially where each char must be stored as UTF-16, e.g. Java Strings or browser-based LocalStorage. 081 * @param uncompressed an uncompressed String to encode 082 * @return the encoded, compressed String 083 */ 084 public static String compressToUTF16(String uncompressed) { 085 if (uncompressed == null) 086 return null; 087 return _compress(uncompressed, 15, null, 32) + " "; 088 } 089 /** 090 * Decompresses a String that had been compressed with {@link #compressToUTF16(String)}. 091 * @param compressed a UTF16-encoded (as by {@link #compressToUTF16(String)}), compressed String 092 * @return the original uncompressed version of the String 093 */ 094 public static String decompressFromUTF16(String compressed) { 095 if (compressed == null) 096 return null; 097 if (compressed.isEmpty()) 098 return ""; 099 final char[] comp = compressed.toCharArray(); 100 return _decompress(comp.length, 16384, comp, null, -32); 101 } 102 103 /** 104 * Compress a String using LZ-String encoding but only using valid URI component characters ('A'-'Z', 'a'-'z', 105 * '0'-'9', '+', '-', and possibly '$'). 106 * @param uncompressed an uncompressed String to encode 107 * @return the encoded, compressed String 108 */ 109 public static String compressToEncodedURIComponent(String uncompressed) { 110 if (uncompressed == null) 111 return null; 112 return _compress(uncompressed, 6, keyStrUriSafe, 0) + '+'; 113 } 114 /** 115 * Decompresses a String that had been compressed with {@link #compressToEncodedURIComponent(String)}. 116 * @param compressed a URI-encoded, compressed String 117 * @return the original uncompressed version of the String 118 */ 119 public static String decompressFromEncodedURIComponent(String compressed) { 120 if (compressed == null) return null; 121 if (compressed.isEmpty()) return ""; 122 final char[] input = compressed.toCharArray(); 123 return _decompress(input.length, 32, input, valStrUriSafe, 0); 124 } 125 126 /** 127 * Compresses a String as tightly as possible by using 16 bits of each 16-bit character, which can infrequently 128 * result in invalid UTF-16 codepoints, but that may not matter for all applications. 129 * @param uncompressed an uncompressed String to encode 130 * @return the encoded, compressed String 131 */ 132 public static String compress(String uncompressed) { 133 return _compress(uncompressed, 16, null, 0); 134 } 135 136 private static String _compress(String uncompressedStr, int bitsPerChar, char[] getCharFromInt, int offset) { 137 if (uncompressedStr == null) return null; 138 if (uncompressedStr.isEmpty()) return ""; 139 int i, value; 140 HashMap<String, Integer> context_dictionary = new HashMap<>(); 141 HashSet<String> context_dictionaryToCreate = new HashSet<>(); 142 String context_c; 143 String context_wc; 144 String context_w = ""; 145 int context_enlargeIn = 2; // Compensate for the first entry which should not count 146 int context_dictSize = 3; 147 int context_numBits = 2; 148 StringBuilder context_data = new StringBuilder(uncompressedStr.length() >>> 1); 149 int context_data_val = 0; 150 int context_data_position = 0; 151 int ii; 152 153 char[] uncompressed = uncompressedStr.toCharArray(); 154 for (ii = 0; ii < uncompressed.length; ii++) { 155 context_c = String.valueOf(uncompressed[ii]); 156 if (!context_dictionary.containsKey(context_c)) { 157 context_dictionary.put(context_c, context_dictSize++); 158 context_dictionaryToCreate.add(context_c); 159 } 160 161 context_wc = context_w + context_c; 162 if (context_dictionary.containsKey(context_wc)) { 163 context_w = context_wc; 164 } else { 165 if (context_dictionaryToCreate.contains(context_w)) { 166 if ((value = context_w.charAt(0)) < 256) { 167 for (i = 0; i < context_numBits; i++) { 168 context_data_val = (context_data_val << 1); 169 if (context_data_position == bitsPerChar - 1) { 170 context_data_position = 0; 171 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 172 context_data_val = 0; 173 } else { 174 context_data_position++; 175 } 176 } 177 for (i = 0; i < 8; i++) { 178 context_data_val = (context_data_val << 1) | (value & 1); 179 if (context_data_position == bitsPerChar - 1) { 180 context_data_position = 0; 181 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 182 context_data_val = 0; 183 } else { 184 context_data_position++; 185 } 186 value >>= 1; 187 } 188 } else { 189 value = 1; 190 for (i = 0; i < context_numBits; i++) { 191 context_data_val = (context_data_val << 1) | value; 192 if (context_data_position == bitsPerChar - 1) { 193 context_data_position = 0; 194 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 195 context_data_val = 0; 196 } else { 197 context_data_position++; 198 } 199 value = 0; 200 } 201 value = context_w.charAt(0); 202 for (i = 0; i < 16; i++) { 203 context_data_val = (context_data_val << 1) | (value & 1); 204 if (context_data_position == bitsPerChar - 1) { 205 context_data_position = 0; 206 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 207 context_data_val = 0; 208 } else { 209 context_data_position++; 210 } 211 value >>= 1; 212 } 213 } 214 context_enlargeIn--; 215 if (context_enlargeIn == 0) { 216 context_enlargeIn = 1 << context_numBits++; 217 } 218 context_dictionaryToCreate.remove(context_w); 219 } else { 220 value = context_dictionary.get(context_w); 221 for (i = 0; i < context_numBits; i++) { 222 context_data_val = (context_data_val << 1) | (value & 1); 223 if (context_data_position == bitsPerChar - 1) { 224 context_data_position = 0; 225 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 226 context_data_val = 0; 227 } else { 228 context_data_position++; 229 } 230 value >>= 1; 231 } 232 233 } 234 context_enlargeIn--; 235 if (context_enlargeIn == 0) { 236 context_enlargeIn = 1 << context_numBits++; 237 } 238 // Add wc to the dictionary. 239 context_dictionary.put(context_wc, context_dictSize++); 240 context_w = context_c; 241 } 242 } 243 244 // Output the code for w. 245 if (!context_w.isEmpty()) { 246 if (context_dictionaryToCreate.contains(context_w)) { 247 if (context_w.charAt(0) < 256) { 248 for (i = 0; i < context_numBits; i++) { 249 context_data_val = (context_data_val << 1); 250 if (context_data_position == bitsPerChar - 1) { 251 context_data_position = 0; 252 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 253 context_data_val = 0; 254 } else { 255 context_data_position++; 256 } 257 } 258 value = context_w.charAt(0); 259 for (i = 0; i < 8; i++) { 260 context_data_val = (context_data_val << 1) | (value & 1); 261 if (context_data_position == bitsPerChar - 1) { 262 context_data_position = 0; 263 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 264 context_data_val = 0; 265 } else { 266 context_data_position++; 267 } 268 value >>= 1; 269 } 270 } else { 271 value = 1; 272 for (i = 0; i < context_numBits; i++) { 273 context_data_val = (context_data_val << 1) | value; 274 if (context_data_position == bitsPerChar - 1) { 275 context_data_position = 0; 276 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 277 context_data_val = 0; 278 } else { 279 context_data_position++; 280 } 281 value = 0; 282 } 283 value = context_w.charAt(0); 284 for (i = 0; i < 16; i++) { 285 context_data_val = (context_data_val << 1) | (value & 1); 286 if (context_data_position == bitsPerChar - 1) { 287 context_data_position = 0; 288 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 289 context_data_val = 0; 290 } else { 291 context_data_position++; 292 } 293 value >>= 1; 294 } 295 } 296 297 context_dictionaryToCreate.remove(context_w); 298 } else { 299 value = context_dictionary.get(context_w); 300 for (i = 0; i < context_numBits; i++) { 301 context_data_val = (context_data_val << 1) | (value & 1); 302 if (context_data_position == bitsPerChar - 1) { 303 context_data_position = 0; 304 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 305 context_data_val = 0; 306 } else { 307 context_data_position++; 308 } 309 value >>= 1; 310 } 311 312 } 313 } 314 315 // Mark the end of the stream 316 value = 2; 317 for (i = 0; i < context_numBits; i++) { 318 context_data_val = (context_data_val << 1) | (value & 1); 319 if (context_data_position == bitsPerChar - 1) { 320 context_data_position = 0; 321 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 322 context_data_val = 0; 323 } else { 324 context_data_position++; 325 } 326 value >>= 1; 327 } 328 329 // Flush the last char 330 while (true) { 331 context_data_val = (context_data_val << 1); 332 if (context_data_position == bitsPerChar - 1) { 333 context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset)); 334 break; 335 } else 336 context_data_position++; 337 } 338 return context_data.toString(); 339 } 340 /** 341 * Decompresses a String that had been compressed with {@link #compress(String)}. 342 * @param compressed a compressed String using the default encoding from {@link #compress(String)} 343 * @return the original uncompressed version of the String 344 */ 345 public static String decompress(final String compressed) { 346 if (compressed == null) 347 return null; 348 if (compressed.isEmpty()) 349 return ""; 350 return _decompress(compressed.length(), 32768, compressed.toCharArray(), null, 0); 351 } 352 353 private static String _decompress(int length, int resetValue, char[] getNextValue, char[] modify, int offset) { 354 if(getNextValue == null) 355 return null; 356 if(getNextValue.length == 0) 357 return ""; 358 ArrayList<String> dictionary = new ArrayList<>(); 359 int enlargeIn = 4, dictSize = 4, numBits = 3, position = resetValue, index = 1, resb, maxpower, power; 360 String entry, w, c; 361 ArrayList<String> result = new ArrayList<>(); 362 char bits, val = (modify == null) ? (char) (getNextValue[0] + offset) : modify[getNextValue[0]]; 363 364 for (char i = 0; i < 3; i++) { 365 dictionary.add(i, String.valueOf(i)); 366 } 367 368 bits = 0; 369 maxpower = 2; 370 power = 0; 371 while (power != maxpower) { 372 resb = val & position; 373 position >>= 1; 374 if (position == 0) { 375 position = resetValue; 376 val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]]; 377 } 378 bits |= (resb > 0 ? 1 : 0) << power++; 379 } 380 381 switch (bits) { 382 case 0: 383 bits = 0; 384 maxpower = 8; 385 power = 0; 386 while (power != maxpower) { 387 resb = val & position; 388 position >>= 1; 389 if (position == 0) { 390 position = resetValue; 391 val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]]; 392 } 393 bits |= (resb > 0 ? 1 : 0) << power++; 394 } 395 c = String.valueOf(bits); 396 break; 397 case 1: 398 bits = 0; 399 maxpower = 16; 400 power = 0; 401 while (power != maxpower) { 402 resb = val & position; 403 position >>= 1; 404 if (position == 0) { 405 position = resetValue; 406 val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]]; 407 } 408 bits |= (resb > 0 ? 1 : 0) << power++; 409 } 410 c = String.valueOf(bits); 411 break; 412 default: 413 return ""; 414 } 415 dictionary.add(c); 416 w = c; 417 result.add(w); 418 while (true) { 419 if (index > length) { 420 return ""; 421 } 422 int cc = 0; 423 maxpower = numBits; 424 power = 0; 425 while (power != maxpower) { 426 resb = val & position; 427 position >>= 1; 428 if (position == 0) { 429 position = resetValue; 430 val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]]; 431 } 432 cc |= (resb > 0 ? 1 : 0) << power++; 433 } 434 switch (cc) { 435 case 0: 436 bits = 0; 437 maxpower = 8; 438 power = 0; 439 while (power != maxpower) { 440 resb = val & position; 441 position >>= 1; 442 if (position == 0) { 443 position = resetValue; 444 val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]]; 445 } 446 bits |= (resb > 0 ? 1 : 0) << power++; 447 } 448 449 dictionary.add(String.valueOf(bits)); 450 cc = dictSize++; 451 enlargeIn--; 452 break; 453 case 1: 454 bits = 0; 455 maxpower = 16; 456 power = 0; 457 while (power != maxpower) { 458 resb = val & position; 459 position >>= 1; 460 if (position == 0) { 461 position = resetValue; 462 val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]]; 463 } 464 bits |= (resb > 0 ? 1 : 0) << power++; 465 } 466 dictionary.add(String.valueOf(bits)); 467 cc = dictSize++; 468 enlargeIn--; 469 break; 470 case 2: 471 StringBuilder sb = new StringBuilder(result.size()); 472 for (String s : result) 473 sb.append(s); 474 return sb.toString(); 475 } 476 477 if (enlargeIn == 0) { 478 enlargeIn = 1 << numBits; 479 numBits++; 480 } 481 482 if (cc < dictionary.size() && dictionary.get(cc) != null) { 483 entry = dictionary.get(cc); 484 } else { 485 if (cc == dictSize) { 486 entry = w + w.charAt(0); 487 } else { 488 return ""; 489 } 490 } 491 result.add(entry); 492 493 // Add w+entry[0] to the dictionary. 494 dictionary.add(w + entry.charAt(0)); 495 dictSize++; 496 enlargeIn--; 497 498 w = entry; 499 500 if (enlargeIn == 0) { 501 enlargeIn = 1 << numBits; 502 numBits++; 503 } 504 505 } 506 507 } 508}