001/*
002 * LZString4Java By Rufus Huang
003 * https://github.com/rufushuang/lz-string4java
004 * MIT License
005 *
006 * Port from original JavaScript version by pieroxy
007 * https://github.com/pieroxy/lz-string
008 */
009
010package squidpony;
011
012import java.util.ArrayList;
013import java.util.HashMap;
014import java.util.HashSet;
015
016/**
017 * LZ-String compression; wrapped by {@link LZSPlus} for convenience, but extended functionality is available here.
018 * You can compress a String to a smaller, technically-invalid UTF-16 String with {@link #compress(String)}, and undo
019 * that with {@link #decompress(String)}. Compress to a valid UTF-16 String with {@link #compressToUTF16(String)},
020 * decompress such a String with {@link #decompressFromUTF16(String)} (LZSPlus uses these). Compress to Base64 with
021 * {@link #compressToBase64(String)}, decompress Base64 with {@link #decompressFromBase64(String)}. Compress to
022 * URI-encoded Strings with {@link #compressToEncodedURIComponent(String)}, decompress those with
023 * {@link #decompressFromEncodedURIComponent(String)}. This class is super-sourced with a compatible alternative
024 * implementation on GWT for performance, and a main goal is to provide UTF-16 Strings that can be stored in a browser's
025 * LocalStorage on GWT. This class is also sometimes used internally when a large compressed String in Java source code
026 * makes more sense than an even larger resource file.
027 */
028public final class LZSEncoding {
029
030    private LZSEncoding() {};
031    private static final char[] keyStrBase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=".toCharArray(),
032            keyStrUriSafe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$".toCharArray(),
033            valStrBase64 = new char[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
034                    62, 0, 0, 0, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 64, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
035                    0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51},
036            valStrUriSafe = new char[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 62, 0, 63, 0, 0,
037                    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
038                    0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
039
040    /**
041     * Compress a String using LZ-String encoding but only using Base64 characters ('A'-'Z', 'a'-'z', '0'-'9', '+', '/',
042     * and '=' for Base64 validity).
043     * @param uncompressed an uncompressed String to encode
044     * @return the encoded, compressed String
045     */
046    public static String compressToBase64(String uncompressed) {
047        if (uncompressed == null)
048            return null;
049        String res = _compress(uncompressed, 6, keyStrBase64, 0);
050        switch (res.length() & 3) { // To produce valid Base64
051            default: // When could this happen ?
052            case 0:
053                return res;
054            case 1:
055                return res + "===";
056            case 2:
057                return res + "==";
058            case 3:
059                return res + "=";
060        }
061    }
062
063    /**
064     * Decompresses a String that had been compressed with {@link #compressToBase64(String)}.
065     * @param compressed a Base64-encoded, compressed String
066     * @return the original uncompressed version of the String
067     */
068    public static String decompressFromBase64(String compressed) {
069        if (compressed == null)
070            return null;
071        if (compressed.isEmpty())
072            return "";
073        final char[] input = compressed.toCharArray();
074        return _decompress(input.length, 32, input, valStrBase64, 0);
075    }
076
077    /**
078     * Compresses a String using the properties of UTF-16 encoding to store approximately 15 bits of LZW-compressed text
079     * in each 2-byte Unicode character, which does particularly well with ASCII text and can be smaller than UTF-8 in
080     * some cases, especially where each char must be stored as UTF-16, e.g. Java Strings or browser-based LocalStorage.
081     * @param uncompressed an uncompressed String to encode
082     * @return the encoded, compressed String
083     */
084    public static String compressToUTF16(String uncompressed) {
085        if (uncompressed == null)
086            return null;
087        return _compress(uncompressed, 15, null, 32) + " ";
088    }
089    /**
090     * Decompresses a String that had been compressed with {@link #compressToUTF16(String)}.
091     * @param compressed a UTF16-encoded (as by {@link #compressToUTF16(String)}), compressed String
092     * @return the original uncompressed version of the String
093     */
094    public static String decompressFromUTF16(String compressed) {
095        if (compressed == null)
096            return null;
097        if (compressed.isEmpty())
098            return "";
099        final char[] comp = compressed.toCharArray();
100        return _decompress(comp.length, 16384, comp, null, -32);
101    }
102
103    /**
104     * Compress a String using LZ-String encoding but only using valid URI component characters ('A'-'Z', 'a'-'z',
105     * '0'-'9', '+', '-', and possibly '$').
106     * @param uncompressed an uncompressed String to encode
107     * @return the encoded, compressed String
108     */
109    public static String compressToEncodedURIComponent(String uncompressed) {
110        if (uncompressed == null)
111            return null;
112        return _compress(uncompressed, 6, keyStrUriSafe, 0) + '+';
113    }
114    /**
115     * Decompresses a String that had been compressed with {@link #compressToEncodedURIComponent(String)}.
116     * @param compressed a URI-encoded, compressed String
117     * @return the original uncompressed version of the String
118     */
119    public static String decompressFromEncodedURIComponent(String compressed) {
120        if (compressed == null) return null;
121        if (compressed.isEmpty()) return "";
122        final char[] input = compressed.toCharArray();
123        return _decompress(input.length, 32, input, valStrUriSafe, 0);
124    }
125
126    /**
127     * Compresses a String as tightly as possible by using 16 bits of each 16-bit character, which can infrequently
128     * result in invalid UTF-16 codepoints, but that may not matter for all applications.
129     * @param uncompressed an uncompressed String to encode
130     * @return the encoded, compressed String
131     */
132    public static String compress(String uncompressed) {
133        return _compress(uncompressed, 16, null, 0);
134    }
135
136    private static String _compress(String uncompressedStr, int bitsPerChar, char[] getCharFromInt, int offset) {
137        if (uncompressedStr == null) return null;
138        if (uncompressedStr.isEmpty()) return "";
139        int i, value;
140        HashMap<String, Integer> context_dictionary = new HashMap<>();
141        HashSet<String> context_dictionaryToCreate = new HashSet<>();
142        String context_c;
143        String context_wc;
144        String context_w = "";
145        int context_enlargeIn = 2; // Compensate for the first entry which should not count
146        int context_dictSize = 3;
147        int context_numBits = 2;
148        StringBuilder context_data = new StringBuilder(uncompressedStr.length() >>> 1);
149        int context_data_val = 0;
150        int context_data_position = 0;
151        int ii;
152
153        char[] uncompressed = uncompressedStr.toCharArray();
154        for (ii = 0; ii < uncompressed.length; ii++) {
155            context_c = String.valueOf(uncompressed[ii]);
156            if (!context_dictionary.containsKey(context_c)) {
157                context_dictionary.put(context_c, context_dictSize++);
158                context_dictionaryToCreate.add(context_c);
159            }
160
161            context_wc = context_w + context_c;
162            if (context_dictionary.containsKey(context_wc)) {
163                context_w = context_wc;
164            } else {
165                if (context_dictionaryToCreate.contains(context_w)) {
166                    if ((value = context_w.charAt(0)) < 256) {
167                        for (i = 0; i < context_numBits; i++) {
168                            context_data_val = (context_data_val << 1);
169                            if (context_data_position == bitsPerChar - 1) {
170                                context_data_position = 0;
171                                context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
172                                context_data_val = 0;
173                            } else {
174                                context_data_position++;
175                            }
176                        }
177                        for (i = 0; i < 8; i++) {
178                            context_data_val = (context_data_val << 1) | (value & 1);
179                            if (context_data_position == bitsPerChar - 1) {
180                                context_data_position = 0;
181                                context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
182                                context_data_val = 0;
183                            } else {
184                                context_data_position++;
185                            }
186                            value >>= 1;
187                        }
188                    } else {
189                        value = 1;
190                        for (i = 0; i < context_numBits; i++) {
191                            context_data_val = (context_data_val << 1) | value;
192                            if (context_data_position == bitsPerChar - 1) {
193                                context_data_position = 0;
194                                context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
195                                context_data_val = 0;
196                            } else {
197                                context_data_position++;
198                            }
199                            value = 0;
200                        }
201                        value = context_w.charAt(0);
202                        for (i = 0; i < 16; i++) {
203                            context_data_val = (context_data_val << 1) | (value & 1);
204                            if (context_data_position == bitsPerChar - 1) {
205                                context_data_position = 0;
206                                context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
207                                context_data_val = 0;
208                            } else {
209                                context_data_position++;
210                            }
211                            value >>= 1;
212                        }
213                    }
214                    context_enlargeIn--;
215                    if (context_enlargeIn == 0) {
216                        context_enlargeIn = 1 << context_numBits++;
217                    }
218                    context_dictionaryToCreate.remove(context_w);
219                } else {
220                    value = context_dictionary.get(context_w);
221                    for (i = 0; i < context_numBits; i++) {
222                        context_data_val = (context_data_val << 1) | (value & 1);
223                        if (context_data_position == bitsPerChar - 1) {
224                            context_data_position = 0;
225                            context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
226                            context_data_val = 0;
227                        } else {
228                            context_data_position++;
229                        }
230                        value >>= 1;
231                    }
232
233                }
234                context_enlargeIn--;
235                if (context_enlargeIn == 0) {
236                    context_enlargeIn = 1 << context_numBits++;
237                }
238                // Add wc to the dictionary.
239                context_dictionary.put(context_wc, context_dictSize++);
240                context_w = context_c;
241            }
242        }
243
244        // Output the code for w.
245        if (!context_w.isEmpty()) {
246            if (context_dictionaryToCreate.contains(context_w)) {
247                if (context_w.charAt(0) < 256) {
248                    for (i = 0; i < context_numBits; i++) {
249                        context_data_val = (context_data_val << 1);
250                        if (context_data_position == bitsPerChar - 1) {
251                            context_data_position = 0;
252                            context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
253                            context_data_val = 0;
254                        } else {
255                            context_data_position++;
256                        }
257                    }
258                    value = context_w.charAt(0);
259                    for (i = 0; i < 8; i++) {
260                        context_data_val = (context_data_val << 1) | (value & 1);
261                        if (context_data_position == bitsPerChar - 1) {
262                            context_data_position = 0;
263                            context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
264                            context_data_val = 0;
265                        } else {
266                            context_data_position++;
267                        }
268                        value >>= 1;
269                    }
270                } else {
271                    value = 1;
272                    for (i = 0; i < context_numBits; i++) {
273                        context_data_val = (context_data_val << 1) | value;
274                        if (context_data_position == bitsPerChar - 1) {
275                            context_data_position = 0;
276                            context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
277                            context_data_val = 0;
278                        } else {
279                            context_data_position++;
280                        }
281                        value = 0;
282                    }
283                    value = context_w.charAt(0);
284                    for (i = 0; i < 16; i++) {
285                        context_data_val = (context_data_val << 1) | (value & 1);
286                        if (context_data_position == bitsPerChar - 1) {
287                            context_data_position = 0;
288                            context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
289                            context_data_val = 0;
290                        } else {
291                            context_data_position++;
292                        }
293                        value >>= 1;
294                    }
295                }
296
297                context_dictionaryToCreate.remove(context_w);
298            } else {
299                value = context_dictionary.get(context_w);
300                for (i = 0; i < context_numBits; i++) {
301                    context_data_val = (context_data_val << 1) | (value & 1);
302                    if (context_data_position == bitsPerChar - 1) {
303                        context_data_position = 0;
304                        context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
305                        context_data_val = 0;
306                    } else {
307                        context_data_position++;
308                    }
309                    value >>= 1;
310                }
311
312            }
313        }
314
315        // Mark the end of the stream
316        value = 2;
317        for (i = 0; i < context_numBits; i++) {
318            context_data_val = (context_data_val << 1) | (value & 1);
319            if (context_data_position == bitsPerChar - 1) {
320                context_data_position = 0;
321                context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
322                context_data_val = 0;
323            } else {
324                context_data_position++;
325            }
326            value >>= 1;
327        }
328
329        // Flush the last char
330        while (true) {
331            context_data_val = (context_data_val << 1);
332            if (context_data_position == bitsPerChar - 1) {
333                context_data.append((char) (((getCharFromInt == null) ? context_data_val : getCharFromInt[context_data_val]) + offset));
334                break;
335            } else
336                context_data_position++;
337        }
338        return context_data.toString();
339    }
340    /**
341     * Decompresses a String that had been compressed with {@link #compress(String)}.
342     * @param compressed a compressed String using the default encoding from {@link #compress(String)}
343     * @return the original uncompressed version of the String
344     */
345    public static String decompress(final String compressed) {
346        if (compressed == null)
347            return null;
348        if (compressed.isEmpty())
349            return "";
350        return _decompress(compressed.length(), 32768, compressed.toCharArray(), null, 0);
351    }
352
353    private static String _decompress(int length, int resetValue, char[] getNextValue, char[] modify, int offset) {
354        if(getNextValue == null)
355            return null;
356        if(getNextValue.length == 0)
357            return "";
358        ArrayList<String> dictionary = new ArrayList<>();
359        int enlargeIn = 4, dictSize = 4, numBits = 3, position = resetValue, index = 1, resb, maxpower, power;
360        String entry, w, c;
361        ArrayList<String> result = new ArrayList<>();
362        char bits, val = (modify == null) ? (char) (getNextValue[0] + offset) : modify[getNextValue[0]];
363
364        for (char i = 0; i < 3; i++) {
365            dictionary.add(i, String.valueOf(i));
366        }
367
368        bits = 0;
369        maxpower = 2;
370        power = 0;
371        while (power != maxpower) {
372            resb = val & position;
373            position >>= 1;
374            if (position == 0) {
375                position = resetValue;
376                val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]];
377            }
378            bits |= (resb > 0 ? 1 : 0) << power++;
379        }
380
381        switch (bits) {
382            case 0:
383                bits = 0;
384                maxpower = 8;
385                power = 0;
386                while (power != maxpower) {
387                    resb = val & position;
388                    position >>= 1;
389                    if (position == 0) {
390                        position = resetValue;
391                        val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]];
392                    }
393                    bits |= (resb > 0 ? 1 : 0) << power++;
394                }
395                c = String.valueOf(bits);
396                break;
397            case 1:
398                bits = 0;
399                maxpower = 16;
400                power = 0;
401                while (power != maxpower) {
402                    resb = val & position;
403                    position >>= 1;
404                    if (position == 0) {
405                        position = resetValue;
406                        val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]];
407                    }
408                    bits |= (resb > 0 ? 1 : 0) << power++;
409                }
410                c = String.valueOf(bits);
411                break;
412            default:
413                return "";
414        }
415        dictionary.add(c);
416        w = c;
417        result.add(w);
418        while (true) {
419            if (index > length) {
420                return "";
421            }
422            int cc = 0;
423            maxpower = numBits;
424            power = 0;
425            while (power != maxpower) {
426                resb = val & position;
427                position >>= 1;
428                if (position == 0) {
429                    position = resetValue;
430                    val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]];
431                }
432                cc |= (resb > 0 ? 1 : 0) << power++;
433            }
434            switch (cc) {
435                case 0:
436                    bits = 0;
437                    maxpower = 8;
438                    power = 0;
439                    while (power != maxpower) {
440                        resb = val & position;
441                        position >>= 1;
442                        if (position == 0) {
443                            position = resetValue;
444                            val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]];
445                        }
446                        bits |= (resb > 0 ? 1 : 0) << power++;
447                    }
448
449                    dictionary.add(String.valueOf(bits));
450                    cc = dictSize++;
451                    enlargeIn--;
452                    break;
453                case 1:
454                    bits = 0;
455                    maxpower = 16;
456                    power = 0;
457                    while (power != maxpower) {
458                        resb = val & position;
459                        position >>= 1;
460                        if (position == 0) {
461                            position = resetValue;
462                            val = (modify == null) ? (char) (getNextValue[index++] + offset) : modify[getNextValue[index++]];
463                        }
464                        bits |= (resb > 0 ? 1 : 0) << power++;
465                    }
466                    dictionary.add(String.valueOf(bits));
467                    cc = dictSize++;
468                    enlargeIn--;
469                    break;
470                case 2:
471                    StringBuilder sb = new StringBuilder(result.size());
472                    for (String s : result)
473                        sb.append(s);
474                    return sb.toString();
475            }
476
477            if (enlargeIn == 0) {
478                enlargeIn = 1 << numBits;
479                numBits++;
480            }
481
482            if (cc < dictionary.size() && dictionary.get(cc) != null) {
483                entry = dictionary.get(cc);
484            } else {
485                if (cc == dictSize) {
486                    entry = w + w.charAt(0);
487                } else {
488                    return "";
489                }
490            }
491            result.add(entry);
492
493            // Add w+entry[0] to the dictionary.
494            dictionary.add(w + entry.charAt(0));
495            dictSize++;
496            enlargeIn--;
497
498            w = entry;
499
500            if (enlargeIn == 0) {
501                enlargeIn = 1 << numBits;
502                numBits++;
503            }
504
505        }
506
507    }
508}